In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw games table; the path is relative to the notebook's working directory.
games = pd.read_csv('games_info.csv')

In [3]:
# Preview the first row to see which columns the CSV provides.
games.head(1)

Unnamed: 0,Name,Developer,Producer,Genre,Operating System,Date Released
0,A-Men 2,Bloober Team,Bloober Team,"Adventure, Puzzle",Microsoft Windows,"June 24, 2015"


In [4]:
# Keep only the columns the recommender uses. The explicit copy makes `games` an
# independent frame, so adding columns to it later does not trigger pandas'
# SettingWithCopyWarning.
games = games[['Name','Producer','Genre']].copy()

In [5]:
# Build one comma-separated "tags" string per game from its producer and genre
# (missing values are skipped), then keep only the title and the tags.
# A new frame is bound to `games` instead of mutating it in place, so no
# SettingWithCopyWarning is raised however `games` was derived.
games = games.assign(
    tags=games[['Producer', 'Genre']].apply(
        lambda row: ', '.join(row.dropna().astype(str)), axis=1
    )
).drop(columns=['Producer', 'Genre'])

In [6]:
# Check the reshaped frame: only `Name` and the combined `tags` column should remain.
games.head()

Unnamed: 0,Name,tags
0,A-Men 2,"Bloober Team, Adventure, Puzzle"
1,A-Train,"Artdink, Maxis, Ocean Software, Vehicle Simula..."
2,A-10 Cuba!,"Activision, Flight simulator"
3,A.D. 2044,"LK Avalon, Adventure"
4,A.D.A.M. Life's Greatest Mysteries,"Columbia Healthcare Corporation, Educational"


In [7]:
# CountVectorizer turns text into token-count vectors. The vectorizer itself is
# instantiated in the next cell, right where it is fitted, so it is not created here.
from sklearn.feature_extraction.text import CountVectorizer

In [8]:
# Bag-of-words encoding of the tag strings: English stop words are ignored and the
# vocabulary is capped at 5000 terms.
cv = CountVectorizer(stop_words='english', max_features=5000)
# Learn the vocabulary and count terms per game, then convert the result to a
# dense NumPy array.
vector = cv.fit_transform(games['tags'])
vector = vector.toarray()

In [9]:
# (rows, columns) of the count matrix: one row per game, one column per vocabulary term.
vector.shape

(1095, 690)

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# Cosine similarity between every pair of rows of `vector`; entry [i, j] compares
# row i with row j, so the result is a square matrix.
similarity = cosine_similarity(vector)

In [12]:
# Display the similarity matrix (NumPy abbreviates large arrays in the output).
similarity

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.25      ],
       [0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.18898224],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.25      , 0.18898224, 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [13]:
# Drop repeated titles, keeping the first occurrence of each.
# `similarity` was computed from the frame *before* de-duplication, so the same
# rows and columns are removed from it and the frame is renumbered. Afterwards
# row i of `games` corresponds to row/column i of `similarity`, by label and by
# position alike. Re-running this cell is a no-op.
assert similarity.shape[0] == len(games), "similarity is out of sync with games"
keep_mask = ~games.duplicated(subset=['Name'], keep='first').to_numpy()
similarity = similarity[np.ix_(keep_mask, keep_mask)]
games = games[keep_mask].reset_index(drop=True)
games.head()

Unnamed: 0,Name,tags
0,A-Men 2,"Bloober Team, Adventure, Puzzle"
1,A-Train,"Artdink, Maxis, Ocean Software, Vehicle Simula..."
2,A-10 Cuba!,"Activision, Flight simulator"
3,A.D. 2044,"LK Avalon, Adventure"
4,A.D.A.M. Life's Greatest Mysteries,"Columbia Healthcare Corporation, Educational"


In [14]:
def recommend(game, top_n=5):
    """Return the titles of the games most similar to ``game``.

    Reads the module-level ``games`` frame (needs a ``Name`` column) and the
    module-level ``similarity`` matrix. Rows of ``similarity`` are addressed by
    the index *label* of ``games``; labels that are no longer present in
    ``games`` (for example rows dropped as duplicates) are skipped instead of
    being resolved by position, which would return the wrong title.

    Parameters
    ----------
    game : str
        Exact title to look up in ``games['Name']``.
    top_n : int, default 5
        Maximum number of titles to return.

    Returns
    -------
    list of str
        Up to ``top_n`` distinct titles, most similar first. Ties keep the
        row order of ``similarity``. The queried title is never included.

    Raises
    ------
    IndexError
        If ``game`` does not occur in ``games['Name']``.
    """
    matches = games.index[games['Name'] == game]
    if len(matches) == 0:
        raise IndexError(f"game {game!r} not found in games['Name']")
    row_label = matches[0]

    # Stable sort: candidates with equal scores stay in row order.
    ranked = sorted(
        enumerate(similarity[row_label]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    recommended_games = []
    for label, _score in ranked:
        if len(recommended_games) >= top_n:
            break
        # Skip the queried row explicitly: with tied scores it is not
        # guaranteed to be the first entry of the ranking.
        if label == row_label or label not in games.index:
            continue
        name = games.at[label, 'Name']
        if name == game or name in recommended_games:
            continue
        recommended_games.append(name)
    return recommended_games

In [17]:
# Example query: list the recommendations for a single title.
recommend('Far Cry 3')

["Brothers in Arms: Hell's Highway",
 'Brothers in Arms: Road to Hill 30',
 'Far Cry',
 'Far Cry 2',
 'Far Cry 3: Blood Dragon']

In [16]:
import pickle

# Persist the game table and the similarity matrix for later reuse. The context
# managers make sure each file is flushed and closed even if pickling fails.
with open('game_list.pkl', 'wb') as games_file:
    pickle.dump(games, games_file)
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)