In [41]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [228]:
# Load the tab-separated Steam catalogue; the second CSV column becomes the index.
steam_games = pd.read_csv('steam_games_all_fields.csv', sep='\t', index_col=1)

# Split every store URL on '/' and keep only pieces 3 and 4:
# the URL kind ('app') and the numeric store id ('game_ID').
app_id = steam_games['steam_url'].str.split('/', expand=True)
app_id = app_id.drop(columns=[0, 1, 2, 5, 6])
app_id.columns = ['app', 'game_ID']

# Cleaning pipeline. The steps run in this exact order because the integer
# downcasts depend on which rows are still present when they are applied.
# Filters that were tried and left disabled: `app` containing 'sub' or
# 'digitalgiftcards', `tags` containing 'no_tag', `score` containing 'no rating'.
df = (
    pd.concat([steam_games, app_id], axis=1, sort=False)
    # Missing descriptions become empty strings so the .str filter below is all-boolean.
    .assign(description=lambda frame: frame['description'].fillna(''))
    .loc[lambda frame: ~frame['description'].str.contains('no description')]
    .drop(columns=['app'])
    .assign(
        game_ID=lambda frame: pd.to_numeric(frame['game_ID'], downcast='signed'),
        # Placeholder texts are blanked first so that to_numeric can parse the column.
        number_of_review=lambda frame: pd.to_numeric(
            frame['number_of_review'].replace({'no reviews': ''}), downcast='integer'
        ),
        score=lambda frame: pd.to_numeric(frame['score'].replace({'no rating': ''})),
    )
    # Keep only games rated strictly above 8.0.
    .loc[lambda frame: frame['score'] > 8.0]
    .rename(columns={'game_ID': 'appid', 'Unnamed: 0': 'id'})
    # One row per store id; the first occurrence wins.
    .drop_duplicates(subset='appid', keep='first')
    # Moves the former index back into a column and restores a 0..n-1 index.
    .reset_index()
)
df.head()

Unnamed: 0,game_title,id,steam_url,tags,image,description,number_of_review,score,release_date,appid
0,Counter-Strike: Global Offensive,0,https://store.steampowered.com/app/730/Counter...,"FPS, Multiplayer, Shooter, Action, Team-Based,...",https://steamcdn-a.akamaihd.net/steam/apps/730...,Counter-Strike: Global Offensive (CS: GO) expa...,2915091.0,9.0,"Aug 21, 2012",730
1,Path of Exile,2,https://store.steampowered.com/app/238960/Path...,"Free to Play, Action RPG, Hack and Slash, RPG,...",https://steamcdn-a.akamaihd.net/steam/apps/238...,"You are an Exile, struggling to survive on the...",74977.0,9.0,"Oct 23, 2013",238960
2,Insurgency: Sandstorm,3,https://store.steampowered.com/app/581320/Insu...,"FPS, Realistic, Shooter, Multiplayer, Action, ...",https://steamcdn-a.akamaihd.net/steam/apps/581...,"Insurgency: Sandstorm is a team-based, tactica...",9019.0,9.0,"Dec 12, 2018",581320
3,Warframe,4,https://store.steampowered.com/app/230410/Warf...,"Free to Play, Action, Co-op, Multiplayer, Thir...",https://steamcdn-a.akamaihd.net/steam/apps/230...,Warframe is a cooperative free-to-play third p...,236593.0,9.0,"Mar 25, 2013",230410
4,Divinity: Original Sin 2 - Definitive Edition,5,https://store.steampowered.com/app/435150/Divi...,"RPG, Turn-Based, Co-op, Story Rich, Fantasy, O...",https://steamcdn-a.akamaihd.net/steam/apps/435...,The eagerly anticipated sequel to the award-wi...,37446.0,9.0,"Sep 14, 2017",435150


In [229]:
# Columns the recommender works with.
ds = df[['id', 'appid', 'game_title', 'tags', 'description']]

# TF-IDF over word 1- to 3-grams of the descriptions, English stop words removed.
# min_df=1 keeps every term, because any term in the vocabulary occurs in at
# least one document. That is exactly what the previous integer min_df=0 did,
# but recent scikit-learn releases validate parameters and reject an integer
# min_df below 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(ds['description'])

In [230]:
# Pairwise dot products of every TF-IDF row with every other row: an n x n
# matrix whose entry [i, j] scores row i against row j.
# NOTE(review): this equals cosine similarity only while the vectorizer
# L2-normalises its rows (scikit-learn's default `norm`) -- confirm if `norm` is ever changed.
cosine_similarities = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)

In [233]:
# Build, for every game, its ranked list of most similar games:
#   results[item_id] -> [(score, other_item_id), ...], best match first.
results = {}

# Rows/columns of `cosine_similarities` are positional, so ids are looked up by
# position instead of by index label (which only coincide for a 0..n-1 index).
item_ids = ds['id'].values

# The original slice [:-100:-1] took 99 candidates and discarded one as "self",
# i.e. it kept at most 98 neighbours per game.
n_keep = 98

for pos, item_id in enumerate(item_ids.tolist()):
    scores = cosine_similarities[pos]

    # Positions ordered from highest to lowest similarity.
    ranked = scores.argsort()[::-1]

    # Remove the item itself explicitly. It is not guaranteed to rank first:
    # a document with no usable terms yields an all-zero row (every score ties
    # at 0), and exact duplicates tie at the top, so blindly dropping the first
    # entry could discard a real neighbour and keep the item itself.
    ranked = ranked[ranked != pos][:n_keep]

    # Each dictionary entry is like: [(1,2), (3,4)], with each tuple being (score, item_id)
    results[item_id] = [(scores[i], item_ids[i]) for i in ranked]

print('done!')

done!


In [235]:
def item(id):
    """Return the title of the game whose ``id`` column equals ``id``.

    Looks the value up in the module-level ``ds`` frame. When several rows
    match, the first one is returned.

    Raises:
        IndexError: if no row of ``ds`` has that id.
    """
    matching_titles = ds.loc[ds['id'] == id, 'game_title']
    return matching_titles.tolist()[0]

def recommend(item_id, num):
    """Print the first ``num`` stored recommendations for ``item_id``.

    Reads the module-level ``results`` mapping (item id -> list of
    ``(score, other_item_id)`` tuples) and resolves ids to titles with
    ``item``. Nothing is returned; the report goes to standard output.

    Raises:
        KeyError: if ``item_id`` has no entry in ``results``.
    """
    header = ''.join(
        ['Recommending ', str(num), " products similar to ", item(item_id), '...']
    )
    print(header)
    print('----------')
    for score, other_id in results[item_id][:num]:
        line = ''.join(['Recommended: ', item(other_id), " (score:", str(score), ")"])
        print(line)
        
# Demo: print the first 10 stored recommendations for the game whose `id` is 0.
recommend(0, 10)

Recommending 10 products similar to Counter-Strike: Global Offensive...
----------
Recommended: Counter-Strike: Condition Zero (score:0.069533239781)
Recommended: Smashbox Arena (score:0.0492347891014)
Recommended: HOARD (score:0.0419897226148)
Recommended: Team Fortress 2 (score:0.0376382287517)
Recommended: Sudden Strike 2 Gold (score:0.035092500903)
Recommended: Awesomenauts - the 2D moba (score:0.0344973485261)
Recommended: Left 4 Dead (score:0.0335489782866)
Recommended: Devil May Cry® 4 Special Edition (score:0.0328419194394)
Recommended: Eternity Warriors™ VR (score:0.0311919075975)
Recommended: Unreal Tournament 3 Black (score:0.030638177305)


In [236]:
# Columns the recommender works with.
ds = df[['id', 'appid', 'game_title', 'tags', 'description']]

# Same pipeline as for the descriptions, this time fitted on the tag strings:
# TF-IDF over word 1- to 3-grams, English stop words removed.
# min_df=1 keeps every term, because any term in the vocabulary occurs in at
# least one document. That is exactly what the previous integer min_df=0 did,
# but recent scikit-learn releases validate parameters and reject an integer
# min_df below 1.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(ds['tags'])

# n x n matrix of pairwise dot products between the TF-IDF rows.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [237]:
# Build, for every game, its ranked list of most similar games:
#   results[item_id] -> [(score, other_item_id), ...], best match first.
results = {}

# Rows/columns of `cosine_similarities` are positional, so ids are looked up by
# position instead of by index label (which only coincide for a 0..n-1 index).
item_ids = ds['id'].values

# The original slice [:-100:-1] took 99 candidates and discarded one as "self",
# i.e. it kept at most 98 neighbours per game.
n_keep = 98

for pos, item_id in enumerate(item_ids.tolist()):
    scores = cosine_similarities[pos]

    # Positions ordered from highest to lowest similarity.
    ranked = scores.argsort()[::-1]

    # Remove the item itself explicitly. It is not guaranteed to rank first:
    # a document with no usable terms yields an all-zero row (every score ties
    # at 0), and exact duplicates tie at the top, so blindly dropping the first
    # entry could discard a real neighbour and keep the item itself.
    ranked = ranked[ranked != pos][:n_keep]

    # Each dictionary entry is like: [(1,2), (3,4)], with each tuple being (score, item_id)
    results[item_id] = [(scores[i], item_ids[i]) for i in ranked]

print('done!')

done!


In [238]:
def item(id):
    """Return the title of the game whose ``id`` column equals ``id``.

    Looks the value up in the module-level ``ds`` frame. When several rows
    match, the first one is returned.

    Raises:
        IndexError: if no row of ``ds`` has that id.
    """
    matching_titles = ds.loc[ds['id'] == id, 'game_title']
    return matching_titles.tolist()[0]

def recommend(item_id, num):
    """Print the first ``num`` stored recommendations for ``item_id``.

    Reads the module-level ``results`` mapping (item id -> list of
    ``(score, other_item_id)`` tuples) and resolves ids to titles with
    ``item``. Nothing is returned; the report goes to standard output.

    Raises:
        KeyError: if ``item_id`` has no entry in ``results``.
    """
    header = ''.join(
        ['Recommending ', str(num), " products similar to ", item(item_id), '...']
    )
    print(header)
    print('----------')
    for score, other_id in results[item_id][:num]:
        line = ''.join(['Recommended: ', item(other_id), " (score:", str(score), ")"])
        print(line)
        
# Demo: print the first 10 stored recommendations for the game whose `id` is 0,
# using the `results` currently in scope.
recommend(0, 10)

Recommending 10 products similar to Counter-Strike: Global Offensive...
----------
Recommended: Paladins® (score:0.190256469751)
Recommended: Tom Clancy's Rainbow Six® Siege (score:0.183345525103)
Recommended: Counter-Strike: Source (score:0.150885067257)
Recommended: Counter-Strike (score:0.150592144832)
Recommended: Call of Duty®: Modern Warfare® 2 (score:0.142173011586)
Recommended: Muffled Warfare (score:0.121974178824)
Recommended: Red Orchestra 2: Heroes of Stalingrad with Rising Storm (score:0.1175767392)
Recommended: PlanetSide 2 (score:0.106728326284)
Recommended: Insurgency (score:0.104130214655)
Recommended: SMITE® (score:0.102145387943)


'Counter-Strike: Global Offensive'