In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [27]:
# Load the tab-separated Steam catalogue; the second column (index_col=1)
# becomes the row index.
steam_games = pd.read_csv('steam_games_all_fields.csv', index_col=1, delimiter='\t')

# Split every store URL on '/' and keep only segments 3 and 4, e.g.
# 'https://store.steampowered.com/app/730/...' -> app='app', game_ID='730'.
# NOTE(review): this relies on every URL splitting into exactly seven
# segments (0-6); confirm against the raw data if the scrape changes.
app_id = steam_games['steam_url'].str.split('/', expand=True)
app_id = app_id.drop([0, 1, 2, 5, 6], axis=1)
app_id.columns = ['app', 'game_ID']

df = pd.concat([steam_games, app_id], axis=1, sort=False)
df['description'] = df['description'].fillna('')

# Drop rows whose description contains the placeholder text. The explicit
# .copy() makes the result an independent frame, so the column assignments
# below do not write to a slice of the concatenated frame
# (avoids SettingWithCopyWarning).
df = df[~df['description'].str.contains('no description')].copy()

# No filtering on store section ('app' column), tags or score is applied here.
df['release_date'] = df['release_date'].replace({'no release date': ''})
df = df.drop(['app'], axis=1)
df['game_ID'] = pd.to_numeric(df['game_ID'], downcast='signed')

# Placeholder strings are blanked before the numeric conversion so that
# pd.to_numeric can parse the column (blanks are expected to come out as NaN).
df['number_of_review'] = df['number_of_review'].replace({'no reviews': ''})
df['number_of_review'] = pd.to_numeric(df['number_of_review'], downcast='integer')
df['score'] = df['score'].replace({'no rating': ''})
df['score'] = pd.to_numeric(df['score'])

df = df.rename(columns={"game_ID": "appid", "Unnamed: 0": "id"})
# Keep one row per appid, then move the title index back into a column so the
# frame gets a fresh 0..n-1 index.
df = df.drop_duplicates(subset='appid', keep='first')
df = df.reset_index()
df.head(20)

Unnamed: 0,game_title,id,steam_url,tags,image,description,number_of_review,score,release_date,appid
0,Counter-Strike: Global Offensive,0,https://store.steampowered.com/app/730/Counter...,"FPS, Multiplayer, Shooter, Action, Team-Based,...",https://steamcdn-a.akamaihd.net/steam/apps/730...,Counter-Strike: Global Offensive (CS: GO) expa...,2915091.0,9.0,"Aug 21, 2012",730
1,MONSTER HUNTER: WORLD,1,https://store.steampowered.com/app/582010/MONS...,"Action, Hunting, Co-op, Open World, Multiplaye...",https://steamcdn-a.akamaihd.net/steam/apps/582...,Welcome to a new world! In Monster Hunter: Wor...,55314.0,6.0,"Aug 9, 2018",582010
2,Path of Exile,2,https://store.steampowered.com/app/238960/Path...,"Free to Play, Action RPG, Hack and Slash, RPG,...",https://steamcdn-a.akamaihd.net/steam/apps/238...,"You are an Exile, struggling to survive on the...",74977.0,9.0,"Oct 23, 2013",238960
3,Insurgency: Sandstorm,3,https://store.steampowered.com/app/581320/Insu...,"FPS, Realistic, Shooter, Multiplayer, Action, ...",https://steamcdn-a.akamaihd.net/steam/apps/581...,"Insurgency: Sandstorm is a team-based, tactica...",9019.0,9.0,"Dec 12, 2018",581320
4,Warframe,4,https://store.steampowered.com/app/230410/Warf...,"Free to Play, Action, Co-op, Multiplayer, Thir...",https://steamcdn-a.akamaihd.net/steam/apps/230...,Warframe is a cooperative free-to-play third p...,236593.0,9.0,"Mar 25, 2013",230410
5,Divinity: Original Sin 2 - Definitive Edition,5,https://store.steampowered.com/app/435150/Divi...,"RPG, Turn-Based, Co-op, Story Rich, Fantasy, O...",https://steamcdn-a.akamaihd.net/steam/apps/435...,The eagerly anticipated sequel to the award-wi...,37446.0,9.0,"Sep 14, 2017",435150
6,Assassin's Creed® Odyssey,6,https://store.steampowered.com/app/812140/Assa...,"Open World, Action, RPG, Singleplayer, Adventu...",https://steamcdn-a.akamaihd.net/steam/apps/812...,Choose your fate in Assassin's Creed® Odyssey....,14907.0,9.0,"Oct 5, 2018",812140
7,Rocket League®,7,https://store.steampowered.com/app/252950/Rock...,"Multiplayer, Racing, Soccer, Sports, Competiti...",https://steamcdn-a.akamaihd.net/steam/apps/252...,Soccer meets driving once again in the long-aw...,182330.0,9.0,"Jul 7, 2015",252950
8,Tom Clancy's Rainbow Six® Siege,8,https://store.steampowered.com/app/359550/Tom_...,"FPS, Multiplayer, Tactical, Shooter, Action, T...",https://steamcdn-a.akamaihd.net/steam/apps/359...,Tom Clancy's Rainbow Six Siege is the latest i...,224822.0,9.0,"Dec 1, 2015",359550
9,Grand Theft Auto V,9,https://store.steampowered.com/app/271590/Gran...,"Open World, Action, Multiplayer, Third Person,...",https://steamcdn-a.akamaihd.net/steam/apps/271...,"Los Santos is a city of bright lights, long ni...",386027.0,6.0,"Apr 14, 2015",271590


In [12]:
def weighted_rating(x):
    """Return the review count of ``x`` multiplied by its score.

    Despite the name, no normalisation is applied: the result is the plain
    product of the two fields.

    Parameters
    ----------
    x : mapping-like
        Anything indexable by the keys ``'number_of_review'`` and
        ``'score'`` (for example a DataFrame row).

    Returns
    -------
    The value of ``x['number_of_review'] * x['score']``.
    """
    review_count, rating = x['number_of_review'], x['score']
    return review_count * rating

In [29]:
# Columns used by the recommender; the TF-IDF features in this cell are built
# from the free-text 'description' column.
ds = df[['id', 'appid', 'game_title', 'tags', 'description']]

# Word-level 1- to 3-grams with English stop words removed.
# min_df=1 (the library default) keeps every term, exactly as the previous
# integer min_df=0 did, but stays inside the documented range for integer
# min_df (>= 1), which recent scikit-learn releases enforce.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tf.fit_transform(ds['description'])

In [30]:
# With Y omitted, linear_kernel compares the matrix with itself, producing the
# full pairwise dot-product matrix between all TF-IDF rows.
cosine_similarities = linear_kernel(tfidf_matrix)

In [31]:
# Build the neighbour table: results[item_id] is a list of (score, item_id)
# tuples, ordered from the most to the least similar game.
results = {}

# How many of the highest-scoring entries are read from each similarity row,
# and how many neighbours are kept per game once the game itself is removed.
n_candidates = 99
n_neighbours = n_candidates - 1

# Plain list of ids, so row positions in the similarity matrix map directly to
# item ids without relying on the DataFrame's index labels.
item_ids = ds['id'].tolist()

for pos, item_id in enumerate(item_ids):
    row_scores = cosine_similarities[pos]
    # Positions of the n_candidates highest scores, best first.
    candidates = row_scores.argsort()[:-(n_candidates + 1):-1]

    # Exclude the game itself by position rather than assuming it is ranked
    # first: with an all-zero vector (empty text) or an exact tie it may not
    # be, and dropping the first entry blindly would discard a real neighbour
    # while keeping the game as its own recommendation.
    neighbours = [(row_scores[i], item_ids[i]) for i in candidates if i != pos]
    results[item_id] = neighbours[:n_neighbours]

print('done!')

done!


In [32]:
def item(id):
    """Return the title of the first row of ``ds`` whose 'id' equals ``id``.

    Raises
    ------
    IndexError
        If no row of ``ds`` has that id.
    """
    matching_titles = ds.loc[ds['id'] == id, 'game_title'].tolist()
    return matching_titles[0]

def recommend(item_id, num):
    """Print up to ``num`` games recommended for ``item_id``.

    Reads the precomputed ``results`` table, whose entries are lists of
    (score, item_id) tuples, and resolves ids to titles through ``item``.

    Parameters
    ----------
    item_id : key of ``results`` identifying the reference game.
    num : maximum number of recommendations to print.
    """
    headline = 'Recommending ' + str(num) + " products similar to " + item(item_id) + '...'
    print(headline)
    print('----------')
    for similarity, other_id in results[item_id][:num]:
        print('Recommended: ' + item(other_id) + " (score:" + str(similarity) + ")")
        
# Show the ten closest matches for the game with id 11.
recommend(item_id=11, num=10)

Recommending 10 products similar to PLAYERUNKNOWN'S BATTLEGROUNDS...
----------
Recommended: Pixel Royale (score:0.8411016820922029)
Recommended: Infected Battlegrounds (score:0.14262542084099372)
Recommended: 武侠乂 The Swordsmen X (score:0.11563259932240091)
Recommended: Ring of Elysium (score:0.10143757867600177)
Recommended: Natural Selection 2 (score:0.10042232067774738)
Recommended: RUSSIA BATTLEGROUNDS (score:0.09220548429590864)
Recommended: H1Z1 (score:0.09044744065602206)
Recommended: Outworld Battlegrounds (score:0.08959644167581766)
Recommended: Battle Royale Survival (score:0.08806993160405685)
Recommended: SurvivalZ Battlegrounds (score:0.08019531366335444)


In [33]:
# Same recommender columns as before; this time the TF-IDF features are built
# from the 'tags' column instead of the description.
ds = df[['id', 'appid', 'game_title', 'tags', 'description']]

# min_df=1 (the library default) keeps every term, exactly as the previous
# integer min_df=0 did, but stays inside the documented range for integer
# min_df (>= 1), which recent scikit-learn releases enforce.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')

# Missing tags are treated as empty text so a NaN cannot break the vectorizer
# (the description column receives the same treatment during cleaning).
tfidf_matrix = tf.fit_transform(ds['tags'].fillna(''))

# Pairwise dot products between all TF-IDF rows.
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)

In [34]:
# Rebuild the neighbour table from the current similarity matrix:
# results[item_id] is a list of (score, item_id) tuples, ordered from the most
# to the least similar game.
results = {}

# How many of the highest-scoring entries are read from each similarity row,
# and how many neighbours are kept per game once the game itself is removed.
n_candidates = 99
n_neighbours = n_candidates - 1

# Plain list of ids, so row positions in the similarity matrix map directly to
# item ids without relying on the DataFrame's index labels.
item_ids = ds['id'].tolist()

for pos, item_id in enumerate(item_ids):
    row_scores = cosine_similarities[pos]
    # Positions of the n_candidates highest scores, best first.
    candidates = row_scores.argsort()[:-(n_candidates + 1):-1]

    # Exclude the game itself by position rather than assuming it is ranked
    # first: with an all-zero vector (empty text) or an exact tie it may not
    # be, and dropping the first entry blindly would discard a real neighbour
    # while keeping the game as its own recommendation.
    neighbours = [(row_scores[i], item_ids[i]) for i in candidates if i != pos]
    results[item_id] = neighbours[:n_neighbours]

print('done!')

done!


In [35]:
def item(id):
    """Return the title of the first row of ``ds`` whose 'id' equals ``id``.

    Raises
    ------
    IndexError
        If no row of ``ds`` has that id.
    """
    matching_titles = ds.loc[ds['id'] == id, 'game_title'].tolist()
    return matching_titles[0]

def recommend(item_id, num):
    """Print up to ``num`` games recommended for ``item_id``.

    Reads the precomputed ``results`` table, whose entries are lists of
    (score, item_id) tuples, and resolves ids to titles through ``item``.

    Parameters
    ----------
    item_id : key of ``results`` identifying the reference game.
    num : maximum number of recommendations to print.
    """
    headline = 'Recommending ' + str(num) + " products similar to " + item(item_id) + '...'
    print(headline)
    print('----------')
    for similarity, other_id in results[item_id][:num]:
        print('Recommended: ' + item(other_id) + " (score:" + str(similarity) + ")")
        
# Show the ten closest matches for the game with id 11.
recommend(item_id=11, num=10)

Recommending 10 products similar to PLAYERUNKNOWN'S BATTLEGROUNDS...
----------
Recommended: Realm Royale (score:0.2690030158397976)
Recommended: Battle Royale Builder (score:0.2111620778659669)
Recommended: The Survivors (score:0.2087993857515626)
Recommended: Counter-Strike: Condition Zero (score:0.18551223142502654)
Recommended: Deathgarden™ (score:0.15857201257200573)
Recommended: Chivalry: Medieval Warfare (score:0.15292258954542304)
Recommended: Zeus' Battlegrounds (score:0.14825656719869096)
Recommended: H1Z1 (score:0.1481745280654869)
Recommended: Freefall Tournament (score:0.14447492921377453)
Recommended: Interstellar Marines (score:0.1430207677272827)


'Counter-Strike: Global Offensive'