# Imports

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import ast, json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

games_dir = "steam_games.json"

# Data Loading

In [149]:
# Pull the whole dump into memory as a list of raw text lines
# (line terminators are kept), then show how many lines were read.
with open(games_dir) as games_file:
    lines = list(games_file)
len(lines)

32135

In [150]:
# Peek at the first record. Each line is parsed as a Python literal rather
# than with json.loads: the displayed record uses single-quoted strings and
# the Python constant False.
ast.literal_eval(lines[0])

{'publisher': 'Kotoshiro',
 'genres': ['Action', 'Casual', 'Indie', 'Simulation', 'Strategy'],
 'app_name': 'Lost Summoner Kitty',
 'title': 'Lost Summoner Kitty',
 'url': 'http://store.steampowered.com/app/761140/Lost_Summoner_Kitty/',
 'release_date': '2018-01-04',
 'tags': ['Strategy', 'Action', 'Indie', 'Casual', 'Simulation'],
 'discount_price': 4.49,
 'reviews_url': 'http://steamcommunity.com/app/761140/reviews/?browsefilter=mostrecent&p=1',
 'specs': ['Single-player'],
 'price': 4.99,
 'early_access': False,
 'id': '761140',
 'developer': 'Kotoshiro'}

In [151]:
# Parse one record per line instead of gluing every line into a single giant
# list literal: the result is the same list of dicts, but a blank line no
# longer turns the whole input into a syntax error (",,") and the parser never
# has to handle one enormous expression.
data = [ast.literal_eval(line) for line in lines if line.strip()]

# Persist the records as strict JSON so pandas can load them directly.
with open('gamesdata.json', 'w') as json_file:
    json.dump(data, json_file)

In [17]:
# Load the converted JSON file into a DataFrame and preview the first row.
df = pd.read_json('gamesdata.json')
df.head(1)

Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,discount_price,reviews_url,specs,price,early_access,id,developer,sentiment,metascore
0,Kotoshiro,"[Action, Casual, Indie, Simulation, Strategy]",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"[Strategy, Action, Indie, Casual, Simulation]",4.49,http://steamcommunity.com/app/761140/reviews/?...,[Single-player],4.99,False,761140.0,Kotoshiro,,


In [164]:
# (rows, columns) of the frame as loaded, before any cleaning.
df.shape

(32135, 16)

# Data Cleaning

In [18]:
# Turn the literal string "NaN" into a real missing value, then tally the
# missing entries per column.
df.replace(to_replace="NaN", value=np.nan, inplace=True)
nan_counts = df.isnull().sum()
print(nan_counts)

publisher          8052
genres             3283
app_name              2
title              2050
url                   0
release_date       2067
tags                163
discount_price    31910
reviews_url           2
specs               670
price              1377
early_access          0
id                    2
developer          3299
sentiment          7182
metascore         29458
dtype: int64


In [19]:
# Columns that are almost entirely missing (see the NaN counts above).
mostly_nan_columns = ["metascore", "discount_price"]

# Columns that are irrelevant to the recommender.
irrelevant_columns = ["url", "reviews_url", "release_date"]

df.drop(columns=mostly_nan_columns + irrelevant_columns, inplace=True)

# Keep only rows that are complete in every remaining column.
df = df.dropna()
df.shape

(19480, 11)

In [20]:
list_features = ['genres', 'tags', 'specs']


def _labels_to_text(value):
    """Collapse a list of labels into one space-separated string.

    Spaces inside each label are removed first so that a multi-word label
    (e.g. "Free to Play") stays a single token. Anything that is not a list is
    returned unchanged, which makes re-running this cell a no-op instead of an
    error once the columns have already been converted to strings.
    """
    if isinstance(value, list):
        return ' '.join(label.replace(" ", "") for label in value)
    return value


def _strip_spaces(text):
    """Remove every space so a multi-word name becomes a single token."""
    return text.replace(" ", "")


for feature in list_features:
    # Join directly; the lists do not need to round-trip through str() and
    # ast.literal_eval() to become text. Case is left untouched here.
    df[feature] = df[feature].apply(_labels_to_text)

df['developer'] = df['developer'].apply(_strip_spaces)
df['publisher'] = df['publisher'].apply(_strip_spaces)

# Sentiment values containing "user reviews" are blanked out.
# NOTE(review): presumably these are review-count placeholders rather than a
# rating label — confirm against the raw data.
df['sentiment'] = df['sentiment'].apply(
    lambda x: "" if "user reviews" in x else _strip_spaces(x)
)

df[['genres', 'tags', 'specs', 'sentiment']].head(1)

Unnamed: 0,genres,tags,specs,sentiment
1,FreetoPlay Indie RPG Strategy,FreetoPlay Strategy Indie RPG CardGame Trading...,Single-player Multi-player OnlineMulti-Player ...,MostlyPositive


# Content-Based Filtering

In [22]:
def soup(x):
    """Build the text "soup" for one game.

    Concatenates, separated by single spaces and in this order, the row's
    publisher, developer, genres, tags, genres (again) and sentiment fields.

    NOTE(review): 'genres' is included twice and the cleaned 'specs' column is
    not used at all — confirm whether the repeat is deliberate weighting or
    whether the second 'genres' was meant to be 'specs'.
    """
    field_order = ('publisher', 'developer', 'genres', 'tags', 'genres', 'sentiment')
    pieces = [''.join(x[field]) for field in field_order]
    return ' '.join(pieces)

# Build the text "soup" for every game (one call to soup() per row) and
# preview the first 25 values.
df['soup'] = df.apply(soup, axis=1)
df['soup'].head(25)

1     MakingFun,Inc. SecretLevelSRL FreetoPlay Indie...
2     Poolians.com Poolians.com Casual FreetoPlay In...
5     TrickjumpGamesLtd TrickjumpGamesLtd Action Adv...
15    Apillo Apillo Adventure Casual Indie Simulatio...
21    TeroLunkka TeroLunkka Action Adventure Indie A...
27    StainlessGamesLtd StainlessGamesLtd Action Ind...
28    Valve Valve Action FPS Classic Action Sci-fi S...
33    ETGgames ETGgames Casual Indie Simulation Casu...
38    CisamidInc. lalalaZero,Urbanoff Adventure Casu...
39    StrategyFirst StrategyFirst Strategy Turn-Base...
40    StrategyFirst StrategyFirst Strategy Strategy ...
41    BlazingGriffinLtd. OuterlightLtd. Action Indie...
48    TPM.COSOFTWORKS TPM.COSOFTWORKS Action Adventu...
49    WonderboxGames WonderboxGames Casual Strategy ...
50    WonderboxGames WonderboxGames Action Indie Ind...
52    MadUnicornGames MadUnicornGames Action Adventu...
55    Majesco GlyphXGames Action Action Sci-fi Story...
56    IntroversionSoftware IntroversionSoftware 

In [23]:
# Turn each game's soup into a bag-of-words count vector; the resulting
# matrix has one row per game and one column per vocabulary term.
# NOTE(review): with CountVectorizer's default settings the text is lowercased
# and split on punctuation, so tokens such as "Sci-fi" or "Multi-player" are
# presumably broken into several terms — confirm this is intended.
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(df['soup'])
count_matrix.shape

(19480, 11672)

In [24]:
# Pairwise cosine similarity between every pair of games.
# NOTE(review): with 19,480 rows this is a 19,480 x 19,480 matrix; if it is
# returned as a dense float64 array (the presumed default) that is roughly
# 3 GB of memory — confirm it fits before re-running.
cosine_sim = cosine_similarity(count_matrix, count_matrix)

In [37]:
def get_recommendations(title, df, cosine_sim, top_n=10):
    """Return the games most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact value to look up in ``df['title']``. If several rows share the
        title, the first one (in row order) is used as the query.
    df : pandas.DataFrame
        Frame with at least 'title' and 'soup' columns, whose row order
        matches the rows/columns of ``cosine_sim``.
    cosine_sim : 2-D array-like
        Square pairwise-similarity matrix; ``cosine_sim[i]`` holds the scores
        of row ``i`` against every row.
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        The 'title' and 'soup' columns of the ``top_n`` most similar games,
        best match first, labelled by their position in ``df``.

    Raises
    ------
    KeyError
        If no row has the requested title.
    """
    # Work on positional labels so they line up with cosine_sim.
    df = df.reset_index()

    # Locate the query row. Taking the first match keeps the lookup a scalar
    # even when the title is duplicated (a Series-based lookup would return
    # several positions and break the ranking below).
    matches = np.flatnonzero((df['title'] == title).to_numpy())
    if matches.size == 0:
        raise KeyError(title)
    idx = int(matches[0])

    # Rank every game by similarity to the query, highest first. A stable
    # sort keeps ties in ascending row order.
    scores = np.asarray(cosine_sim[idx], dtype=float)
    ranked = np.argsort(-scores, kind='stable')

    # Drop the query game itself by position rather than assuming it sorts
    # first: another game with an identical soup can tie with it at 1.0.
    ranked = ranked[ranked != idx][:top_n]

    return df[['title', 'soup']].iloc[ranked]

# Example query: the closest matches for a single title.
get_recommendations("Tom Clancy's Ghost Recon®", df, cosine_sim)

Unnamed: 0,title,soup
127,Tom Clancy's Rainbow Six® 3 Gold,"Ubisoft RedStormEntertainment,UbisoftMontreal ..."
84,Tom Clancy's Rainbow Six® Vegas,Ubisoft UbisoftMontreal Action Action FPS Tact...
19064,Operation Flashpoint: Dragon Rising,Codemasters CodemastersStudios Action Action F...
19266,Tom Clancy's Rainbow Six® Vegas 2,Ubisoft UbisoftMontreal Action Action Tactical...
18743,Operation Flashpoint: Red River,Codemasters CodemastersActionStudio Action Str...
18043,Sniper: Ghost Warrior 2,CIGames CityInteractive Action Sniper Action F...
135,Brothers in Arms: Earned in Blood™,Ubisoft GearboxSoftware Action Action FPS Worl...
19245,INSURGENCY: Modern Infantry Combat,NewWorldInteractive InsurgencyDevelopmentTeam ...
19257,Tom Clancy's Ghost Recon® Desert Siege™,Ubisoft RedStormEntertainment Action Action FP...
19275,Tom Clancy's Rainbow Six Lockdown™,Ubisoft RedStormEntertainment Action Action FP...


In [84]:
# NOTE(review): the output saved below shows names that still contain spaces
# (e.g. "Cisamid Inc.") and rows with a missing sentiment, which does not
# match the cleaning cell earlier in the notebook — it looks like it was
# captured from a different kernel state. Re-run top to bottom to confirm.
df.tail(25)

Unnamed: 0,publisher,genres,app_name,title,tags,specs,price,early_access,id,developer,sentiment,soup
32091,Cisamid Inc.,Action Adventure Casual Indie MassivelyMultipl...,PLAYERUNKN4WN: Zombie,PLAYERUNKN4WN: Zombie,Action Adventure MassivelyMultiplayer Casual I...,Single-player SteamAchievements,0.99,False,737750.0,"lalalaZero,Urbanoff",Mostly Negative,Action Adventure Casual Indie MassivelyMultipl...
32092,Purpl3Grape,Action Indie EarlyAccess,TileDynasty FPS Arena,TileDynasty FPS Arena,EarlyAccess Action Indie ArenaShooter,Single-player Multi-player OnlineMulti-Player ...,6.99,True,524350.0,Purpl3Grape,,Action Indie EarlyAccess EarlyAccess Action In...
32093,Enkian Games,Adventure Indie RPG,Ender Story: Chapter 1,Ender Story: Chapter 1,Adventure RPG Indie RPGMaker,Single-player Fullcontrollersupport,0.99,False,760300.0,Enkian Games,,Adventure Indie RPG Adventure RPG Indie RPGMak...
32096,YAOYICHEN,Action Adventure Casual Indie RPG,PearsAndGrayWitch,PearsAndGrayWitch,Action Adventure Casual Indie RPG,Single-player,4.99,False,766700.0,YAOYICHEN,,Action Adventure Casual Indie RPG Action Adven...
32100,杭州分浪网络科技有限公司,Action FreetoPlay MassivelyMultiplayer RPG Str...,忍者村大战2,忍者村大战2,FreetoPlay Strategy Action MassivelyMultiplaye...,OnlineMulti-Player In-AppPurchases,Free To Play,False,754350.0,杭州分浪网络科技有限公司,Mixed,Action FreetoPlay MassivelyMultiplayer RPG Str...
32101,KUMA GAMES,Simulation,High School Simulator,High School Simulator,Simulation SexualContent Gore Violent Anime,Single-player PartialControllerSupport,Free,False,754620.0,KUMA GAMES,Mixed,Simulation Simulation SexualContent Gore Viole...
32103,Valve,Action,Team Fortress Classic,Team Fortress Classic,Action FPS Multiplayer Classic Shooter Class-B...,Multi-player ValveAnti-Cheatenabled,4.99,False,20.0,Valve,Very Positive,Action Action FPS Multiplayer Classic Shooter ...
32104,Valve,Action,Half-Life: Opposing Force,Half-Life: Opposing Force,FPS Action Sci-fi Singleplayer Classic Shooter...,Single-player Multi-player ValveAnti-Cheatenabled,4.99,False,50.0,Gearbox Software,Very Positive,Action FPS Action Sci-fi Singleplayer Classic ...
32105,Valve,Action,Ricochet,Ricochet,Action FPS Multiplayer First-Person Cyberpunk ...,Multi-player ValveAnti-Cheatenabled,4.99,False,60.0,Valve,Mostly Positive,Action Action FPS Multiplayer First-Person Cyb...
32106,Valve,Action,Counter-Strike,Counter-Strike,Action FPS Multiplayer Shooter Classic Team-Ba...,Multi-player ValveAnti-Cheatenabled,9.99,False,10.0,Valve,Overwhelmingly Positive,Action Action FPS Multiplayer Shooter Classic ...
