### Import Libraries

In [4]:
import warnings
warnings.filterwarnings("ignore")
from surprise import Dataset
from surprise import Reader
from surprise.model_selection.split import train_test_split
import pandas as pd
import numpy as np
from surprise import SVD
from surprise import accuracy

### Loading Data

In [5]:
user_df  = pd.read_csv('data/cleaned/purchase_play_rating.csv', usecols= ['user', 'game', 'hrs', 'rating'])# read csv into ratings_df dataframe
ratings_df = user_df.drop('hrs', axis = 1)
ratings_df.head(5)

Unnamed: 0,user,game,rating
0,5250,Alien Swarm,4
1,5250,Cities Skylines,5
2,5250,Counter-Strike,1
3,5250,Counter-Strike Source,1
4,5250,Day of Defeat,1


### Matrix Factorization based model - SVD

#### Reading a file

In [6]:
reader = Reader(rating_scale=(1,5))  #invoke reader instance of surprise library
data=Dataset.load_from_df(ratings_df,reader) #load dataset into Surprise datastructure Dataset

#### Finding model with best parameters

In [7]:
# Diiferent combination of parameters over a cross validation
param_grid = {'n_factors': [10, 20, 100],
              'reg_all': [0.001, 0.01],
              'n_epochs': [10, 20, 100]
              }

In [8]:
gs = GridSearchCV(SVD, param_grid, measures=['rmse', 'mae'], cv=5) 

In [9]:
gs.fit(data)

In [12]:
# best RMSE score
print(gs.best_score['rmse'])
# combination of parameters that gave the best RMSE score
print(gs.best_params['rmse'])

1.1034842478013858
{'n_factors': 10, 'reg_all': 0.01, 'n_epochs': 20}


In [13]:
# We can now use the algorithm that yields the best rmse:
svd = gs.best_estimator['rmse']

#### Training model on complete dataset

In [14]:
trainset = data.build_full_trainset()
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x200badc8dc8>

In [15]:
# Find missing values and predict
anti_test_set = trainset.build_anti_testset() 

In [16]:
predictions = svd.test(anti_test_set)

In [17]:
predictions

[Prediction(uid=5250, iid='Age of Empires II HD Edition', r_ui=2.3571705847644484, est=3.105130872503587, details={'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2', r_ui=2.3571705847644484, est=1.577039671734837, details={'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2 Operation Arrowhead', r_ui=2.3571705847644484, est=3.023673492349585, details={'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2 Operation Arrowhead Beta (Obsolete)', r_ui=2.3571705847644484, est=1, details={'was_impossible': False}),
 Prediction(uid=5250, iid='Banished', r_ui=2.3571705847644484, est=3.2093841789521065, details={'was_impossible': False}),
 Prediction(uid=5250, iid='Call of Duty Black Ops', r_ui=2.3571705847644484, est=3.2080558521353466, details={'was_impossible': False}),
 Prediction(uid=5250, iid='Call of Duty Black Ops - Multiplayer', r_ui=2.3571705847644484, est=3.442579926103578, details={'was_impossible': False}),
 Prediction(uid=5250, iid='Call of Duty Modern W

### Making recommendation for unknown ratings

In [18]:
from collections import defaultdict

def getGameRecommendations(topN=10):
    top_recs = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions: 
        top_recs[uid].append((iid, est))
     
    for uid, user_ratings in top_recs.items():
        user_ratings.sort(key = lambda x: x[1], reverse = True)
        top_recs[uid] = user_ratings[:topN]
     
    return top_recs 

def getGameRecommendationsForUser(userId, recommendations):
    if userId not in recommendations:
        print("User id is not present")
        return
    recommended_games = recommendations[userId]
    return recommended_games 

In [27]:
recommendations = getGameRecommendations(20)

In [28]:
user_df.loc[user_df['user'] == 76767] 

Unnamed: 0,user,game,hrs,rating
21,76767,Age of Empires II HD Edition,786.0,4
22,76767,Alien Swarm,48.0,2
23,76767,Arma 2,0.0,1
24,76767,Arma 2 Operation Arrowhead,0.0,1
25,76767,Arma 2 Operation Arrowhead Beta (Obsolete),0.0,1
26,76767,Banished,1440.0,5
27,76767,Call of Duty Black Ops,1320.0,5
28,76767,Call of Duty Black Ops - Multiplayer,750.0,4
29,76767,Call of Duty Modern Warfare 2,3900.0,5
30,76767,Call of Duty Modern Warfare 2 - Multiplayer,9900.0,5


In [29]:
getGameRecommendationsForUser(76767, recommendations)

[('Football Manager 2012', 4.751799763730155),
 ('Football Manager 2015', 4.727349369854593),
 ('Football Manager 2013', 4.695657501088753),
 ('Football Manager 2014', 4.6939455427890895),
 ('Fallout 4', 4.67576831944208),
 ('Grand Theft Auto V', 4.603048813844623),
 ('METAL GEAR SOLID V THE PHANTOM PAIN', 4.5685553243233485),
 ('Football Manager 2016', 4.482911154672919),
 ('DARK SOULS II', 4.419676517308506),
 ('Borderlands The Pre-Sequel', 4.410945891580416),
 ('Football Manager 2011', 4.37898557374254),
 ('Rust', 4.35891751948698),
 ('Rocket League', 4.307258259977288),
 ('The Binding of Isaac Rebirth', 4.288401223384067),
 ('Dying Light', 4.276279569191023),
 ('Farming Simulator 15', 4.2522968550455005),
 ('Football Manager 2010', 4.252031578489395),
 ('H1Z1', 4.232368039503382),
 ('Arma 3', 4.2257516052508795),
 ('The Sims(TM) 3', 4.219055004302684)]

In [30]:
user_df.loc[user_df['user'] == 103360] 

Unnamed: 0,user,game,hrs,rating
139,103360,Counter-Strike,0.0,1
140,103360,Counter-Strike Condition Zero,0.0,1
141,103360,Counter-Strike Condition Zero Deleted Scenes,0.0,1
142,103360,Day of Defeat,0.0,1
143,103360,Deathmatch Classic,0.0,1
144,103360,Half-Life,0.0,1
145,103360,Half-Life Blue Shift,0.0,1
146,103360,Half-Life Opposing Force,0.0,1
147,103360,Ricochet,0.0,1
148,103360,Team Fortress Classic,0.0,1


In [31]:
getGameRecommendationsForUser(103360, recommendations)

[('Football Manager 2012', 4.236310228965927),
 ('Grand Theft Auto V', 4.162337396426119),
 ('Football Manager 2013', 4.156011873637004),
 ('Fallout 4', 4.146346487974863),
 ('Football Manager 2014', 4.127196115847555),
 ('Football Manager 2015', 4.118698582260886),
 ('Counter-Strike Global Offensive', 4.093414989599811),
 ('METAL GEAR SOLID V THE PHANTOM PAIN', 4.04437241421749),
 ('Football Manager 2016', 3.9926116104000564),
 ('Rust', 3.9722706380188537),
 ('Football Manager 2011', 3.9238603111176587),
 ('Borderlands The Pre-Sequel', 3.872513944240077),
 ('DARK SOULS II', 3.836881259240815),
 ('The Witcher 3 Wild Hunt', 3.8197574793407907),
 ('H1Z1', 3.8056490063401407),
 ('Arma 3', 3.8025616137917204),
 ('Total War ATTILA', 3.7882676844568706),
 ('Football Manager 2010', 3.778390985167718),
 ('Dying Light', 3.7479099448082973),
 ('The Binding of Isaac Rebirth', 3.740433607136139)]

In [32]:
user_df.loc[user_df['user'] == 5250] 

Unnamed: 0,user,game,hrs,rating
0,5250,Alien Swarm,294.0,4
1,5250,Cities Skylines,8640.0,5
2,5250,Counter-Strike,0.0,1
3,5250,Counter-Strike Source,0.0,1
4,5250,Day of Defeat,0.0,1
5,5250,Deathmatch Classic,0.0,1
6,5250,Deus Ex Human Revolution,3720.0,5
7,5250,Dota 2,12.0,2
8,5250,Half-Life,0.0,1
9,5250,Half-Life 2,0.0,1


In [33]:
getGameRecommendationsForUser(5250, recommendations)

[('Grand Theft Auto V', 4.258866821322246),
 ('Football Manager 2014', 4.241820778884217),
 ('Football Manager 2012', 4.235056110374306),
 ('Football Manager 2015', 4.205614195201529),
 ('Football Manager 2013', 4.191701792075964),
 ('METAL GEAR SOLID V THE PHANTOM PAIN', 4.140595144542255),
 ('Fallout 4', 4.139786579531251),
 ('Football Manager 2016', 4.099174035044261),
 ("Sid Meier's Civilization V", 3.9858402571660627),
 ('Borderlands The Pre-Sequel', 3.977296623687224),
 ('Arma 3', 3.9250226774151606),
 ('Football Manager 2010', 3.8877306807745566),
 ('The Elder Scrolls V Skyrim', 3.8641716188657664),
 ('Total War ATTILA', 3.8634008387656262),
 ('DARK SOULS II', 3.86105239935986),
 ('The Witcher 3 Wild Hunt', 3.8562372339245714),
 ('Rust', 3.8241981641155656),
 ('Football Manager 2011', 3.8199251032890458),
 ('Starbound', 3.7538341446150842),
 ('Total War SHOGUN 2', 3.750805677711296)]

In [35]:
# store recommendations in CSV file
pd.DataFrame(recommendations).to_csv("data/output/SVD-Rec.csv")