### Import Libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")
from surprise import Dataset
from surprise import Reader
from surprise.model_selection.split import train_test_split
import pandas as pd
import numpy as np
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore
from surprise import accuracy

### Loading Data

In [2]:
user_df  = pd.read_csv('data/cleaned/purchase_play_rating.csv', usecols= ['user', 'game', 'hrs', 'rating'])# read csv into ratings_df dataframe
ratings_df = user_df.drop('hrs', axis = 1)
ratings_df.head(5)

Unnamed: 0,user,game,rating
0,5250,Alien Swarm,4
1,5250,Cities Skylines,5
2,5250,Counter-Strike,1
3,5250,Counter-Strike Source,1
4,5250,Day of Defeat,1


### User based Collaborative Filtering (KNNWithZScore)

In [3]:
reader = Reader(rating_scale=(1,5))  #invoke reader instance of surprise library
data=Dataset.load_from_df(ratings_df,reader) #load dataset into Surprise datastructure Dataset

In [4]:
#create training set
trainingSet, testSet = train_test_split(data, test_size=0.2, train_size=None, random_state=None, shuffle=True)

In [51]:
#lets configure some parameters for Collaborative Filtering Algorithm
sim_options = {
    'name': 'cosine', #similarity measure default is MSD
    'user_based': True #user-based CF
}
#Other options:
#For item-based CF ->False
#For name ->pearson,cosine,msd,pearson_baseline

In [52]:
#KNN
knn = KNNBasic(sim_options=sim_options,k=5,min_k=2) #neighbours=3, other parameters set as above
knn.fit(trainingSet) #fit model to the training set
predictions_knn = knn.test(testSet) #predict for test set values

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [53]:
#validating rating predictions using RMSE
accuracy.rmse(predictions_knn, verbose=True) 

RMSE: 1.2762


1.2762133555190116

In [54]:
# for each user-item combination in the test set we get predictions
predictions_knn

[Prediction(uid=48149238, iid='Warhammer 40,000 Dawn of War II', r_ui=5.0, est=3.2, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=299153, iid='Half-Life Opposing Force', r_ui=1.0, est=1, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=29873979, iid='Arma 3', r_ui=4.0, est=2.4, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=116617462, iid='Grand Theft Auto San Andreas', r_ui=4.0, est=2.6, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=155624237, iid='RIFT', r_ui=1.0, est=3.6, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=53245953, iid='Bastion', r_ui=3.0, est=2.2, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=10216256, iid='Half-Life 2 Episode One', r_ui=1.0, est=1, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=136866564, iid='Toribash', r_ui=1.0, est=1.4, details={'actual_k': 5, 'was_impossible': False}),
 Prediction(uid=42005897, iid='Elite Dan

### Training model on complete dataset

In [5]:
#lets configure some parameters for Collaborative Filtering Algorithm
sim_options = {
    'name': 'cosine', #similarity measure default is MSD
    'user_based': True #user-based CF
}

In [6]:
#KNN
knn_zscore = KNNWithZScore(sim_options=sim_options,k=10,min_k=2) #neighbours=3, other parameters set as above
knn_zscore.fit(trainingSet) #fit model to the training set
predictions_knn = knn_zscore.test(testSet) #predict for test set values

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [7]:
#validating rating predictions using RMSE
accuracy.rmse(predictions_knn, verbose=True) 

RMSE: 1.2245


1.2244640260864044

In [8]:
trainset = data.build_full_trainset()
knn_zscore.fit(data.build_full_trainset())

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithZScore at 0x1d9c67fc8c8>

In [9]:
# Find missing values and predict
anti_test_set = trainset.build_anti_testset()

In [10]:
predictions = knn_zscore.test(anti_test_set)

In [11]:
predictions

[Prediction(uid=5250, iid='Age of Empires II HD Edition', r_ui=2.3571705847644484, est=1.6731691672682973, details={'actual_k': 10, 'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2', r_ui=2.3571705847644484, est=1.2902138314729015, details={'actual_k': 10, 'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2 Operation Arrowhead', r_ui=2.3571705847644484, est=2.238585704863077, details={'actual_k': 10, 'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2 Operation Arrowhead Beta (Obsolete)', r_ui=2.3571705847644484, est=1, details={'actual_k': 10, 'was_impossible': False}),
 Prediction(uid=5250, iid='Banished', r_ui=2.3571705847644484, est=2.74473792601751, details={'actual_k': 10, 'was_impossible': False}),
 Prediction(uid=5250, iid='Call of Duty Black Ops', r_ui=2.3571705847644484, est=1.5431685282670164, details={'actual_k': 10, 'was_impossible': False}),
 Prediction(uid=5250, iid='Call of Duty Black Ops - Multiplayer', r_ui=2.3571705847644484, est=1.93868

### Making recommendations for unknown ratings

In [12]:
from collections import defaultdict

def getGameRecommendations(topN=10):
    top_recs = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions: 
        top_recs[uid].append((iid, est))
     
    for uid, user_ratings in top_recs.items():
        user_ratings.sort(key = lambda x: x[1], reverse = True)
        top_recs[uid] = user_ratings[:topN]
     
    return top_recs 

def getGameRecommendationsForUser(userId, recommendations):
    if userId not in recommendations:
        print("User id is not present")
        return
    recommended_games = recommendations[userId]
    return recommended_games 

In [13]:
recommendations = getGameRecommendations(10)

In [14]:
# Games rated by user 76767
user_df.loc[user_df['user'] == 76767] 

Unnamed: 0,user,game,hrs,rating
21,76767,Age of Empires II HD Edition,786.0,4
22,76767,Alien Swarm,48.0,2
23,76767,Arma 2,0.0,1
24,76767,Arma 2 Operation Arrowhead,0.0,1
25,76767,Arma 2 Operation Arrowhead Beta (Obsolete),0.0,1
26,76767,Banished,1440.0,5
27,76767,Call of Duty Black Ops,1320.0,5
28,76767,Call of Duty Black Ops - Multiplayer,750.0,4
29,76767,Call of Duty Modern Warfare 2,3900.0,5
30,76767,Call of Duty Modern Warfare 2 - Multiplayer,9900.0,5


In [15]:
# Recommended games to user 76767
getGameRecommendationsForUser(76767, recommendations)

[('The Elder Scrolls V Skyrim', 5),
 ("Sid Meier's Civilization Beyond Earth", 5),
 ('Infinifactory', 5),
 ('Rampage Knights', 5),
 ('Pro Evolution Soccer 2014', 5),
 ('F1 2015', 5),
 ('Quantum of Solace', 5),
 ('NBA 2K14', 5),
 ('Robin Hood', 5),
 ('From The Depths', 5)]

In [16]:
# Games rated by user 103360
user_df.loc[user_df['user'] == 103360] 

Unnamed: 0,user,game,hrs,rating
139,103360,Counter-Strike,0.0,1
140,103360,Counter-Strike Condition Zero,0.0,1
141,103360,Counter-Strike Condition Zero Deleted Scenes,0.0,1
142,103360,Day of Defeat,0.0,1
143,103360,Deathmatch Classic,0.0,1
144,103360,Half-Life,0.0,1
145,103360,Half-Life Blue Shift,0.0,1
146,103360,Half-Life Opposing Force,0.0,1
147,103360,Ricochet,0.0,1
148,103360,Team Fortress Classic,0.0,1


In [17]:
# Recommended games to user 103360
getGameRecommendationsForUser(103360, recommendations)

[('Train Fever', 4.822390850496379),
 ('Trials Fusion', 4.6626852146848705),
 ('Grim Legends The Forsaken Bride', 4.035791419071309),
 ('The Elder Scrolls V Skyrim', 3.9836514920102624),
 ('Pirates of Black Cove', 3.8190692183605277),
 ('Fallout 4', 3.8086816982920433),
 ('Test Drive Unlimited 2', 3.7213875317135927),
 ('Rebel Galaxy', 3.619072712560264),
 ('Offworld Trading Company', 3.590907896194164),
 ('Horizon', 3.532348088657562)]

In [18]:
# Games rated by user 5250
user_df.loc[user_df['user'] == 5250] 

Unnamed: 0,user,game,hrs,rating
0,5250,Alien Swarm,294.0,4
1,5250,Cities Skylines,8640.0,5
2,5250,Counter-Strike,0.0,1
3,5250,Counter-Strike Source,0.0,1
4,5250,Day of Defeat,0.0,1
5,5250,Deathmatch Classic,0.0,1
6,5250,Deus Ex Human Revolution,3720.0,5
7,5250,Dota 2,12.0,2
8,5250,Half-Life,0.0,1
9,5250,Half-Life 2,0.0,1


In [19]:
# Recommended games to user 5250
getGameRecommendationsForUser(5250, recommendations)

[('Tales of Zestiria', 4.771001522919768),
 ('Pro Evolution Soccer 2014', 4.224953768887625),
 ("Anna's Quest", 4.115865173123142),
 ('Action! - Gameplay Recording and Streaming', 4.070678726199446),
 ('The Repopulation', 4.037899428483882),
 ('Quantum of Solace', 3.980340757764984),
 ('4 Elements', 3.8339624482543257),
 ('F1 2015', 3.8241215186028104),
 ('From The Depths', 3.8065917141122334),
 ('Infinifactory', 3.8003879305794155)]

In [20]:
# store recommendations in CSV file
pd.DataFrame(recommendations).to_csv("data/output/CF-User-Rec.csv")