### Import Libraries

In [1]:
import warnings
warnings.filterwarnings("ignore")
from surprise import Dataset
from surprise import Reader
from surprise.model_selection.split import train_test_split
import pandas as pd
import numpy as np
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore
from surprise import accuracy

### Loading Data

In [2]:
# Read the cleaned purchase/play data into user_df; 'hrs' is kept there because the
# per-user inspection cells further down display it.
user_df = pd.read_csv('data/cleaned/purchase_play_rating.csv', usecols=['user', 'game', 'hrs', 'rating'])
# Surprise's Dataset.load_from_df expects exactly three columns in the order
# (user, item, rating). Select them explicitly rather than relying on the CSV's
# column order (usecols does not reorder columns).
ratings_df = user_df[['user', 'game', 'rating']]
ratings_df.head(5)

Unnamed: 0,user,game,rating
0,5250,Alien Swarm,4
1,5250,Cities Skylines,5
2,5250,Counter-Strike,1
3,5250,Counter-Strike Source,1
4,5250,Day of Defeat,1


### Item-based Collaborative Filtering (KNNWithZScore)

#### Loading the ratings into a Surprise Dataset

In [3]:
# The Reader tells Surprise that ratings lie on a 1-5 scale
reader = Reader(rating_scale=(1, 5))
# Wrap the ratings DataFrame in Surprise's Dataset structure
data = Dataset.load_from_df(df=ratings_df, reader=reader)

#### Split into test and train

In [4]:
# Hold out 20% of the ratings for evaluation. The seed is fixed so that the split,
# and therefore the RMSE reported below, is reproducible on Restart & Run All.
RANDOM_SEED = 42
trainingSet, testSet = train_test_split(data, test_size=0.2, random_state=RANDOM_SEED, shuffle=True)

#### Finding model with best parameters

In [5]:
# Similarity configuration handed to the KNN algorithm.
#   name       -> similarity measure: 'cosine', 'msd' (the default), 'pearson' or 'pearson_baseline'
#   user_based -> True for user-based CF, False for item-based CF
sim_options = dict(
    name='cosine',
    user_based=False,  # item-based collaborative filtering
)

In [16]:
# KNN with z-score normalisation, using the similarity options configured above.
# k is the maximum number of neighbours aggregated per prediction and min_k the
# minimum number required for aggregation (see the Surprise KNN documentation).
knn = KNNWithZScore(
    k=12,
    min_k=5,
    sim_options=sim_options,
)
knn.fit(trainingSet)  # fit the model on the training split
predictions_knn = knn.test(testSet)  # estimate a rating for every held-out test entry

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [17]:
# Score the test-set predictions with RMSE (verbose=True also prints the value)
accuracy.rmse(predictions=predictions_knn, verbose=True)

RMSE: 1.1508


1.1508498011065447

In [18]:
# There is one prediction per user-item combination in the test set;
# show only the first few instead of dumping the whole list into the output.
predictions_knn[:10]

[Prediction(uid=124651043, iid='Spore', r_ui=4.0, est=3.9523961803638707, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=34919318, iid='Bionic Commando Rearmed', r_ui=1.0, est=1.2326642096017937, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=94234855, iid='RACE 07 - Formula RaceRoom Add-On', r_ui=1.0, est=1, details={'actual_k': 4, 'was_impossible': False}),
 Prediction(uid=8776918, iid='Fallout 4', r_ui=5.0, est=4.410604393339748, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=138941587, iid='Sugar Cube Bittersweet Factory', r_ui=3.0, est=1.4813539523024373, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=26762388, iid='Skyrim High Resolution Texture Pack', r_ui=1.0, est=1.4152152931967374, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=266479412, iid='Rise of Incarnates', r_ui=1.0, est=1.1949909872608233, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=64973908

#### Training model on complete dataset

In [19]:
# Refit on every known rating. Build the full trainset once and reuse the same
# object for fitting and for the anti-testset built from `trainset` afterwards.
trainset = data.build_full_trainset()
knn.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithZScore at 0x22b1d253b08>

In [20]:
# Collect the (user, game) pairs that have no rating in the full trainset.
# fill=None is the default: per the Surprise docs the placeholder rating is then
# the trainset's global mean (the constant r_ui visible in the predictions).
anti_test_set = trainset.build_anti_testset(fill=None)

In [22]:
# Estimate a rating for every unrated (user, game) pair
predictions = knn.test(anti_test_set, verbose=False)

In [23]:
# The anti-testset predictions cover every unrated (user, game) pair, so only
# preview the first few rather than rendering the entire list.
predictions[:10]

[Prediction(uid=5250, iid='Age of Empires II HD Edition', r_ui=2.3571705847644484, est=3.080112253542746, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2', r_ui=2.3571705847644484, est=1.6745402796892987, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2 Operation Arrowhead', r_ui=2.3571705847644484, est=2.425152748075539, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=5250, iid='Arma 2 Operation Arrowhead Beta (Obsolete)', r_ui=2.3571705847644484, est=1, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=5250, iid='Banished', r_ui=2.3571705847644484, est=3.4783210581077584, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=5250, iid='Call of Duty Black Ops', r_ui=2.3571705847644484, est=3.160722904833292, details={'actual_k': 12, 'was_impossible': False}),
 Prediction(uid=5250, iid='Call of Duty Black Ops - Multiplayer', r_ui=2.3571705847644484, est=2.99846

### Making recommendations for unknown ratings

In [24]:
from collections import defaultdict

def getGameRecommendations(topN=10, preds=None):
    """Build the top-N recommended games for every user.

    Args:
        topN: Maximum number of games to keep per user.
        preds: Iterable of 5-tuples ``(uid, iid, r_ui, est, details)`` such as
            Surprise ``Prediction`` objects. When omitted, the notebook-level
            ``predictions`` list is used, so existing calls keep working.

    Returns:
        A ``defaultdict(list)`` mapping each user id to a list of
        ``(game, estimated_rating)`` pairs, sorted by estimated rating from
        highest to lowest and truncated to ``topN`` entries.
    """
    if preds is None:
        # Backward-compatible fallback to the global computed in an earlier cell.
        preds = predictions

    top_recs = defaultdict(list)
    for uid, iid, _true_r, est, _details in preds:
        top_recs[uid].append((iid, est))

    for uid, user_ratings in top_recs.items():
        # sorted() is stable: games with equal estimates keep their original order.
        ranked = sorted(user_ratings, key=lambda rec: rec[1], reverse=True)
        top_recs[uid] = ranked[:topN]

    return top_recs

def getGameRecommendationsForUser(userId, recommendations):
    """Return the recommendation list stored for ``userId``.

    Prints a notice and returns ``None`` when ``userId`` is not a key of
    ``recommendations``.
    """
    if userId in recommendations:
        return recommendations[userId]
    print("User id is not present")
    return None

In [25]:
# Keep the ten highest-estimated games for each user
recommendations = getGameRecommendations(topN=10)

In [26]:
# Known ratings (with hours played) for user 76767
user_df[user_df['user'].eq(76767)]

Unnamed: 0,user,game,hrs,rating
21,76767,Age of Empires II HD Edition,786.0,4
22,76767,Alien Swarm,48.0,2
23,76767,Arma 2,0.0,1
24,76767,Arma 2 Operation Arrowhead,0.0,1
25,76767,Arma 2 Operation Arrowhead Beta (Obsolete),0.0,1
26,76767,Banished,1440.0,5
27,76767,Call of Duty Black Ops,1320.0,5
28,76767,Call of Duty Black Ops - Multiplayer,750.0,4
29,76767,Call of Duty Modern Warfare 2,3900.0,5
30,76767,Call of Duty Modern Warfare 2 - Multiplayer,9900.0,5


In [27]:
# Top recommendations for user 76767
getGameRecommendationsForUser(userId=76767, recommendations=recommendations)

[('Hidden Object Bundle 4 in 1', 5),
 ('Ironcast', 5),
 ('Football Manager 2010', 5),
 ('Football Manager 2011', 5),
 ('Music Creator 6 Touch', 5),
 ('Aion Collectors Edition', 5),
 ('From The Depths', 5),
 ('Worldwide Soccer Manager 2009', 5),
 ('FIFA Manager 11', 5),
 ('FIFA Manager 09', 5)]

In [28]:
# Known ratings (with hours played) for user 103360
user_df[user_df['user'].eq(103360)]

Unnamed: 0,user,game,hrs,rating
139,103360,Counter-Strike,0.0,1
140,103360,Counter-Strike Condition Zero,0.0,1
141,103360,Counter-Strike Condition Zero Deleted Scenes,0.0,1
142,103360,Day of Defeat,0.0,1
143,103360,Deathmatch Classic,0.0,1
144,103360,Half-Life,0.0,1
145,103360,Half-Life Blue Shift,0.0,1
146,103360,Half-Life Opposing Force,0.0,1
147,103360,Ricochet,0.0,1
148,103360,Team Fortress Classic,0.0,1


In [29]:
# Top recommendations for user 103360
getGameRecommendationsForUser(userId=103360, recommendations=recommendations)

[('Ironcast', 5),
 ('Aion Collectors Edition', 5),
 ('From The Depths', 5),
 ('Distant Worlds Universe', 5),
 ('Worldwide Soccer Manager 2009', 5),
 ('The Repopulation', 5),
 ('FIFA Manager 11', 5),
 ('FIFA Manager 09', 5),
 ('Anno 2205', 5),
 ('Emergency 5 - Deluxe Edition', 5)]

In [30]:
# Known ratings (with hours played) for user 5250
user_df[user_df['user'].eq(5250)]

Unnamed: 0,user,game,hrs,rating
0,5250,Alien Swarm,294.0,4
1,5250,Cities Skylines,8640.0,5
2,5250,Counter-Strike,0.0,1
3,5250,Counter-Strike Source,0.0,1
4,5250,Day of Defeat,0.0,1
5,5250,Deathmatch Classic,0.0,1
6,5250,Deus Ex Human Revolution,3720.0,5
7,5250,Dota 2,12.0,2
8,5250,Half-Life,0.0,1
9,5250,Half-Life 2,0.0,1


In [31]:
# Top recommendations for user 5250
getGameRecommendationsForUser(userId=5250, recommendations=recommendations)

[('FIFA Manager 11', 5),
 ('Emergency 5 - Deluxe Edition', 5),
 ('Farming Giant', 5),
 ('Stargate Resistance', 5),
 ('Action! - Gameplay Recording and Streaming', 5),
 ('Emergency 2012', 5),
 ('Cloud Chamber', 5),
 ('Build-A-Lot 4', 5),
 ('Comanche 4', 5),
 ('Hotel Giant 2', 5)]

In [32]:
# Persist the per-user top-N lists as CSV. Building a DataFrame from the dict yields
# one column per user id and one row per rank; each cell holds a (game, estimate) tuple.
pd.DataFrame(data=recommendations).to_csv(path_or_buf="data/output/CF-Item-Rec.csv")