In [None]:
# Import libraries and dependencies
import numpy as np
import scipy
import pandas as pd
import math
import random
import sklearn
from nltk.corpus import stopwords
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [None]:
# Load the games catalogue and the per-user ratings from CSV files located in
# the current working directory (both paths are relative).
df_games, df_users_ratings = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('games.csv', 'user.csv')
)

In [None]:
# Preview the first rows of the games dataset to check its columns and value formats
df_games.head()

Unnamed: 0,id_game,title,genre,price
0,1,Terraria(2011),Open World Survival Craft,$9.99
1,2,Portal 2(2011),Platformer,$9.99
2,3,Stardew Valley(2016),Farming Sim,$14.99
3,4,Hades(2020),Action Roguelike,$24.99
4,5,Left 4 Dead 2(2009),Zombies,$9.99


In [None]:
# Rename the games id column to `gameId` so it uses the same key name as the
# ratings data it is later merged with.
df_games = df_games.rename(columns={'id_game': 'gameId'})
df_games.head()

Unnamed: 0,gameId,title,genre,price
0,1,Terraria(2011),Open World Survival Craft,$9.99
1,2,Portal 2(2011),Platformer,$9.99
2,3,Stardew Valley(2016),Farming Sim,$14.99
3,4,Hades(2020),Action Roguelike,$24.99
4,5,Left 4 Dead 2(2009),Zombies,$9.99


In [None]:
# Preview the first rows of the raw user ratings; note the stray ';' in the rating header and values
df_users_ratings.head()

Unnamed: 0,UserID,GameID,rating;
0,1,147,1.9;
1,1,198,5.0;
2,1,137,9.0;
3,1,197,5.1;
4,2,9,7.4;


In [None]:
# Normalise the ratings column names: drop the stray ';' from the rating
# header and use camelCase userId / gameId keys (gameId matches the games table).
df_users_ratings = df_users_ratings.rename(
    columns={'UserID': 'userId', 'GameID': 'gameId', 'rating;': 'rating'}
)
df_users_ratings.head(10)

Unnamed: 0,userId,gameId,rating
0,1,147,1.9;
1,1,198,5.0;
2,1,137,9.0;
3,1,197,5.1;
4,2,9,7.4;
5,2,26,1.5;
6,2,221,2.2;
7,2,6,1.4;
8,2,15,3.8;
9,2,205,2.6;


In [None]:
# Clean up rating data to ensure that all data is valid
def clean_reatings(rating, max_rating=10):
  """Normalise one raw rating value to a float, or NaN when it is unusable.

  String input has every ';' removed and surrounding whitespace stripped. If
  the remaining text contains more than one '.' and ends with '.', that
  trailing dot is dropped (e.g. '5.0.' -> '5.0'). The value is then parsed as
  a float and range-checked. Non-string input is parsed and range-checked the
  same way, so every path applies the same validity rule.

  Args:
    rating: Raw cell value; a string such as '7.4;', a number, None or NaN.
    max_rating: Largest accepted rating. Values above it are treated as
      invalid. Defaults to 10.

  Returns:
    The rating as a float, or np.nan when the value cannot be parsed as a
    number or exceeds `max_rating`.
  """
  if isinstance(rating, str):
    text = rating.replace(';', '').strip()
    # A doubled decimal point at the end ('5.0.') is repaired by dropping the
    # final dot; the repaired value still goes through the range check below.
    if text.count('.') > 1 and text.endswith('.'):
      text = text[:-1]
    rating = text

  try:
    value = float(rating)
  except (TypeError, ValueError):
    # None, empty strings and garbled text are reported as missing instead of
    # aborting the whole column conversion.
    return np.nan

  # NaN compares False here and is therefore returned unchanged.
  return np.nan if value > max_rating else value

# Clean every raw rating value first, then convert each cleaned value to a number.
cleaned_ratings = df_users_ratings['rating'].apply(clean_reatings)
df_users_ratings['rating'] = cleaned_ratings.apply(pd.to_numeric)
df_users_ratings.head()

Unnamed: 0,userId,gameId,rating
0,1,147,1.9
1,1,198,5.0
2,1,137,9.0
3,1,197,5.1
4,2,9,7.4


In [None]:
# Check dtypes and non-null counts after cleaning; missing ratings show up as a lower non-null count
df_users_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5470 entries, 0 to 5469
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   userId  5470 non-null   int64  
 1   gameId  5470 non-null   int64  
 2   rating  5468 non-null   float64
dtypes: float64(1), int64(2)
memory usage: 128.3 KB


In [None]:
# Impute missing ratings with the overall mean rating.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the column selection: that form operates on an
# intermediate object, triggers a chained-assignment warning in pandas 2.x and
# leaves the frame unchanged when Copy-on-Write is enabled.
mean_rating = df_users_ratings['rating'].mean()
df_users_ratings['rating'] = df_users_ratings['rating'].fillna(mean_rating)
df_users_ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5470 entries, 0 to 5469
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   userId  5470 non-null   int64  
 1   gameId  5470 non-null   int64  
 2   rating  5470 non-null   float64
dtypes: float64(1), int64(2)
memory usage: 128.3 KB


In [None]:
# Build the user x game rating matrix: one row per userId, one column per
# gameId. (user, game) pairs without a rating are filled with 0.
df_game_features = (
    df_users_ratings
    .pivot(index='userId', columns='gameId', values='rating')
    .fillna(0)
)
df_game_features

gameId,1,2,3,4,5,6,7,8,9,10,...,241,242,243,244,245,246,247,248,249,250
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,1.4,0.0,0.0,7.4,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.6,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
996,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.8,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,9.7,0.0,0.0,0.0,0.0


In [None]:
# Truncated SVD of the user-mean-centred rating matrix, followed by a dense
# reconstruction that serves as the predicted rating for every (user, game).
# `svds` is imported in the imports cell at the top of the notebook.
R = df_game_features.to_numpy()
user_ratings_mean = R.mean(axis=1)
R_demeaned = R - user_ratings_mean[:, np.newaxis]

# k=6 is the default number of singular triplets used by svds; it is spelled
# out so the number of latent factors is visible and easy to tune.
U, sigma, Vt = svds(R_demeaned, k=6)

# Turn the vector of singular values into a diagonal matrix, rebuild the
# low-rank approximation and add each user's mean back onto their row.
sigma = np.diag(sigma)
all_user_predicted_ratings = U @ sigma @ Vt + user_ratings_mean[:, np.newaxis]

# Rows keep a positional 0..n-1 index (in the row order of df_game_features);
# columns carry the gameId labels.
preds_df = pd.DataFrame(all_user_predicted_ratings, columns=df_game_features.columns)
preds_df.head()

gameId,1,2,3,4,5,6,7,8,9,10,...,241,242,243,244,245,246,247,248,249,250
0,-0.058195,-0.431758,0.168543,0.129824,0.190241,0.399951,-0.002122,-0.390043,0.17085,-0.158296,...,-0.213516,-0.584701,0.015638,0.254824,-0.064793,-0.142608,-0.290546,-0.011887,-0.194547,0.20554
1,0.43678,0.131685,-0.017688,0.106477,0.158686,0.216764,0.205381,-0.138775,0.538218,0.097861,...,-0.122015,0.513184,0.136241,0.01319,0.331344,0.046445,0.46078,0.262083,0.112885,-0.07382
2,0.043847,-0.271463,-0.43157,-0.352519,0.333072,0.350666,-0.084835,-0.190006,0.01045,0.046839,...,-0.440283,-0.106615,0.246612,0.06719,-0.232152,-0.370552,0.352844,0.26584,-0.187915,-0.05113
3,0.108481,0.067257,0.416706,-0.618633,-0.245834,0.088023,0.878277,0.449493,0.09857,-0.13753,...,0.458423,0.165457,-0.01551,-0.745709,-0.304622,1.433312,0.762798,-0.169537,0.383429,-0.072087
4,-0.454799,0.330851,-0.246671,-0.949421,-0.789058,1.044818,-0.020353,-0.319657,0.474527,0.130833,...,-0.168582,-0.363804,0.082602,-0.721571,0.159671,-0.924701,-0.344492,0.554493,0.218807,0.518156


In [None]:
def _prediction_row_for_user(preds_df, userId, original_ratings_df):
  """Return the positional row of `preds_df` that holds `userId`'s predictions."""
  known_user_ids = pd.Index(original_ratings_df['userId'].unique()).sort_values()
  if len(known_user_ids) == len(preds_df):
    # One prediction row per distinct user, in ascending userId order: look the
    # user up by id so gaps in the id sequence cannot shift the row.
    position = known_user_ids.get_indexer([userId])[0]
    if position < 0:
      raise IndexError(f'userId {userId!r} has no ratings, so no predictions exist for it')
    return int(position)

  # Row count does not match the distinct users; fall back to the legacy
  # "ids are 1..N" convention, but refuse out-of-range ids instead of letting a
  # negative position wrap around to another user's row.
  position = userId - 1
  if not 0 <= position < len(preds_df):
    raise IndexError(f'userId {userId!r} is outside the range of available predictions')
  return position


def recommend_games(preds_df, userId, games_df, original_ratings_df, num_recommendations=5):
  """Recommend the unrated games with the highest predicted rating for a user.

  Args:
    preds_df: Predicted ratings, one row per user and one column per gameId.
      Rows are addressed by position; when the row count equals the number of
      distinct userIds in `original_ratings_df`, row i is taken to belong to
      the i-th smallest userId.
    userId: Id of the user to recommend for.
    games_df: Games table with a `gameId` column plus descriptive columns.
    original_ratings_df: Ratings table with `userId`, `gameId` and `rating`.
    num_recommendations: Maximum number of games to return (default 5).

  Returns:
    A tuple `(user_full, recommendations)`:
      user_full: the user's existing ratings joined with the game details,
        sorted by rating, highest first.
      recommendations: rows of `games_df` the user has not rated, ordered by
        predicted rating (highest first); the prediction column is dropped.

  Raises:
    IndexError: if no prediction row can be resolved for `userId`.
  """
  user_row_number = _prediction_row_for_user(preds_df, userId, original_ratings_df)

  # Name the columns explicitly instead of renaming a column that happens to
  # be labelled with the row number.
  user_predictions = (
      preds_df.iloc[user_row_number]
      .rename('Predictions')
      .rename_axis('gameId')
      .reset_index()
  )

  user_data = original_ratings_df[original_ratings_df['userId'] == userId]
  user_full = (
      user_data
      .merge(games_df, how='left', on='gameId')
      .sort_values('rating', ascending=False)
  )

  unrated_games = games_df[~games_df['gameId'].isin(user_full['gameId'])]
  recommendations = (
      unrated_games
      .merge(user_predictions, how='left', on='gameId')
      .sort_values('Predictions', ascending=False)
      .head(num_recommendations)
      .drop(columns='Predictions')
  )

  return user_full, recommendations



In [None]:
userID = 2     # user to generate recommendations for
recNumber = 5  # number of recommendations to request and display

already_rated, predictions = recommend_games(
    preds_df=preds_df,
    userId=userID,
    games_df=df_games,
    original_ratings_df=df_users_ratings,
    num_recommendations=recNumber,
)
# recommend_games returns at most recNumber rows, so this shows all of them.
predictions.head(recNumber)

Unnamed: 0,gameId,title,genre,price
15,19,Bloons TD 6(2018),Tower Defense,$9.99
46,52,Inscryption(2021),Card Battler,$19.99
234,242,Universe Sandbox(2015),Sandbox,$29.99
89,95,The Test(2020),Multiple Endings,$1.99
75,81,Outlast(2013),Horror,$19.99
