In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
import seaborn as sns
import warnings
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import train_test_split
import pyarrow.parquet as pq
from surprise import SVD
from surprise import accuracy
import pickle


ModuleNotFoundError: No module named 'surprise'

In [4]:
# Suppress every Python warning for the rest of the kernel session.
# NOTE(review): this blanket filter also hides deprecation warnings raised by
# the libraries used below; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [5]:
# Game catalogue, read from a parquet file in the current working directory.
games = pd.read_parquet('games.parquet')

# User reviews, read from a parquet file in the current working directory.
reviews = pd.read_parquet('reviews.parquet')


In [6]:
reviews.head(2)

Unnamed: 0,index,user_id,item_id,helpful,recommend,año,sentiment_analysis
0,0,76561197970982479,1250,No ratings yet,True,2011,2
1,0,76561197970982479,22200,No ratings yet,True,2011,2


In [7]:
games.head(2)


Unnamed: 0,app_name,tags,price,id,developer,release_year,Accounting,Action,Adventure,Animation &amp; Modeling,...,Photo Editing,RPG,Racing,Simulation,Software Training,Sports,Strategy,Utilities,Video Production,Web Publishing
0,Lost Summoner Kitty,"['Strategy', 'Action', 'Indie', 'Casual', 'Sim...",4.99,761140,Kotoshiro,2018,0,1,0,0,...,0,0,0,1,0,0,1,0,0,0
1,Ironbound,"['Free to Play', 'Strategy', 'Indie', 'RPG', '...",0.0,643980,Secret Level SRL,2018,0,0,0,0,...,0,1,0,0,0,0,1,0,0,0


In [8]:
# Keep only the rows of `games` whose `id` value is an `int` instance.
# NOTE(review): `int_values` is not referenced again in the visible cells.
is_int_id = games['id'].apply(lambda game_id: isinstance(game_id, int))
int_values = games[is_int_id]


In [9]:
# Attach the game title to every review by matching the review's `item_id`
# against the catalogue's `id` (pandas' default join type is used).
review_columns = reviews[['item_id', 'user_id', 'sentiment_analysis', 'recommend']]
game_columns = games[['app_name', 'id']]
merged_df = review_columns.merge(game_columns, left_on='item_id', right_on='id')

In [10]:
merged_df

Unnamed: 0,item_id,user_id,sentiment_analysis,recommend,app_name,id
0,1250,76561197970982479,2,True,Killing Floor,1250
1,22200,76561197970982479,2,True,Zeno Clash,22200
2,227300,js41637,2,True,Euro Truck Simulator 2,227300
3,239030,js41637,2,True,"Papers, Please",239030
4,370360,evcentric,2,True,TIS-100,370360
...,...,...,...,...,...,...
44734,730,76561198239215706,2,True,Counter-Strike: Global Offensive,730
44735,730,wayfeng,1,True,Counter-Strike: Global Offensive,730
44736,253980,76561198251004808,2,True,Enclave,253980
44737,730,72947282842,0,True,Counter-Strike: Global Offensive,730


In [11]:
# Encode the boolean `recommend` flag as 1 (True) / 0 (False).
# An explicit cast is used instead of `replace({True: 1, False: 0})`, which
# depends on pandas silently downcasting the replaced column back to integers.
# The width is spelled out so the result does not depend on the platform's
# default integer size.
# Assumes `recommend` contains only True/False (no missing values) — TODO confirm.
merged_df['recommend'] = merged_df['recommend'].astype('int64')

In [12]:
merged_df.head(3)

Unnamed: 0,item_id,user_id,sentiment_analysis,recommend,app_name,id
0,1250,76561197970982479,2,1,Killing Floor,1250
1,22200,76561197970982479,2,1,Zeno Clash,22200
2,227300,js41637,2,1,Euro Truck Simulator 2,227300


In [13]:
# Collapse (sentiment_analysis, recommend) into a single 0-5 rating:
#   sentiment 2 -> 5 / 4, sentiment 1 -> 3 / 2, sentiment 0 -> 1 / 0
# (first value when recommend == 1, second when recommend == 0),
# which is exactly 2 * sentiment_analysis + recommend.
# Computed column-wise instead of with an `iterrows` loop that indexed each
# row by integer position.
sentiment = merged_df['sentiment_analysis']
recommend = merged_df['recommend']

# Rows whose values fall outside the six known combinations get NaN, and the
# column is float, matching what the row-by-row version produced.
known_combination = sentiment.isin([0, 1, 2]) & recommend.isin([0, 1])
merged_df['rating'] = (2 * sentiment + recommend).where(known_combination).astype(float)

In [14]:
merged_df

Unnamed: 0,item_id,user_id,sentiment_analysis,recommend,app_name,id,rating
0,1250,76561197970982479,2,1,Killing Floor,1250,5.0
1,22200,76561197970982479,2,1,Zeno Clash,22200,5.0
2,227300,js41637,2,1,Euro Truck Simulator 2,227300,5.0
3,239030,js41637,2,1,"Papers, Please",239030,5.0
4,370360,evcentric,2,1,TIS-100,370360,5.0
...,...,...,...,...,...,...,...
44734,730,76561198239215706,2,1,Counter-Strike: Global Offensive,730,5.0
44735,730,wayfeng,1,1,Counter-Strike: Global Offensive,730,3.0
44736,253980,76561198251004808,2,1,Enclave,253980,5.0
44737,730,72947282842,0,1,Counter-Strike: Global Offensive,730,1.0


In [15]:
# Persist the merged review table (including the derived `rating` column)
# so the modelling cells below can start from this file.
merged_df.to_parquet('model.parquet')

In [17]:
# Reload the table written by the previous step; every later cell works on `model_df`.
model_df = pd.read_parquet('model.parquet')

In [18]:
model_df

Unnamed: 0,item_id,user_id,sentiment_analysis,recommend,app_name,id,rating
0,1250,76561197970982479,2,1,Killing Floor,1250,5.0
1,22200,76561197970982479,2,1,Zeno Clash,22200,5.0
2,227300,js41637,2,1,Euro Truck Simulator 2,227300,5.0
3,239030,js41637,2,1,"Papers, Please",239030,5.0
4,370360,evcentric,2,1,TIS-100,370360,5.0
...,...,...,...,...,...,...,...
44734,730,76561198239215706,2,1,Counter-Strike: Global Offensive,730,5.0
44735,730,wayfeng,1,1,Counter-Strike: Global Offensive,730,3.0
44736,253980,76561198251004808,2,1,Enclave,253980,5.0
44737,730,72947282842,0,1,Counter-Strike: Global Offensive,730,1.0


In [19]:
# Surprise reader configured for the 0-5 range of the `rating` column built above.
reader = Reader(rating_scale = (0, 5))

In [21]:
# Build the Surprise dataset from (user, item, rating) triples.
# The game title (`app_name`) is used as the item identifier, so predictions
# later return titles directly.
# NOTE(review): rows with a missing `rating` are passed through as-is — confirm there are none.
data = Dataset.load_from_df(model_df[['user_id', 'app_name', 'rating']], reader)

In [22]:
# Hold out 20% of the ratings; the fixed seed keeps the split reproducible.
trainset, testset = train_test_split(data, test_size=0.2, random_state= 42)

In [23]:
from surprise.model_selection import GridSearchCV, KFold

# Hyper-parameter grid for SVD. Every entry is a list of candidate values;
# `reg_all` was previously written as a set literal, unlike the other three.
param_grid = {
    'n_factors': [5, 50, 100],
    'n_epochs': [5, 10, 20],
    'lr_all': [0.001, 0.002, 0.005],
    'reg_all': [0.002, 0.02, 0.2],
}

# Seed the fold shuffling (same seed as the train/test split) so the candidates
# are compared on the same 5 folds on every run. SVD's own initialisation is
# still unseeded inside the search.
cv_folds = KFold(n_splits=5, random_state=42, shuffle=True)

# Exhaustive search scored by RMSE, using all available cores.
gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=cv_folds, n_jobs=-1)
gs.fit(data)


In [25]:
# Report the best cross-validated RMSE and the parameter combination that achieved it.
best_rmse = gs.best_score['rmse']
best_rmse_params = gs.best_params['rmse']
print(best_rmse)
print(best_rmse_params)

1.5493957194442203
{'n_factors': 100, 'n_epochs': 20, 'lr_all': 0.005, 'reg_all': 0.2}


In [26]:
# Train the final SVD with the best parameters reported by the grid search.
# `random_state` pins the training randomness so the pickled model can be
# reproduced; the seed matches the one used for the train/test split.
# NOTE(review): the model is fitted on `trainset` (the 80% split) only, while
# `user_recomendations` serves every user in `model_df`. Consider fitting on
# `data.build_full_trainset()` for the exported model — confirm intent.
model = SVD(n_factors=100, n_epochs=20, lr_all=0.005, reg_all=0.2, random_state=42)
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x18fb360fad0>

In [31]:
# Serialise the trained model to disk.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [32]:
# Load the model back from the pickle written above (round-trip check).
# Only unpickle files you produced yourself: unpickling can execute arbitrary code.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

In [33]:
print(model)

<surprise.prediction_algorithms.matrix_factorization.SVD object at 0x0000018FB35FE510>


In [35]:
def user_recomendations(id_usuario, ratings_df=None, recommender=None, top_n=5):
    """Recommend the games a user has not reviewed, ranked by predicted rating.

    Parameters
    ----------
    id_usuario
        User identifier, as stored in the ``user_id`` column.
    ratings_df : DataFrame, optional
        Table with at least ``user_id`` and ``app_name`` columns. Defaults to
        the notebook-level ``model_df``.
    recommender : optional
        Object exposing ``predict(user, item)`` whose result has ``est`` and
        ``iid`` attributes. Defaults to the notebook-level ``model``.
    top_n : int, optional
        Maximum number of games to return (default 5).

    Returns
    -------
    dict
        ``{"game 1": title, "game 2": title, ...}`` ordered from highest to
        lowest predicted rating. It holds fewer than ``top_n`` entries when the
        user has fewer unreviewed games than that. If the user does not appear
        in ``ratings_df`` the result is ``{'error': 'el usuario no existe.'}``.
    """
    if ratings_df is None:
        ratings_df = model_df
    if recommender is None:
        recommender = model

    user_rows = ratings_df['user_id'] == id_usuario
    if not user_rows.any():
        return {'error': 'el usuario no existe.'}

    already_reviewed = set(ratings_df.loc[user_rows, 'app_name'])

    # Walk the titles in first-appearance order rather than through a set, so
    # that games with equal predicted ratings are ranked the same on every run.
    candidates = [
        game for game in ratings_df['app_name'].unique()
        if game not in already_reviewed
    ]

    predictions = [recommender.predict(id_usuario, game) for game in candidates]
    best = sorted(predictions, key=lambda prediction: prediction.est, reverse=True)[:top_n]

    # Build the dict from whatever is available instead of indexing five fixed
    # positions, which failed when fewer than five candidates existed.
    return {f"game {rank}": prediction.iid for rank, prediction in enumerate(best, start=1)}

In [36]:
user_recomendations("inorisanbaka")

{'game 1': 'SMITE®',
 'game 2': 'Rogue Legacy',
 'game 3': 'Ori and the Blind Forest',
 'game 4': "King Arthur's Gold",
 'game 5': 'Gunpoint'}

In [37]:
user_recomendations("diego9031")

{'game 1': 'Remember Me',
 'game 2': 'SMITE®',
 'game 3': 'Ori and the Blind Forest',
 'game 4': 'Dust: An Elysian Tail',
 'game 5': 'Psychonauts'}

In [39]:
user_recomendations("Gamer0009")

{'game 1': 'SMITE®',
 'game 2': 'Rogue Legacy',
 'game 3': 'Ori and the Blind Forest',
 'game 4': "King Arthur's Gold",
 'game 5': 'Gunpoint'}