In [1]:
import pandas as pd
import numpy as np
import scipy as sp
%load_ext autoreload
%autoreload 2
import utils as ut
import operator
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity

import warnings
warnings.filterwarnings("ignore")


In [2]:
df_model = pd.read_csv('data/csv/model.csv', encoding='utf-8')

# User Recomendation

In this model we are going to apply the user-item filter, that is, take a user, find similar users and recommend items that those similar users liked. In this case the input is a user and the output is a list of games that are recommended to that user

In [3]:
df_model

Unnamed: 0,item_id,item_name,genres,user_id,rating
0,105600,Terraria,Adventure,76561198066375266,5
1,341190,Hotline Miami 2: Wrong Number Digital Comic,Indie,mattyp500,5
2,98300,Toy Soldiers,Simulation,76561198101278301,5
3,233720,Surgeon Simulator,Indie,BonnieMTD,1
4,252490,Rust,Indie,76561198115944716,5
...,...,...,...,...,...
1999995,105600,Terraria,RPG,76561198073186208,3
1999996,245170,Skullgirls,Indie,76561198011218089,1
1999997,274310,Always Sometimes Monsters,Indie,SemiSalad,5
1999998,109600,Neverwinter,Free to Play,76561198126495215,5


In [4]:
df_mod_user = df_model[['user_id', 'item_name', 'rating']]
df_mod_user.drop_duplicates(inplace=True)
df_mod_user

Unnamed: 0,user_id,item_name,rating
0,76561198066375266,Terraria,5
1,mattyp500,Hotline Miami 2: Wrong Number Digital Comic,5
2,76561198101278301,Toy Soldiers,5
3,BonnieMTD,Surgeon Simulator,1
4,76561198115944716,Rust,5
...,...,...,...
1999988,ceige,Deus Ex: Human Revolution - Director's Cut,1
1999991,gigi_buffon,The Inner World,4
1999993,thesiminerd,PlanetSide 2,5
1999996,76561198011218089,Skullgirls,1


We are going to create a matrix that contains the 'user_id' as indexes, the items names as columns and the 'rating' as values.

In [5]:
u_matrix = df_mod_user.pivot_table(index=['user_id'], columns=['item_name'], values='rating')
u_matrix

item_name,! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,$1 Ride,.EXE,001 Game Creator,0RBITALIS,1 Moment Of Time: Silentville,"1,000 Heads Among the Trees",10 Second Ninja,...,sZone-Online,samurai_jazz,simian.interface++,stratO,the static speaks my name,theBlu,theHunter: Primal,Астролорды: Оружие Пришельцев,侠客风云传(Tale of Wuxia),軒轅劍外傳穹之扉(The Gate of Firmament)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,,,,,,,,,,,...,3.0,,,,,,,,,
--ace--,,,,,,,,,,,...,,,,,,,,,,
--ionex--,,,,,,,,,,,...,,,,,,,,,,
-2SV-vuLB-Kg,,,,,,,,,,,...,,,,,,,,,,
-Beave-,,,,,,,,,,,...,3.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zwanzigdrei,,,,,,,,,,,...,,,,,,,,,,
zy0705,,,,,,,,,,,...,,,,,,,,,,
zynxgameth,,,,,,,,,,,...,,,,,,,,,,
zyr0n1c,,,,,,,,,,,...,,,,,,,,,,


In [6]:
random_rows = 1500
u_matrix_sample = u_matrix.sample(n=random_rows, random_state=42)

We are going to normalize the dataframe values.
Normalization adjusts a user's ratings,  they are centered on zero and scaled based on their variability.

In [7]:
scaler = MinMaxScaler()
umatrix_norm = pd.DataFrame(scaler.fit_transform(u_matrix_sample), columns=u_matrix_sample.columns, index=u_matrix_sample.index)

# The columns that contain only zero or have no rating are deleted, the gaps are filled with 0.
umatrix_norm.fillna(0, inplace=True)
umatrix_norm = umatrix_norm.T
umatrix_norm = umatrix_norm.loc[:, (umatrix_norm != 0).any(axis=0)]
umatrix_norm

user_id,76561198139249899,76561198083594288,anonimux,Terenator,76561198049813970,76561198076388843,itscazadude,joshodonnell,76561198052461539,76561198055579125,...,Solonx,leonmfps,weMAD,76561197993412979,76561198143242417,ShanShanthePanMan,76561198139928409,76561198055632563,dieto2001,Customurl1
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.EXE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
theBlu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Астролорды: Оружие Пришельцев,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We are converting umatrix_norm to a sparse matrix format to reduce the memory used and improve efficiency in handling large data sets, especially when most of the values ​​in the matrix are zeros. Then we normalize the sparse matrix too.

In [8]:
um_sparse = sp.sparse.csr_matrix(umatrix_norm.values)
um_sparse

<7336x1179 sparse matrix of type '<class 'numpy.float64'>'
	with 57147 stored elements in Compressed Sparse Row format>

In [9]:
um_sparse_normalized = normalize(um_sparse)

Now, we aply the cosine similarity model to the transposed normalized sparse matriz

In [10]:
user_similarity = cosine_similarity(um_sparse_normalized.T)

Now, we are going to save the model matrix in a dataframe in order to use it in our finctuion

In [11]:
user_sim_df = pd.DataFrame(user_similarity, index = umatrix_norm.columns, columns = umatrix_norm.columns)

In [12]:
user_sim_df

user_id,76561198139249899,76561198083594288,anonimux,Terenator,76561198049813970,76561198076388843,itscazadude,joshodonnell,76561198052461539,76561198055579125,...,Solonx,leonmfps,weMAD,76561197993412979,76561198143242417,ShanShanthePanMan,76561198139928409,76561198055632563,dieto2001,Customurl1
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561198139249899,1.000000,0.013572,0.004476,0.010586,0.014333,0.016283,0.023400,0.000000,0.031463,0.003640,...,0.029357,0.004099,0.002820,0.002115,0.020043,0.009961,0.008561,0.015232,0.012206,0.028837
76561198083594288,0.013572,1.000000,0.011969,0.024101,0.024831,0.022619,0.031256,0.017072,0.015832,0.018974,...,0.048278,0.009376,0.001643,0.030873,0.011016,0.041519,0.004986,0.020435,0.049298,0.019550
anonimux,0.004476,0.011969,1.000000,0.010194,0.007173,0.001766,0.047221,0.000000,0.030251,0.006073,...,0.009068,0.003039,0.004357,0.008594,0.018672,0.026974,0.005134,0.003451,0.003803,0.093030
Terenator,0.010586,0.024101,0.010194,1.000000,0.030414,0.011702,0.021156,0.031126,0.022476,0.022291,...,0.006582,0.009461,0.009835,0.018290,0.010607,0.017466,0.001840,0.035549,0.011170,0.013795
76561198049813970,0.014333,0.024831,0.007173,0.030414,1.000000,0.037444,0.020190,0.017480,0.045396,0.009603,...,0.025446,0.003612,0.000000,0.135947,0.045257,0.033961,0.000000,0.022837,0.005547,0.027819
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ShanShanthePanMan,0.009961,0.041519,0.026974,0.017466,0.033961,0.023238,0.055814,0.042115,0.037940,0.029127,...,0.093060,0.020961,0.018820,0.013720,0.016492,1.000000,0.003244,0.008399,0.035974,0.040791
76561198139928409,0.008561,0.004986,0.005134,0.001840,0.000000,0.054804,0.000000,0.000000,0.021990,0.001586,...,0.005197,0.001253,0.007613,0.000000,0.000000,0.003244,1.000000,0.001370,0.007274,0.017138
76561198055632563,0.015232,0.020435,0.003451,0.035549,0.022837,0.022220,0.009027,0.091102,0.012642,0.006990,...,0.005977,0.016354,0.034902,0.032880,0.021784,0.008399,0.001370,1.000000,0.005300,0.012957
dieto2001,0.012206,0.049298,0.003803,0.011170,0.005547,0.034342,0.049768,0.002321,0.044600,0.045096,...,0.011662,0.035530,0.027975,0.009011,0.076914,0.035974,0.007274,0.005300,1.000000,0.091036


Create the user-item recomendation function

In [13]:
def similar_user_recs(user: str):
    '''
    Generates a list of the most recommended items for a user, based on ratings from similar users.

    Arguments:
        user (str): The name or identifier of the user for whom you want to generate recommendations.

    Returns:
        list: A list of the most recommended items for the user based on the rating of similar users.

    '''
    # Check if the user is present in the umatrix_norm columns (if not, return a message)
    if user not in umatrix_norm.columns:
        return('No data available on user {}'.format(user))
    
    # Get the users most similar to the given user
    sim_users = user_sim_df.sort_values(by=user, ascending=False).index[1:11]
    
    best = []  # List to store the items best rated by similar users
    most_common = {}  # Dictionary to count how many times each item is recommended
    
    # For each similar user, find the highest rated item and add it to the 'best' list
    for i in sim_users:
        max_score = umatrix_norm.loc[:, i].max()
        best.append(umatrix_norm[umatrix_norm.loc[:, i]==max_score].index.tolist())
           
    # Counts how many times each item is recommended
    for i in range(len(best)):
        for j in best[i]:
            if j in most_common:
                most_common[j] += 1
            else:
                most_common[j] = 1
    
    # Sort items by recommendation frequency in descending order
    sorted_list = sorted(most_common.items(), key=operator.itemgetter(1), reverse=True)
    
    # Return 5 most recommend items
    return 'Users who are similar to {}:'.format(user), 'also liked it', sorted_list[:5]

In [14]:
similar_user_recs('Terenator')

('Users who are similar to Terenator:',
 'also liked it',
 [('PAYDAY 2', 9),
  ('Unturned', 7),
  ('Rust', 6),
  ('PlanetSide 2', 6),
  ('ARK: Survival Evolved', 5)])

# Item recomendation

This model model will have an item-item relationship, that is, an item is taken, based on how similar that item is to the rest, similar ones are recommended. Here the input is a game and the output is a list of recommended games

In [15]:
df_mod_game = df_model[['item_id', 'item_name', 'genres']]
df_mod_game.drop_duplicates(inplace=True)
df_mod_game

Unnamed: 0,item_id,item_name,genres
0,105600,Terraria,Adventure
1,341190,Hotline Miami 2: Wrong Number Digital Comic,Indie
2,98300,Toy Soldiers,Simulation
3,233720,Surgeon Simulator,Indie
4,252490,Rust,Indie
...,...,...,...
1993850,378070,Energy Hook,Sports
1994590,275830,7 Wonders: Magical Mystery Tour,Strategy
1996994,367550,Rugby League Team Manager 2015,Sports
1997506,347060,Pro Basketball Manager 2016,Simulation


We create a new column 'genre_tag' with values 1. 

In [16]:
df_mod_game['genre_tag'] = 1 

In [17]:
df_mod_game

Unnamed: 0,item_id,item_name,genres,genre_tag
0,105600,Terraria,Adventure,1
1,341190,Hotline Miami 2: Wrong Number Digital Comic,Indie,1
2,98300,Toy Soldiers,Simulation,1
3,233720,Surgeon Simulator,Indie,1
4,252490,Rust,Indie,1
...,...,...,...,...
1993850,378070,Energy Hook,Sports,1
1994590,275830,7 Wonders: Magical Mystery Tour,Strategy,1
1996994,367550,Rugby League Team Manager 2015,Sports,1
1997506,347060,Pro Basketball Manager 2016,Simulation,1


We create an auxiliar dataframe 'df_id' in order to extract the id_item for our recomendation function

In [18]:
df_id = df_mod_game[['item_id', 'item_name']].drop_duplicates()

In [19]:
df_id

Unnamed: 0,item_id,item_name
0,105600,Terraria
1,341190,Hotline Miami 2: Wrong Number Digital Comic
2,98300,Toy Soldiers
3,233720,Surgeon Simulator
4,252490,Rust
...,...,...
1966757,434790,Planet 1138
1972802,419500,Red Game Without A Great Name
1985267,273800,Forestry 2017 - The Simulation
1997506,347060,Pro Basketball Manager 2016


We are going to create a matrix that contains the 'item_names' as indexes, and 'genres' as columns and the 'genre_tag' as values.

In [20]:
g_matrix = df_mod_game.pivot_table(index='item_name', columns='genres', values='genre_tag', fill_value=0)
g_matrix


genres,Action,Action RPG,Action-Adventure,Adventure,Aliens,Animation & Modeling,Anime,Arcade,Asynchronous Multiplayer,Atmospheric,...,Violent,Visual Novel,Voxel,Walking Simulator,War,Wargame,Web Publishing,Werewolves,Zombies,e-sports
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.EXE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
theBlu,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Астролорды: Оружие Пришельцев,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In order to save memory and, without loss of generality, we are going to extract an auxiliary matrix with 60% of the data, randomizing the original matrix

In [21]:
random_rows = 1500
g_matrix_sample = g_matrix.sample(n=random_rows, random_state=42)

In [22]:
g_matrix_sample

genres,Action,Action RPG,Action-Adventure,Adventure,Aliens,Animation & Modeling,Anime,Arcade,Asynchronous Multiplayer,Atmospheric,...,Violent,Visual Novel,Voxel,Walking Simulator,War,Wargame,Web Publishing,Werewolves,Zombies,e-sports
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CABAL Online,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Beyond Divinity,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Closure,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Chronicles of Mystery: The Scorpio Ritual,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Keep Talking and Nobody Explodes,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Dustbowl,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Elements: Epic Heroes,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bloody Trapland,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Last Heroes,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We going to convert g_matrix_sample to a sparse matrix format to reduce the memory used and improve efficiency in handling large data sets, especially when most of the values ​​in the matrix are zeros. 

In [23]:
game_sparse = sp.sparse.csr_matrix(g_matrix_sample.values)

Aply the cosine similarity model to saprse matrix

In [24]:
game_similarity = cosine_similarity(game_sparse)

Save the model matrix in a new dataframe

In [25]:
game_sim_df = pd.DataFrame(game_similarity, index = g_matrix_sample.index, columns = g_matrix_sample.index)
game_sim_df

item_name,CABAL Online,Beyond Divinity,Closure,Chronicles of Mystery: The Scorpio Ritual,Keep Talking and Nobody Explodes,Fiesta Online NA,Adventures of Bertram Fiddle: Episode 1: A Dreadly Business,It's A Wipe!,DinerTown Tycoon,LoveBeat,...,Ragnarok Clicker,Highway to the Moon,Mini Motor Racing EVO,LocoSoccer,iRacing,Dustbowl,Elements: Epic Heroes,Bloody Trapland,Last Heroes,The Escapists: The Walking Dead
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
CABAL Online,1.000000,0.500000,0.000000,0.000000,0.125000,0.866025,0.000000,0.250000,0.00,0.75000,...,0.408248,0.353553,0.000000,0.000000,0.333333,0.353553,0.866025,0.250000,0.250000,0.000000
Beyond Divinity,0.500000,1.000000,0.000000,0.000000,0.000000,0.577350,0.000000,0.500000,0.00,0.00000,...,0.408248,0.000000,0.000000,0.000000,0.000000,0.707107,0.577350,0.000000,0.500000,0.000000
Closure,0.000000,0.000000,1.000000,0.000000,0.250000,0.000000,0.577350,0.500000,0.00,0.00000,...,0.000000,0.707107,0.577350,0.577350,0.000000,0.000000,0.000000,0.500000,0.500000,0.707107
Chronicles of Mystery: The Scorpio Ritual,0.000000,0.000000,0.000000,1.000000,0.000000,0.000000,0.577350,0.000000,0.00,0.00000,...,0.408248,0.000000,0.000000,0.000000,0.000000,0.707107,0.000000,0.500000,0.500000,0.000000
Keep Talking and Nobody Explodes,0.125000,0.000000,0.250000,0.000000,1.000000,0.000000,0.144338,0.375000,0.25,0.12500,...,0.204124,0.353553,0.144338,0.288675,0.250000,0.000000,0.144338,0.250000,0.125000,0.353553
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Dustbowl,0.353553,0.707107,0.000000,0.707107,0.000000,0.408248,0.408248,0.353553,0.00,0.00000,...,0.577350,0.000000,0.000000,0.000000,0.000000,1.000000,0.408248,0.353553,0.707107,0.000000
Elements: Epic Heroes,0.866025,0.577350,0.000000,0.000000,0.144338,0.666667,0.000000,0.288675,0.00,0.57735,...,0.235702,0.408248,0.000000,0.000000,0.192450,0.408248,1.000000,0.288675,0.288675,0.000000
Bloody Trapland,0.250000,0.000000,0.500000,0.500000,0.250000,0.000000,0.866025,0.250000,0.00,0.50000,...,0.408248,0.707107,0.577350,0.288675,0.000000,0.353553,0.288675,1.000000,0.750000,0.353553
Last Heroes,0.250000,0.500000,0.500000,0.500000,0.125000,0.288675,0.866025,0.500000,0.00,0.25000,...,0.612372,0.353553,0.577350,0.288675,0.000000,0.707107,0.288675,0.750000,1.000000,0.353553


Create the item-item recomendatoin function

In [26]:
def get_recommendations_by_id(item_id: int):
    '''
    Generates recommendations for a game given its ID.

    Parameters:
    - item_id (int): The ID of the game for which you want to obtain recommendations.

    Returns:
    - recommendations (list): A list of recommended game names for the given game.
    - message (str): A message indicating if the entered ID has no data available.
    '''

    # Get item name from Id
    game_name = df_id.loc[df_id['item_id'] == item_id, 'item_name'].iloc[0]

    # Check if item exists in the similarity matrix
    if game_name not in game_sim_df.index:
        return [], f"ID {item_id} has not data avalible."

    # Get the row corresponding to the item
    game_row = game_sim_df.loc[game_name]

    # Find similar items by sorting the row
    similar_games = game_sim_df.dot(game_row).sort_values(ascending=False)

    # Remove the item itself from the recommendation list
    similar_games = similar_games.drop(game_name)

    # Take the first 5 games as recommendations and return in list format
    recommendations = similar_games.head(5).index.tolist()

    return 'Recommend similar items to item {}'.format(item_id), recommendations


In [27]:
row = df_id[df_id['item_name'] == 'Beyond Divinity']
row


Unnamed: 0,item_id,item_name
37726,219760,Beyond Divinity


In [28]:
get_recommendations_by_id(219760)

('Recommend similar items to item 219760',
 ['Driftmoon',
  "Candice DeBébé's Incredibly Trick Lifestyle",
  'Nusakana',
  'LISA',
  "A Princess' Tale"])

Now, we save as parquet file the dataframes we need for our API functions

In [29]:
dfs = [umatrix_norm, user_sim_df, df_id, game_sim_df,]
# Nombres correspondientes a cada DataFrame
names = ['umatrix_norm','user_sim', 'df_id', 'game_sim']

ut.save_to_pq(dfs, names)

DataFrame 'umatrix_norm' save as 'data/umatrix_norm.parquet'
DataFrame 'user_sim' save as 'data/user_sim.parquet'
DataFrame 'df_id' save as 'data/df_id.parquet'
DataFrame 'game_sim' save as 'data/game_sim.parquet'
