In [1]:
import pandas as pd
import scipy as sp
%load_ext autoreload
%autoreload 2
import utils as ut
import operator
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings("ignore")

In [2]:
df_model = pd.read_csv('data/csv/model.csv', encoding='utf-8')

# User Recomendation

In this model we are going to apply the user-item filter, that is, take a user, find similar users and recommend items that those similar users liked. In this case the input is a user and the output is a list of games that are recommended to that user

In [3]:
df_model

Unnamed: 0,item_id,item_name,genres,user_id,rating
0,282010,Carmageddon Max Pack,Action,UTNerd24,1
1,282010,Carmageddon Max Pack,Action,I_DID_911_JUST_SAYING,1
2,282010,Carmageddon Max Pack,Action,saint556,1
3,282010,Carmageddon Max Pack,Action,chidvd,5
4,282010,Carmageddon Max Pack,Action,aerpub,1
...,...,...,...,...,...
4335479,80,Counter-Strike: Condition Zero,Action,lachwe,5
4335480,80,Counter-Strike: Condition Zero,Action,danebuchanan,5
4335481,80,Counter-Strike: Condition Zero,Action,K1NGCJS,1
4335482,80,Counter-Strike: Condition Zero,Action,celebrexISGO,5


In [4]:
df_mod_user = df_model[['user_id', 'item_name', 'rating']]
df_mod_user.drop_duplicates(inplace=True)
df_mod_user

Unnamed: 0,user_id,item_name,rating
0,UTNerd24,Carmageddon Max Pack,1
1,I_DID_911_JUST_SAYING,Carmageddon Max Pack,1
2,saint556,Carmageddon Max Pack,1
3,chidvd,Carmageddon Max Pack,5
4,aerpub,Carmageddon Max Pack,1
...,...,...,...
4335479,lachwe,Counter-Strike: Condition Zero,5
4335480,danebuchanan,Counter-Strike: Condition Zero,5
4335481,K1NGCJS,Counter-Strike: Condition Zero,1
4335482,celebrexISGO,Counter-Strike: Condition Zero,5


We are going to create a matrix that contains the 'user_id' as indexes, the items names as columns and the 'rating' as values.

In [5]:
u_matrix = df_mod_user.pivot_table(index=['user_id'], columns=['item_name'], values='rating')
u_matrix

item_name,! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,$1 Ride,.EXE,001 Game Creator,0RBITALIS,1 Moment Of Time: Silentville,"1,000 Heads Among the Trees",10 Minute Tower,...,sZone-Online,samurai_jazz,simian.interface++,stratO,the static speaks my name,theBlu,theHunter: Primal,Астролорды: Оружие Пришельцев,侠客风云传(Tale of Wuxia),軒轅劍外傳穹之扉(The Gate of Firmament)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,,,,,,,,,,,...,3.0,,,,,,,,,
--ace--,,,,,,,,,,,...,,,,,,,,,,
--ionex--,,,,,,,,,,,...,,,,,,,,,,
-2SV-vuLB-Kg,,,,,,,,,,,...,,,,,,,,,,
-Beave-,,,,,,,,,,,...,3.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zwanzigdrei,,,,,,,,,,,...,,,,,,,,,,
zy0705,,,,,,,,,,,...,,,,,,,,,,
zynxgameth,,,,,,,,,,,...,,,,,,,,,,
zyr0n1c,,,,,,,,,,,...,,,,,,,,,,


In order to save memory, let's work with a sample of the data

In [34]:
random_rows = 5000
u_matrix_sample = u_matrix.sample(n=random_rows, random_state=42)

We are going to normalize the dataframe values.
Normalization adjusts a user's ratings,  they are centered on zero and scaled based on their variability.

In [35]:
scaler = MinMaxScaler()
umatrix_norm = pd.DataFrame(scaler.fit_transform(u_matrix_sample), columns=u_matrix_sample.columns, index=u_matrix_sample.index)

# The columns that contain only zero or have no rating are deleted, the gaps are filled with 0.
umatrix_norm.fillna(0, inplace=True)
umatrix_norm = umatrix_norm.T
umatrix_norm = umatrix_norm.loc[:, (umatrix_norm != 0).any(axis=0)]
umatrix_norm

user_id,76561198045470612,fbibradfbi,13lazer,Malleeable,Urotsuki,centor111,kukkai_kfz,76561198196614946,76561198089242036,JorazGamerGuy,...,SuperficialWounds,magneboar,GummyOS,76561198018919551,SirHolyCow,76561198044352021,76561198063510182,76561197991723203,dewsy123,sharks003
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.EXE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
theBlu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Астролорды: Оружие Пришельцев,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We are converting umatrix_norm to a sparse matrix format to reduce the memory used and improve efficiency in handling large data sets, especially when most of the values ​​in the matrix are zeros. Then we normalize the sparse matrix too.

In [36]:
um_sparse = sp.sparse.csr_matrix(umatrix_norm.values)
um_sparse

<7520x4006 sparse matrix of type '<class 'numpy.float64'>'
	with 291572 stored elements in Compressed Sparse Row format>

In [37]:
um_sparse_normalized = normalize(um_sparse)

Now, we aply the cosine similarity model to the transposed normalized sparse matriz

In [38]:
user_similarity = cosine_similarity(um_sparse_normalized.T)

Now, we are going to save the model matrix in a dataframe in order to use it in our finctuion

In [39]:
user_sim_df = pd.DataFrame(user_similarity, index = umatrix_norm.columns, columns = umatrix_norm.columns)

In [40]:
user_sim_df

user_id,76561198045470612,fbibradfbi,13lazer,Malleeable,Urotsuki,centor111,kukkai_kfz,76561198196614946,76561198089242036,JorazGamerGuy,...,SuperficialWounds,magneboar,GummyOS,76561198018919551,SirHolyCow,76561198044352021,76561198063510182,76561197991723203,dewsy123,sharks003
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561198045470612,1.000000,0.005468,0.130322,0.019033,0.019240,0.017556,0.010553,0.022601,0.012859,0.007458,...,0.017413,0.029070,0.019894,0.016845,0.012617,0.000000,0.016964,0.009983,0.022000,0.010854
fbibradfbi,0.005468,1.000000,0.065207,0.051712,0.010344,0.022287,0.014619,0.015302,0.017843,0.010094,...,0.009353,0.007897,0.007287,0.035769,0.005069,0.024040,0.008127,0.020978,0.026340,0.008264
13lazer,0.130322,0.065207,1.000000,0.071157,0.051099,0.045938,0.027084,0.038141,0.044164,0.044915,...,0.039637,0.031490,0.037525,0.072894,0.015229,0.067711,0.072655,0.043010,0.046656,0.035795
Malleeable,0.019033,0.051712,0.071157,1.000000,0.053455,0.026991,0.031216,0.048949,0.035277,0.047335,...,0.021147,0.073467,0.028432,0.063991,0.020831,0.047185,0.044040,0.012028,0.097193,0.028395
Urotsuki,0.019240,0.010344,0.051099,0.053455,1.000000,0.009439,0.009553,0.011379,0.014138,0.013174,...,0.020071,0.017250,0.012215,0.015567,0.011472,0.011957,0.024183,0.022769,0.031528,0.021824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76561198044352021,0.000000,0.024040,0.067711,0.047185,0.011957,0.019075,0.027489,0.046862,0.062616,0.090659,...,0.004776,0.051260,0.064102,0.049778,0.003248,1.000000,0.072654,0.022991,0.056630,0.125420
76561198063510182,0.016964,0.008127,0.072655,0.044040,0.024183,0.013428,0.029664,0.028361,0.012739,0.042321,...,0.015075,0.033023,0.030737,0.075760,0.025285,0.072654,1.000000,0.039055,0.042751,0.085116
76561197991723203,0.009983,0.020978,0.043010,0.012028,0.022769,0.013086,0.034247,0.009540,0.004554,0.041818,...,0.032559,0.004530,0.011049,0.089077,0.004064,0.022991,0.039055,1.000000,0.018637,0.023681
dewsy123,0.022000,0.026340,0.046656,0.097193,0.031528,0.041519,0.060382,0.039370,0.012853,0.050438,...,0.009115,0.097774,0.025168,0.080209,0.017558,0.056630,0.042751,0.018637,1.000000,0.010378


Create the user-item recomendation function

In [12]:
def similar_user_recs(user: str):
    '''
    Generates a list of the most recommended items for a user, based on ratings from similar users.

    Arguments:
        user (str): The name or identifier of the user for whom you want to generate recommendations.

    Returns:
        list: A list of the most recommended items for the user based on the rating of similar users.

    '''
    # Check if the user is present in the umatrix_norm columns (if not, return a message)
    if user not in umatrix_norm.columns:
        return('No data available on user {}'.format(user))
    
    # Get the users most similar to the given user
    sim_users = user_sim_df.sort_values(by=user, ascending=False).index[1:11]
    
    best = []  # List to store the items best rated by similar users
    most_common = {}  # Dictionary to count how many times each item is recommended
    
    # For each similar user, find the highest rated item and add it to the 'best' list
    for i in sim_users:
        max_score = umatrix_norm.loc[:, i].max()
        best.append(umatrix_norm[umatrix_norm.loc[:, i]==max_score].index.tolist())
           
    # Counts how many times each item is recommended
    for i in range(len(best)):
        for j in best[i]:
            if j in most_common:
                most_common[j] += 1
            else:
                most_common[j] = 1
    
    # Sort items by recommendation frequency in descending order
    sorted_list = sorted(most_common.items(), key=operator.itemgetter(1), reverse=True)
    
    # Return 5 most recommend items
    return 'Users who are similar to {}:'.format(user), 'also liked it', sorted_list[:5]

In [43]:
similar_user_recs('Urotsuki')

('Users who are similar to Urotsuki:',
 'also liked it',
 [('Counter-Strike: Global Offensive', 10),
  ('Borderlands 2', 9),
  ('Left 4 Dead 2', 9),
  ('Tomb Raider', 9),
  ('Blood Omen 2: Legacy of Kain', 8)])

# Item recomendation

This model model will have an item-item relationship, that is, an item is taken, based on how similar that item is to the rest, similar ones are recommended. Here the input is a game and the output is a list of recommended games

In [15]:
df_mod_game = df_model[['item_id', 'item_name', 'genres']]
df_mod_game.drop_duplicates(inplace=True)
df_mod_game

Unnamed: 0,item_id,item_name,genres
0,282010,Carmageddon Max Pack,Action
94,282010,Carmageddon Max Pack,Indie
188,282010,Carmageddon Max Pack,Racing
282,70,Half-Life,Action
3609,1640,Disciples II: Gallean's Return,Strategy
...,...,...,...
4330374,30,Day of Defeat,Action
4331665,200980,Geneforge 2,Strategy
4331767,200980,Geneforge 2,RPG
4331869,200980,Geneforge 2,Indie


We create a new column 'genre_tag' with values 1. 

In [16]:
df_mod_game['genre_tag'] = 1 

In [17]:
df_mod_game

Unnamed: 0,item_id,item_name,genres,genre_tag
0,282010,Carmageddon Max Pack,Action,1
94,282010,Carmageddon Max Pack,Indie,1
188,282010,Carmageddon Max Pack,Racing,1
282,70,Half-Life,Action,1
3609,1640,Disciples II: Gallean's Return,Strategy,1
...,...,...,...,...
4330374,30,Day of Defeat,Action,1
4331665,200980,Geneforge 2,Strategy,1
4331767,200980,Geneforge 2,RPG,1
4331869,200980,Geneforge 2,Indie,1


We create an auxiliar dataframe 'df_id' in order to extract the id_item for our recomendation function

In [18]:
df_id = df_mod_game[['item_id', 'item_name']].drop_duplicates()

In [19]:
df_id

Unnamed: 0,item_id,item_name
0,282010,Carmageddon Max Pack
282,70,Half-Life
3609,1640,Disciples II: Gallean's Return
3689,1630,Disciples II: Rise of the Elves
3756,3800,Advent Rising
...,...,...
4329754,40990,Mafia
4329984,259340,Chicken Shoot Gold
4330374,30,Day of Defeat
4331665,200980,Geneforge 2


We are going to create a matrix that contains the 'item_names' as indexes, and 'genres' as columns and the 'genre_tag' as values.

In [20]:
g_matrix = df_mod_game.pivot_table(index='item_name', columns='genres', values='genre_tag', fill_value=0)
g_matrix


genres,Action,Action RPG,Action-Adventure,Adventure,Aliens,Animation & Modeling,Anime,Arcade,Arena Shooter,Asynchronous Multiplayer,...,Violent,Visual Novel,Voxel,Walking Simulator,War,Wargame,Web Publishing,Werewolves,Zombies,e-sports
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.EXE,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
theBlu,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Астролорды: Оружие Пришельцев,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In order to save memory and, without loss of generality, we are going to extract an auxiliary matrix with 60% of the data, randomizing the original matrix

We going to convert g_matrix_sample to a sparse matrix format to reduce the memory used and improve efficiency in handling large data sets, especially when most of the values ​​in the matrix are zeros. 

In [21]:
game_sparse = sp.sparse.csr_matrix(g_matrix.values)

Aply the cosine similarity model to saprse matrix

In [22]:
game_similarity = cosine_similarity(game_sparse)

Save the model matrix in a new dataframe

In [23]:
game_sim_df = pd.DataFrame(game_similarity, index = g_matrix.index, columns = g_matrix.index)
game_sim_df

item_name,! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,$1 Ride,.EXE,001 Game Creator,0RBITALIS,1 Moment Of Time: Silentville,"1,000 Heads Among the Trees",10 Minute Tower,...,sZone-Online,samurai_jazz,simian.interface++,stratO,the static speaks my name,theBlu,theHunter: Primal,Астролорды: Оружие Пришельцев,侠客风云传(Tale of Wuxia),軒轅劍外傳穹之扉(The Gate of Firmament)
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,1.000000,0.500000,0.288675,0.866025,0.866025,0.0,0.353553,0.707107,0.500000,0.500000,...,0.250000,0.866025,0.707107,0.577350,0.500000,0.612372,0.577350,0.250000,0.447214,0.000000
"""Glow Ball"" - The billiard puzzle game",0.500000,1.000000,0.577350,0.577350,0.288675,0.0,0.353553,0.353553,0.500000,0.500000,...,0.000000,0.288675,0.707107,0.288675,0.250000,0.408248,0.000000,0.500000,0.447214,0.000000
#SelfieTennis,0.288675,0.577350,1.000000,0.333333,0.333333,0.0,0.408248,0.408248,0.577350,0.577350,...,0.000000,0.333333,0.408248,0.333333,0.288675,0.471405,0.000000,0.288675,0.000000,0.000000
$1 Ride,0.866025,0.577350,0.333333,1.000000,0.666667,0.0,0.408248,0.408248,0.577350,0.577350,...,0.288675,0.666667,0.816497,0.666667,0.288675,0.471405,0.333333,0.288675,0.258199,0.000000
.EXE,0.866025,0.288675,0.333333,0.666667,1.000000,0.0,0.408248,0.816497,0.577350,0.577350,...,0.288675,1.000000,0.408248,0.666667,0.577350,0.471405,0.666667,0.288675,0.258199,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
theBlu,0.612372,0.408248,0.471405,0.471405,0.471405,0.0,0.577350,0.577350,0.408248,0.408248,...,0.000000,0.471405,0.577350,0.471405,0.408248,1.000000,0.471405,0.204124,0.547723,0.000000
theHunter: Primal,0.577350,0.000000,0.000000,0.333333,0.666667,0.0,0.408248,0.408248,0.000000,0.000000,...,0.288675,0.666667,0.000000,0.666667,0.288675,0.471405,1.000000,0.000000,0.516398,0.000000
Астролорды: Оружие Пришельцев,0.250000,0.500000,0.288675,0.288675,0.288675,0.0,0.353553,0.353553,0.500000,0.500000,...,0.500000,0.288675,0.353553,0.288675,0.500000,0.204124,0.000000,1.000000,0.223607,0.000000
侠客风云传(Tale of Wuxia),0.447214,0.447214,0.000000,0.258199,0.258199,0.0,0.316228,0.316228,0.000000,0.000000,...,0.223607,0.258199,0.316228,0.258199,0.447214,0.547723,0.516398,0.223607,1.000000,0.447214


Create the item-item recomendatoin function

In [24]:
def get_recommendations_by_id(item_id: int):
    '''
    Generates recommendations for a game given its ID.

    Parameters:
    - item_id (int): The ID of the game for which you want to obtain recommendations.

    Returns:
    - recommendations (list): A list of recommended game names for the given game.
    - message (str): A message indicating if the entered ID has no data available.
    '''

    # Get item name from Id
    game_name = df_id.loc[df_id['item_id'] == item_id, 'item_name'].iloc[0]

    # Check if item exists in the similarity matrix
    if game_name not in game_sim_df.index:
        return [], f"ID {item_id} has not data avalible."

    # Get the row corresponding to the item
    game_row = game_sim_df.loc[game_name]

    # Find similar items by sorting the row
    similar_games = game_sim_df.dot(game_row).sort_values(ascending=False)

    # Remove the item itself from the recommendation list
    similar_games = similar_games.drop(game_name)

    # Take the first 5 games as recommendations and return in list format
    recommendations = similar_games.head(5).index.tolist()

    return 'Recommend similar items to item {}'.format(item_id), recommendations


In [32]:
row = df_id[df_id['item_name'] == 'theHunter: Primal']
row


Unnamed: 0,item_id,item_name
2436367,322920,theHunter: Primal


In [33]:
get_recommendations_by_id(322920)

('Recommend similar items to item 322920',
 ['BiT Evolution',
  "The Cat! Porfirio's Adventure",
  'Overcast - Walden and the Werewolf',
  'Darkout',
  'Albedo: Eyes from Outer Space'])

Now, we save as parquet file the dataframes we need for our API functions

In [41]:
dfs = [umatrix_norm, user_sim_df, df_id, game_sim_df,]
# Nombres correspondientes a cada DataFrame
names = ['umatrix_norm','user_sim', 'df_id', 'game_sim']

ut.save_to_pq(dfs, names)

DataFrame 'umatrix_norm' save as 'data/parquet/umatrix_norm.parquet'
DataFrame 'user_sim' save as 'data/parquet/user_sim.parquet'
DataFrame 'df_id' save as 'data/parquet/df_id.parquet'
DataFrame 'game_sim' save as 'data/parquet/game_sim.parquet'
