In [1]:
import pandas as pd
import numpy as np
import scipy as sp
%load_ext autoreload
%autoreload 2
import utils as ut
import operator
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity


In [25]:
df_model = pd.read_csv('data/csv/model.csv', encoding='utf-8')

# User Recomendation

In this model we are going to apply the user-item filter, that is, take a user, find similar users and recommend items that those similar users liked. In this case the input is a user and the output is a list of games that are recommended to that user

In [26]:
df_model

Unnamed: 0,item_id,item_name,genres,user_id,rating
0,282010,Carmageddon Max Pack,Action,UTNerd24,1
1,282010,Carmageddon Max Pack,Action,I_DID_911_JUST_SAYING,1
2,282010,Carmageddon Max Pack,Action,saint556,1
3,282010,Carmageddon Max Pack,Action,chidvd,5
4,282010,Carmageddon Max Pack,Action,aerpub,1
...,...,...,...,...,...
4335479,80,Counter-Strike: Condition Zero,Action,lachwe,5
4335480,80,Counter-Strike: Condition Zero,Action,danebuchanan,5
4335481,80,Counter-Strike: Condition Zero,Action,K1NGCJS,1
4335482,80,Counter-Strike: Condition Zero,Action,celebrexISGO,5


In [27]:
df_mod_user = df_model[['user_id', 'item_name', 'rating']]
df_mod_user.drop_duplicates(inplace=True)
df_mod_user

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mod_user.drop_duplicates(inplace=True)


Unnamed: 0,user_id,item_name,rating
0,UTNerd24,Carmageddon Max Pack,1
1,I_DID_911_JUST_SAYING,Carmageddon Max Pack,1
2,saint556,Carmageddon Max Pack,1
3,chidvd,Carmageddon Max Pack,5
4,aerpub,Carmageddon Max Pack,1
...,...,...,...
4335479,lachwe,Counter-Strike: Condition Zero,5
4335480,danebuchanan,Counter-Strike: Condition Zero,5
4335481,K1NGCJS,Counter-Strike: Condition Zero,1
4335482,celebrexISGO,Counter-Strike: Condition Zero,5


We are going to create a matrix that contains the 'user_id' as indexes, the items names as columns and the 'rating' as values.

In [28]:
u_matrix = df_mod_user.pivot_table(index=['user_id'], columns=['item_name'], values='rating')
u_matrix

item_name,! That Bastard Is Trying To Steal Our Gold !,"""Glow Ball"" - The billiard puzzle game",#SelfieTennis,$1 Ride,.EXE,001 Game Creator,0RBITALIS,1 Moment Of Time: Silentville,"1,000 Heads Among the Trees",10 Minute Tower,...,sZone-Online,samurai_jazz,simian.interface++,stratO,the static speaks my name,theBlu,theHunter: Primal,Астролорды: Оружие Пришельцев,侠客风云传(Tale of Wuxia),軒轅劍外傳穹之扉(The Gate of Firmament)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
--000--,,,,,,,,,,,...,3.0,,,,,,,,,
--ace--,,,,,,,,,,,...,,,,,,,,,,
--ionex--,,,,,,,,,,,...,,,,,,,,,,
-2SV-vuLB-Kg,,,,,,,,,,,...,,,,,,,,,,
-Beave-,,,,,,,,,,,...,3.0,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zwanzigdrei,,,,,,,,,,,...,,,,,,,,,,
zy0705,,,,,,,,,,,...,,,,,,,,,,
zynxgameth,,,,,,,,,,,...,,,,,,,,,,
zyr0n1c,,,,,,,,,,,...,,,,,,,,,,


In [29]:
random_rows = 3000
u_matrix_sample = u_matrix.sample(n=random_rows, random_state=42)

We are going to normalize the dataframe values.
Normalization adjusts a user's ratings,  they are centered on zero and scaled based on their variability.

In [32]:
scaler = MinMaxScaler()
umatrix_norm = pd.DataFrame(scaler.fit_transform(u_matrix_sample), columns=u_matrix_sample.columns, index=u_matrix_sample.index)

# The columns that contain only zero or have no rating are deleted, the gaps are filled with 0.
umatrix_norm.fillna(0, inplace=True)
umatrix_norm = umatrix_norm.T
umatrix_norm = umatrix_norm.loc[:, (umatrix_norm != 0).any(axis=0)]
umatrix_norm

  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)


user_id,76561198045470612,fbibradfbi,13lazer,Malleeable,Urotsuki,centor111,kukkai_kfz,76561198196614946,76561198089242036,JorazGamerGuy,...,eduardo281,esem88,76561198101694792,76561198069996401,TorTlew,DavidDoDecree,TheShadowCobra,Nirvash-TypeZERO,taiyoyoyo,babydavestation
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.EXE,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
theBlu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Астролорды: Оружие Пришельцев,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We are converting umatrix_norm to a sparse matrix format to reduce the memory used and improve efficiency in handling large data sets, especially when most of the values ​​in the matrix are zeros. Then we normalize the sparse matrix too.

In [33]:
um_sparse = sp.sparse.csr_matrix(umatrix_norm.values)
um_sparse

<7520x2397 sparse matrix of type '<class 'numpy.float64'>'
	with 177838 stored elements in Compressed Sparse Row format>

In [34]:
um_sparse_normalized = normalize(um_sparse)

Now, we aply the cosine similarity model to the transposed normalized sparse matriz

In [35]:
user_similarity = cosine_similarity(um_sparse_normalized.T)

Now, we are going to save the model matrix in a dataframe in order to use it in our finctuion

In [36]:
user_sim_df = pd.DataFrame(user_similarity, index = umatrix_norm.columns, columns = umatrix_norm.columns)

In [37]:
user_sim_df

user_id,76561198045470612,fbibradfbi,13lazer,Malleeable,Urotsuki,centor111,kukkai_kfz,76561198196614946,76561198089242036,JorazGamerGuy,...,eduardo281,esem88,76561198101694792,76561198069996401,TorTlew,DavidDoDecree,TheShadowCobra,Nirvash-TypeZERO,taiyoyoyo,babydavestation
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
76561198045470612,1.000000,0.005644,0.136337,0.020317,0.022454,0.018603,0.010705,0.025429,0.014677,0.008524,...,0.010559,0.016090,0.001085,0.007455,0.038964,0.003444,0.002970,0.003688,0.022145,0.011504
fbibradfbi,0.005644,1.000000,0.062462,0.050703,0.011542,0.024492,0.013513,0.014905,0.018012,0.010488,...,0.001242,0.020002,0.000706,0.011091,0.031449,0.004527,0.024661,0.001879,0.010455,0.021449
13lazer,0.136337,0.062462,1.000000,0.068958,0.056818,0.047215,0.025471,0.038629,0.046681,0.048041,...,0.005118,0.038560,0.001211,0.013591,0.074787,0.042052,0.066195,0.009462,0.039809,0.049929
Malleeable,0.020317,0.050703,0.068958,1.000000,0.058823,0.026522,0.028838,0.049299,0.035785,0.049208,...,0.048433,0.079510,0.007681,0.028860,0.106406,0.028958,0.026964,0.007179,0.080038,0.031080
Urotsuki,0.022454,0.011542,0.056818,0.058823,1.000000,0.010578,0.010174,0.012954,0.016661,0.015783,...,0.007859,0.027364,0.003938,0.013139,0.018429,0.010307,0.005235,0.004506,0.015832,0.014016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
DavidDoDecree,0.003444,0.004527,0.042052,0.028958,0.010307,0.022199,0.009630,0.008138,0.014086,0.022210,...,0.002640,0.010283,0.002075,0.001139,0.009812,1.000000,0.025240,0.001399,0.009633,0.004569
TheShadowCobra,0.002970,0.024661,0.066195,0.026964,0.005235,0.018360,0.002313,0.005973,0.016388,0.008809,...,0.013717,0.012065,0.002676,0.008761,0.005243,0.025240,1.000000,0.004454,0.018545,0.006279
Nirvash-TypeZERO,0.003688,0.001879,0.009462,0.007179,0.004506,0.021292,0.004312,0.006711,0.005471,0.009628,...,0.010880,0.020590,0.002918,0.004727,0.009774,0.001399,0.004454,1.000000,0.033810,0.007055
taiyoyoyo,0.022145,0.010455,0.039809,0.080038,0.015832,0.038518,0.029072,0.183496,0.026142,0.039749,...,0.079036,0.050815,0.049554,0.010336,0.055390,0.009633,0.018545,0.033810,1.000000,0.010962


Create the user-item recomendation function

In [38]:
def similar_user_recs(user: str):
    '''
    Generates a list of the most recommended items for a user, based on ratings from similar users.

    Arguments:
        user (str): The name or identifier of the user for whom you want to generate recommendations.

    Returns:
        list: A list of the most recommended items for the user based on the rating of similar users.

    '''
    # Check if the user is present in the umatrix_norm columns (if not, return a message)
    if user not in umatrix_norm.columns:
        return('No data available on user {}'.format(user))
    
    # Get the users most similar to the given user
    sim_users = user_sim_df.sort_values(by=user, ascending=False).index[1:11]
    
    best = []  # List to store the items best rated by similar users
    most_common = {}  # Dictionary to count how many times each item is recommended
    
    # For each similar user, find the highest rated item and add it to the 'best' list
    for i in sim_users:
        max_score = umatrix_norm.loc[:, i].max()
        best.append(umatrix_norm[umatrix_norm.loc[:, i]==max_score].index.tolist())
           
    # Counts how many times each item is recommended
    for i in range(len(best)):
        for j in best[i]:
            if j in most_common:
                most_common[j] += 1
            else:
                most_common[j] = 1
    
    # Sort items by recommendation frequency in descending order
    sorted_list = sorted(most_common.items(), key=operator.itemgetter(1), reverse=True)
    
    # Return 5 most recommend items
    return 'Users who are similar to {}:'.format(user), 'also liked it', sorted_list[:5]

In [40]:
similar_user_recs('fbibradfbi')

('Users who are similar to fbibradfbi:',
 'also liked it',
 [("Garry's Mod", 10),
  ('Left 4 Dead 2', 10),
  ('Counter-Strike: Global Offensive', 9),
  ('PAYDAY 2', 8),
  ('Terraria', 8)])

# Item recomendation

This model model will have an item-item relationship, that is, an item is taken, based on how similar that item is to the rest, similar ones are recommended. Here the input is a game and the output is a list of recommended games

In [41]:
df_mod_game = df_model[['item_id', 'item_name', 'genres']]
df_mod_game.drop_duplicates(inplace=True)
df_mod_game

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mod_game.drop_duplicates(inplace=True)


Unnamed: 0,item_id,item_name,genres
0,282010,Carmageddon Max Pack,Action
94,282010,Carmageddon Max Pack,Indie
188,282010,Carmageddon Max Pack,Racing
282,70,Half-Life,Action
3609,1640,Disciples II: Gallean's Return,Strategy
...,...,...,...
4330374,30,Day of Defeat,Action
4331665,200980,Geneforge 2,Strategy
4331767,200980,Geneforge 2,RPG
4331869,200980,Geneforge 2,Indie


We create a new column 'genre_tag' with values 1. 

In [42]:
df_mod_game['genre_tag'] = 1 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_mod_game['genre_tag'] = 1


In [43]:
df_mod_game

Unnamed: 0,item_id,item_name,genres,genre_tag
0,282010,Carmageddon Max Pack,Action,1
94,282010,Carmageddon Max Pack,Indie,1
188,282010,Carmageddon Max Pack,Racing,1
282,70,Half-Life,Action,1
3609,1640,Disciples II: Gallean's Return,Strategy,1
...,...,...,...,...
4330374,30,Day of Defeat,Action,1
4331665,200980,Geneforge 2,Strategy,1
4331767,200980,Geneforge 2,RPG,1
4331869,200980,Geneforge 2,Indie,1


We create an auxiliar dataframe 'df_id' in order to extract the id_item for our recomendation function

In [44]:
df_id = df_mod_game[['item_id', 'item_name']].drop_duplicates()

In [45]:
df_id

Unnamed: 0,item_id,item_name
0,282010,Carmageddon Max Pack
282,70,Half-Life
3609,1640,Disciples II: Gallean's Return
3689,1630,Disciples II: Rise of the Elves
3756,3800,Advent Rising
...,...,...
4329754,40990,Mafia
4329984,259340,Chicken Shoot Gold
4330374,30,Day of Defeat
4331665,200980,Geneforge 2


We are going to create a matrix that contains the 'item_names' as indexes, and 'genres' as columns and the 'genre_tag' as values.

In [46]:
g_matrix = df_mod_game.pivot_table(index='item_name', columns='genres', values='genre_tag', fill_value=0)
g_matrix


genres,Action,Action RPG,Action-Adventure,Adventure,Aliens,Animation & Modeling,Anime,Arcade,Arena Shooter,Asynchronous Multiplayer,...,Violent,Visual Novel,Voxel,Walking Simulator,War,Wargame,Web Publishing,Werewolves,Zombies,e-sports
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
! That Bastard Is Trying To Steal Our Gold !,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"""Glow Ball"" - The billiard puzzle game",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
#SelfieTennis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$1 Ride,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
.EXE,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
theBlu,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
theHunter: Primal,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Астролорды: Оружие Пришельцев,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
侠客风云传(Tale of Wuxia),0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In order to save memory and, without loss of generality, we are going to extract an auxiliary matrix with 60% of the data, randomizing the original matrix

In [47]:
random_rows = 3000
g_matrix_sample = g_matrix.sample(n=random_rows, random_state=42)

In [48]:
g_matrix_sample

genres,Action,Action RPG,Action-Adventure,Adventure,Aliens,Animation & Modeling,Anime,Arcade,Arena Shooter,Asynchronous Multiplayer,...,Violent,Visual Novel,Voxel,Walking Simulator,War,Wargame,Web Publishing,Werewolves,Zombies,e-sports
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Farming Giant,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Maria the Witch,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
March of the Eagles,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Rise of the Ancients,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bathory - The Bloody Countess,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zeno Clash 2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Numba Deluxe,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Panzer Warfare,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Void Destroyer,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


We going to convert g_matrix_sample to a sparse matrix format to reduce the memory used and improve efficiency in handling large data sets, especially when most of the values ​​in the matrix are zeros. 

In [49]:
game_sparse = sp.sparse.csr_matrix(g_matrix_sample.values)

Aply the cosine similarity model to saprse matrix

In [50]:
game_similarity = cosine_similarity(game_sparse)

Save the model matrix in a new dataframe

In [51]:
game_sim_df = pd.DataFrame(game_similarity, index = g_matrix_sample.index, columns = g_matrix_sample.index)
game_sim_df

item_name,Farming Giant,Maria the Witch,March of the Eagles,Rise of the Ancients,Bathory - The Bloody Countess,A grande bagunça espacial - The big space mess,Curse of the Assassin,Trigger Runners,Else Heart.Break(),Selknam Defense,...,Constant C,Jet Gunner,Cypress Inheritance: The Beginning,Mad Combat Marines,Kingdom Tales 2,Zeno Clash 2,Numba Deluxe,Panzer Warfare,Void Destroyer,Command: Northern Inferno
item_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Farming Giant,1.000000,0.000000,0.707107,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.377964,0.447214,0.707107
Maria the Witch,0.000000,1.000000,0.000000,0.353553,0.866025,0.866025,0.577350,0.750000,0.577350,0.577350,...,0.866025,0.866025,0.750000,0.866025,0.500000,0.707107,0.500000,0.566947,0.670820,0.000000
March of the Eagles,0.707107,0.000000,1.000000,0.500000,0.000000,0.000000,0.000000,0.000000,0.000000,0.408248,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.534522,0.632456,1.000000
Rise of the Ancients,0.000000,0.353553,0.500000,1.000000,0.408248,0.408248,0.408248,0.353553,0.408248,0.816497,...,0.408248,0.408248,0.353553,0.408248,0.000000,0.500000,0.000000,0.534522,0.632456,0.500000
Bathory - The Bloody Countess,0.000000,0.866025,0.000000,0.408248,1.000000,0.666667,0.666667,0.577350,0.666667,0.333333,...,0.666667,0.666667,0.577350,0.666667,0.577350,0.408248,0.577350,0.436436,0.516398,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Zeno Clash 2,0.000000,0.707107,0.000000,0.500000,0.408248,0.816497,0.408248,0.707107,0.408248,0.816497,...,0.816497,0.816497,0.707107,0.816497,0.000000,1.000000,0.000000,0.534522,0.632456,0.000000
Numba Deluxe,0.000000,0.500000,0.000000,0.000000,0.577350,0.577350,0.000000,0.000000,0.000000,0.000000,...,0.577350,0.000000,0.000000,0.577350,1.000000,0.000000,1.000000,0.377964,0.000000,0.000000
Panzer Warfare,0.377964,0.566947,0.534522,0.534522,0.436436,0.654654,0.218218,0.377964,0.218218,0.654654,...,0.654654,0.436436,0.377964,0.654654,0.377964,0.534522,0.377964,1.000000,0.676123,0.534522
Void Destroyer,0.447214,0.670820,0.632456,0.632456,0.516398,0.516398,0.516398,0.670820,0.516398,0.774597,...,0.516398,0.774597,0.670820,0.516398,0.000000,0.632456,0.000000,0.676123,1.000000,0.632456


Create the item-item recomendatoin function

In [52]:
def get_recommendations_by_id(item_id: int):
    '''
    Generates recommendations for a game given its ID.

    Parameters:
    - item_id (int): The ID of the game for which you want to obtain recommendations.

    Returns:
    - recommendations (list): A list of recommended game names for the given game.
    - message (str): A message indicating if the entered ID has no data available.
    '''

    # Get item name from Id
    game_name = df_id.loc[df_id['item_id'] == item_id, 'item_name'].iloc[0]

    # Check if item exists in the similarity matrix
    if game_name not in game_sim_df.index:
        return [], f"ID {item_id} has not data avalible."

    # Get the row corresponding to the item
    game_row = game_sim_df.loc[game_name]

    # Find similar items by sorting the row
    similar_games = game_sim_df.dot(game_row).sort_values(ascending=False)

    # Remove the item itself from the recommendation list
    similar_games = similar_games.drop(game_name)

    # Take the first 5 games as recommendations and return in list format
    recommendations = similar_games.head(5).index.tolist()

    return 'Recommend similar items to item {}'.format(item_id), recommendations


In [55]:
row = df_id[df_id['item_name'] == 'Void Destroyer']
row


Unnamed: 0,item_id,item_name
2558850,259660,Void Destroyer


In [56]:
get_recommendations_by_id(259660)

('Recommend similar items to item 259660',
 ["Where's My Helmet?",
  'Hydrophobia: Prophecy',
  'A Valley Without Wind 2',
  'Abyss Raiders: Uncharted',
  'Nova-111'])

Now, we save as parquet file the dataframes we need for our API functions

In [57]:
dfs = [umatrix_norm, user_sim_df, df_id, game_sim_df,]
# Nombres correspondientes a cada DataFrame
names = ['umatrix_norm','user_sim', 'df_id', 'game_sim']

ut.save_to_pq(dfs, names)

DataFrame 'umatrix_norm' save as 'data/umatrix_norm.parquet'
DataFrame 'user_sim' save as 'data/user_sim.parquet'
DataFrame 'df_id' save as 'data/df_id.parquet'
DataFrame 'game_sim' save as 'data/game_sim.parquet'
