In [40]:
# What follows is a step-by-step walkthrough to get the result:

# A recommendation system to get top 5 recommended games for a given user.
# Intermediate results are presented to see transformations taking place.
# At the end, the final response is shown.
# With all this working, we copy the relevant results to the main.py file, so they can be consumed through the API.

# Although this may be a standalone procedure, when deployed, this recommendation system is going to call the other
# recommendation system, as we will see in just a few lines.

In [41]:
import pandas as pd

# Display settings only (they do not change the data): cap the printed width of each column
# and show at most 5 rows per DataFrame/Series, so longer outputs are truncated with "...".
pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows',5)

In [42]:
# Load the pre-processed "light" datasets used by this notebook.
# NOTE(review): genre_df is not referenced by any of the cells shown here -- confirm it is still needed.
genre_df = pd.read_csv('../light_data/playtimegenre.csv')
# Lookup table from item_id to the game name/title (column 'app_name&title' is used for the final response).
games_names_df = pd.read_csv('../light_data/item_id&name.csv')
# One row per (user_id, item_id) pair with its accumulated 'playtime_forever'.
games_played_df = pd.read_parquet('../light_data/user_id&playtime.parquet', engine='fastparquet')

In [43]:
# Peek at a few random rows; no random_state is passed, so the sample differs on every run.
games_played_df.sample(3)

Unnamed: 0_level_0,user_id,item_id,playtime_forever
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3721412,xinyo,57300,77
4682070,76561198066440534,239070,3651
110968,jichaelcoaching,246840,526


In [50]:
# Order every (user, game) row from most to least played, then keep only the first row of
# each 'user_id' group: that row is the game the user has played the most.
# Assumption: the most-played game is the one the user likes the most.

playtime_descending = games_played_df.sort_values(by='playtime_forever', ascending=False)
most_played_game_by_user = playtime_descending.groupby('user_id').head(1)

# Persist the result; user_recommendation() reads it back from this same path.
most_played_game_by_user.to_csv('../light_data/recommendfunc2.csv')
most_played_game_by_user

Unnamed: 0_level_0,user_id,item_id,playtime_forever
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
587715,wolop,4000,642773
2499068,Evilutional,212200,635295
...,...,...,...
5036595,zadow,291480,1
4775385,76561198071713972,231060,1


In [45]:
# Sanity check for the preceding sort + group step: list one user's games by descending
# playtime. The first row shown here should be the row kept in most_played_game_by_user.

games_played_df[games_played_df['user_id']=='Evilutional'].sort_values(by='playtime_forever', ascending=False)

Unnamed: 0_level_0,user_id,item_id,playtime_forever
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2499068,Evilutional,212200,635295
2499070,Evilutional,230410,68110
...,...,...,...
2499064,Evilutional,221640,4
2499101,Evilutional,304930,2


In [46]:
# The single row kept for the same user; it should match the top row of the full listing.
most_played_game_by_user[most_played_game_by_user['user_id']=='Evilutional']

Unnamed: 0_level_0,user_id,item_id,playtime_forever
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2499068,Evilutional,212200,635295


In [58]:
# Most-played game of the user that is passed to user_recommendation() in the example call.
most_played_game_by_user[most_played_game_by_user['user_id']=='76561198068270286']

Unnamed: 0_level_0,user_id,item_id,playtime_forever
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
3805612,76561198068270286,209870,34961


In [63]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

def user_recommendation(user_id:str):
    """Recommend games similar to the game a user has played the most.

    The user's most-played ``item_id`` is looked up in ``recommendfunc2.csv``. Every row of
    ``recommendfunc1.parquet`` is then ranked by the cosine similarity between its feature
    columns and those of that game, and the ``item_id`` of the six best-ranked rows is returned.

    Parameters
    ----------
    user_id : str
        User identifier, as stored in the ``user_id`` column of ``recommendfunc2.csv``.

    Returns
    -------
    list of int
        ``item_id`` values of the six most similar rows, most similar first.
    set of str
        A one-element set with a "not found" message when the user is not present in the
        CSV, or when the user's game is not present in the parquet file.

    Notes
    -----
    NOTE(review): the selected game itself is not guaranteed to appear in the result. When
    several games share the same feature vector they all tie at the maximum similarity and
    the order among them is arbitrary -- confirm whether the caller expects the game itself
    to be included or excluded.
    """

    data = pd.read_parquet("../light_data/recommendfunc1.parquet", engine="fastparquet")
    # Read user_id as text. The column mixes numeric-looking ids with nicknames, and letting
    # read_csv infer the type could parse some ids as integers, which would never compare
    # equal to the string received from the caller.
    user_data = pd.read_csv("../light_data/recommendfunc2.csv", dtype={'user_id': str})

    # Guard against unknown users instead of failing with an IndexError on an empty selection.
    user_items = user_data.loc[user_data['user_id'] == user_id, 'item_id']
    if user_items.empty:
        return {f'No user_id like {user_id}'}
    item_id = user_items.iloc[0]

    data['item_id'] = data['item_id'].astype(int)

    is_selected = data['item_id'] == item_id
    if not is_selected.any():
        return {f'No item_id like {item_id}'}

    # Everything from the third column onwards is used as the feature vector.
    # NOTE(review): assumes the first two columns are identifiers/labels -- confirm against
    # the schema of recommendfunc1.parquet.
    features = data[data.columns[2:]]
    selected_item = features[is_selected]

    # Row 0 of the result holds the similarity of the selected game against every row of data.
    similarity = cosine_similarity(selected_item, features)

    # Positions of the six highest similarities, in descending order.
    top_positions = np.argsort(similarity[0])[::-1][0:6]

    # argsort yields row *positions*, so they must be resolved with .iloc; label-based .loc
    # only gives the same rows when the index happens to be 0..n-1.
    return data['item_id'].iloc[top_positions].tolist()

# Example usage: get the 6 items most similar to the most-played game of user '76561198068270286'
top_6_similar_items = user_recommendation('76561198068270286')
print(top_6_similar_items)

este es el item que vuelve del df 209870
[216445, 266490, 316700, 298950, 216430, 492890]


In [64]:
# Build an ordered categorical dtype whose category order is the recommendation ranking,
# so that sorting by 'item_id' reproduces the order of top_6_similar_items.

order = pd.CategoricalDtype(top_6_similar_items, ordered=True)

# Keep only the recommended games, then convert 'item_id' to the ordered categorical on that
# filtered copy. games_names_df itself is left untouched: casting the whole column in place
# would turn every non-recommended id into NaN and make this cell fail to give a correct
# answer when re-run with a different recommendation list.

response = (
    games_names_df[games_names_df['item_id'].isin(top_6_similar_items)]
    .assign(item_id=lambda names: names['item_id'].astype(order))
    # Sort by the categorical order, i.e. by recommendation rank.
    .sort_values(by='item_id')
    .reset_index(drop=True)
)
response['app_name&title']

0           Gotham City Impostors Free to Play: Pirate Costume
1                       Lili: Child of Geos - Complete Edition
                               ...                            
4    Gotham City Impostors Free to Play: Weapon Pack - Starter
5              Crusaders of the Lost Idols - Epic Starter Pack
Name: app_name&title, Length: 6, dtype: object