In [3]:
# What follows is a step-by-step to get the reult:

# A recommendation system to get top 5 similar games for a given one.
# Intermediate results are presented to see transformations taking place.
# At te end, the final response is shown.
# Whith all this working, we copy relevant results to the main.py file, so they can be consumend through the API.

In [2]:
import pandas as pd

pd.set_option('display.max_colwidth', 100)
pd.set_option('display.max_rows',7)

In [6]:
genre_df = pd.read_csv('../light_data/playtimegenre.csv')
games_names_df = pd.read_csv('../light_data/item_id&name.csv')

In [7]:
games_names_df.sample(1)

Unnamed: 0,item_id,app_name&title
31095,49476,Magic 2012 Deck Pack 3


In [8]:
genre_df.sample(1)

Unnamed: 0,item_id,release_date_imputed,tags&genres
2111,296210,2006-10-17,"['Strategy', 'Simulation', 'Tactical']"


In [9]:
genre_df[genre_df['tags&genres'].isna()]

Unnamed: 0,item_id,release_date_imputed,tags&genres
278,31990,2009-07-21,
297,35050,2009-08-19,
308,39392,2009-09-23,
...,...,...,...
31635,36270,2009-07-17,
31648,34910,2009-07-20,
31804,11920,2008-04-18,


In [10]:
genre_df.dropna(subset=['tags&genres'], inplace=True)


In [11]:
genre_df[genre_df['tags&genres'].isna()]

Unnamed: 0,item_id,release_date_imputed,tags&genres


In [12]:
# eval is a built-in that evaluates a string as a Python expression.
# Used to convert strings that represent Python objects of lists into the actual Python objects.

genre_df['tags&genres'] = genre_df['tags&genres'].astype(str).apply(eval)

In [13]:
genre_df.sample(1)

Unnamed: 0,item_id,release_date_imputed,tags&genres
10144,592812,2017-06-19,"[Adventure, RPG, Indie]"


In [14]:
recommendation_df = genre_df[['item_id', 'tags&genres']]

In [15]:
recommendation_df.sample(1)

Unnamed: 0,item_id,tags&genres
24302,424280,"[Free to Play, Action, Fighting, Indie, Funny, Gore, 2D, 2D Fighter, Survival, Cartoon, Fast-Pac..."


In [16]:
# Flatten the lists in the 'tags&genres' column

df_expanded = recommendation_df['tags&genres'].explode().reset_index(drop=True)

# Create dummy variables for the 'tags&genres' column

dummies = pd.get_dummies(df_expanded)

# Merge the dummy variables with the original

recommendation_df_large = pd.merge(recommendation_df, dummies, left_index=True, right_index=True)


In [19]:
recommendation_df_large.sample(1)

Unnamed: 0,item_id,tags&genres,1980s,1990's,2.5D,2D,2D Fighter,360 Video,3D Platformer,3D Vision,...,Warhammer 40K,Web Publishing,Werewolves,Western,Word Game,World War I,World War II,Wrestling,Zombies,e-sports
3844,336900,"[Action, Adventure, Indie]",False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [18]:
dummies.columns

Index(['1980s', '1990's', '2.5D', '2D', '2D Fighter', '360 Video',
       '3D Platformer', '3D Vision', '4 Player Local', '4X',
       ...
       'Warhammer 40K', 'Web Publishing', 'Werewolves', 'Western', 'Word Game',
       'World War I', 'World War II', 'Wrestling', 'Zombies', 'e-sports'],
      dtype='object', length=339)

In [23]:
recommendation_df_large.to_parquet('../light_data/recommendfunc1.parquet', engine='fastparquet')

In [16]:
recommendation_df_large.sample(1)

Unnamed: 0,item_id,tags&genres,1980s,1990's,2.5D,2D,2D Fighter,360 Video,3D Platformer,3D Vision,...,Warhammer 40K,Web Publishing,Werewolves,Western,Word Game,World War I,World War II,Wrestling,Zombies,e-sports
19900,555250,[Casual],False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [29]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pandas as pd

def game_recommendation(item_id):

    data = pd.read_parquet("../light_data/recommendfunc1.parquet", engine="fastparquet")

    data['item_id'] = data['item_id'].astype(int)

    if recommendation_df_large['item_id'].isin([item_id]).any():
        # Calculates the cosine similarity between the selected game (item_id) and
        # all other games in the genre_features DataFrame. The result is stored in the 'similarity' variable.
        selected_item = recommendation_df_large[recommendation_df_large['item_id'] == item_id][recommendation_df_large.columns[2:]]
        features_columns = recommendation_df_large[recommendation_df_large.columns[2:]]
        similarity = cosine_similarity(selected_item[recommendation_df_large.columns[2:]], features_columns)

        # Get the indices of the top 6 similar items (we include the first one as well)
        similar_items_indices = np.argsort(similarity[0])[::-1][0:6]
        
        # Extract item_ids of the top 6 similar items
        top_n_similar_items = recommendation_df_large.loc[similar_items_indices, 'item_id'].tolist()

        games_similar = response.iloc[0]

        
        return top_n_similar_items
    
    return {f'No item_id like {item_id}'}

# Example usage to get the top 6 similar items for item_id 208800
top_6_similar_items = game_recommendation(754120)
print(top_6_similar_items)

[333390, 505040, 224763, 590185, 234160, 501440]


In [38]:
# Create a Categorical data type with the desired order. This data type is used to represent categorical data with a specified order.

order = pd.CategoricalDtype(top_6_similar_items, ordered=True)

# converting the 'item_id' column in the DataFrame (games_names_df) to the Categorical data type created.
# This step is crucial for ensuring that subsequent operations take into account the desired order of the categories.

games_names_df['item_id'] = games_names_df['item_id'].astype(order)

# Filter the DataFrame based on the 'item_id' values in top_6_similar_items

response = games_names_df[games_names_df['item_id'].isin(top_6_similar_items)]

# Sort sub_df based on the order of 'item_id'

response = response.sort_values(by='item_id')

# Reset the index if needed

response = response.reset_index(drop=True)
response['app_name&title']

0                             Tales of Aravorn: Seasons Of The Wolf
1                                                           FORTIFY
2                                           FEZ Original Soundtrack
3    Rocksmith® 2014 Edition – Remastered – Pearl Jam - “Even Flow”
4                                              Strike Suit Infinity
5                                                             Stars
Name: app_name&title, dtype: object

In [39]:

response.iloc[0]['app_name&title']

'Tales of Aravorn: Seasons Of The Wolf'

In [40]:
response.iloc[1:]['app_name&title']

1                                                           FORTIFY
2                                           FEZ Original Soundtrack
3    Rocksmith® 2014 Edition – Remastered – Pearl Jam - “Even Flow”
4                                              Strike Suit Infinity
5                                                             Stars
Name: app_name&title, dtype: object