In [1]:
# Importing Libraries
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import Pipeline
import joblib


In [2]:
# Importing games dataset
df_games = pd.read_parquet('datasets/processed/games.parquet')
df_games = df_games.drop(['url', 'reviews_url'], axis=1)
df_games.head(3)

Unnamed: 0,item_id,title,genre,tags,developer,release_year
0,761140,Lost Summoner Kitty,Indie,"['Action', 'Casual', 'Indie', 'Simulation', 'S...",Kotoshiro,2018
1,643980,Ironbound,Indie,"['Free to Play', 'Indie', 'RPG', 'Strategy']",Secret Level SRL,2018
2,670290,Real Pool 3D - Poolians,Indie,"['Casual', 'Free to Play', 'Indie', 'Simulatio...",Poolians.com,2017


In [4]:
df_games = df_games.rename(columns={'release_year': 'year'})

In [5]:
# Optimize data types for memory efficiency 
df_games[['item_id','year']] = df_games[['item_id','year']].astype(int)

In [3]:
# Exporting API dataset
df_games.to_parquet("API/datasets_API/df_games_model.parquet")

In [5]:
# Creating a TF-IDF vectorizer for the 'developer' column
developer_vectorizer = TfidfVectorizer(stop_words='english')

# Creating a TF-IDF vectorizer for the 'genre' column
genre_vectorizer = TfidfVectorizer(stop_words='english')

# Creating a TF-IDF vectorizer for the 'tags' column
tags_vectorizer = TfidfVectorizer(stop_words='english')

# Creating a transformation for the 'release_year' column (normalization)
year_transformer = 'passthrough'  

# Creating the column transformer
preprocessor = ColumnTransformer(
    transformers=[
        ('developer', developer_vectorizer, 'developer'),
        ('genre', genre_vectorizer, 'genre'),
        ('tags', tags_vectorizer, 'tags'),
        ('year', year_transformer, ['year'])
    ],
    remainder='drop'
)


In [6]:
# Creating KNN model
knn_model = NearestNeighbors(n_neighbors=6, algorithm='brute', metric='cosine')

# Creating the preprocessing and KNN model pipeline
pipeline = Pipeline([
    ('preprocessor', preprocessor),
    ('knn_model', knn_model)
])

# Fitting pipeline to the DataFrame
pipeline.fit(df_games)

In [8]:
# Saving model 
joblib.dump(pipeline, 'API/recommendation_model.joblib')

['API/recommendation_model.joblib']

In [9]:
# Function to get recommendations
def game_recommendations(item_id):
    '''
    This function helps users discover games that are similar to a given game (specified by item_id)
    by employing a K-nearest neighbors approach after transforming the input data.

    Args:
        item_id: from the game of wich we want to get recommendations based on (6 digits int).

    Returns: 
        The generated dictionary of recommendations.
    '''
    # Ensure proper data types
    if not isinstance(item_id, int) or not (000000 <= item_id <= 999999):
        raise TypeError(f"Expected 'item_id' to be an 6-digit integer, got {type(item_id)}.")

    # Check if item_id is in the data to be analyzed
    if item_id not in df_games['item_id'].values:
        return {'error': 'Item_id not found in the data to be analyzed.'}

    # Find the index of the game in the DataFrame based on the item_id
    game_info = df_games[df_games['item_id'] == item_id]

    # Transform the game information with the preprocessor
    transformed_input = pipeline.named_steps['preprocessor'].transform(game_info)

    # Find the nearest games using KNN
    distances, indices = pipeline.named_steps['knn_model'].kneighbors(transformed_input)

    # Filter the current game
    distances = distances.flatten()[1:]
    indices = indices.flatten()[1:]

    # Get recommended titles
    recommended_titles = df_games.loc[indices, 'title'].tolist()

    # Build and return dictionary
    return [{'{}'.format(i + 1): title} for i, title in enumerate(recommended_titles)]


In [10]:
# Aplicattion example
game_recommendations(643980)

[{'1': 'Brief Karate Foolish'},
 {'2': 'Nightside Demo'},
 {'3': "Defender's Quest: Valley of the Forgotten (DX edition)"},
 {'4': 'Labyrinth - Starter Pack'},
 {'5': 'MINDNIGHT'}]