In [1]:
# sys is needed only to extend the import search path below.
import sys
# Put the sibling ../ml/ directory first on sys.path so `module` can be imported from it.
sys.path.insert(0, "../ml/")

# NOTE(review): star import hides where names come from. `clean_format_and_export` and
# `rawg_api_key` are used later in this notebook without being defined here, so they
# presumably come from `module` - confirm and import them explicitly.
from module import *
import pandas as pd
import numpy as np
import random
import warnings
# Suppresses every warning for the rest of the kernel session.
warnings.filterwarnings("ignore")
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import requests
import joblib

API Key loaded successfully!


In [2]:
# Read the pre-clustered games table and discard the leftover CSV index column.
df = pd.read_csv("../ml/15759_games_clean_formatted_CLUSTERED.csv").drop(columns=["Unnamed: 0"])
# Lower-case the names (searches below compare against lower-cased user input)
# and parse the release date strings into datetimes.
df = df.assign(
    name=df["name"].apply(lambda title: title.lower()),
    released=pd.to_datetime(df["released"]),
)
df.head()

Unnamed: 0,name,released,rating,rating_top,ratings_count,playtime,suggestions_count,reviews_count,platform_name_0,platform_name_1,genre_0,genre_1,tags_extracted,exceptional_,recommended_,meh_,skip_,cluster
0,grand theft auto v,2013-09-17,4.47,5.0,6777.0,74.0,431.0,6878.0,PC,PlayStation 5,Action,undefined,"['Singleplayer', 'Multiplayer']",58.99,32.83,6.35,1.83,0
1,the witcher 3: wild hunt,2015-05-18,4.65,5.0,6489.0,45.0,675.0,6595.0,Xbox Series S/X,PlayStation 5,Action,RPG,['Singleplayer'],77.15,16.12,4.2,2.53,0
2,portal 2,2011-04-18,4.61,5.0,5615.0,11.0,550.0,5668.0,PlayStation 3,PC,Shooter,Puzzle,"['Singleplayer', 'Multiplayer']",70.15,24.95,2.7,2.21,0
3,counter-strike: global offensive,2012-08-21,3.57,4.0,3443.0,65.0,587.0,3478.0,PC,Linux,Shooter,undefined,['Multiplayer'],16.1,46.81,25.91,11.18,0
4,tomb raider (2013),2013-03-05,4.05,4.0,3863.0,10.0,646.0,3892.0,PlayStation 4,macOS,Action,undefined,"['Singleplayer', 'Multiplayer']",25.49,60.53,10.92,3.06,0


In [3]:
# Show each column's dtype after loading, e.g. to check that 'released' was parsed as a datetime.
df.dtypes

name                         object
released             datetime64[ns]
rating                      float64
rating_top                  float64
ratings_count               float64
playtime                    float64
suggestions_count           float64
reviews_count               float64
platform_name_0              object
platform_name_1              object
genre_0                      object
genre_1                      object
tags_extracted               object
exceptional_                float64
recommended_                float64
meh_                        float64
skip_                       float64
cluster                       int64
dtype: object

In [4]:
# Load the persisted clustering model from disk.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code - only load
# model files that come from a trusted source.
kmeans = joblib.load("../ml/15759_games_kmean7.sav")
# Kept as a module-level global: get_cluster_for_game reads `cluster_centers` directly.
cluster_centers = kmeans.cluster_centers_

In [39]:
def format_clean_game(games):
    """Build a DataFrame from raw game records and run it through the cleaning step.

    The input is wrapped in a ``pandas.DataFrame`` and handed to
    ``clean_format_and_export``, which performs the actual transformation.

    Args:
        games: Raw game data; anything ``pandas.DataFrame(...)`` accepts
            (for example a DataFrame or a list of record dicts).

    Returns:
        Whatever ``clean_format_and_export`` returns for that frame
        (callers treat it as a cleaned ``pandas.DataFrame``).
    """
    raw_frame = pd.DataFrame(games)
    return clean_format_and_export(raw_frame)
    
def get_cluster_for_game(games, df):
    """Assign freshly fetched games to the nearest cluster and suggest a similar game.

    The raw records are cleaned with ``format_clean_game``, their numeric columns
    are standardised, and the batch is compared against every cluster centre held
    in the module-level ``cluster_centers`` array (loaded from the saved model).
    The cluster whose centre is closest to the batch as a whole (Frobenius norm of
    the row-wise differences) is chosen, and a random game from that cluster in
    ``df`` is printed as a recommendation.

    Args:
        games: Raw game records to process; anything ``pandas.DataFrame(...)``
            accepts (``get_suggestions`` passes a list of dicts from the API).
        df (pandas.DataFrame): Catalogue of already clustered games. Must contain:
            * 'cluster': cluster label of each game (used to index ``cluster_centers``).
            * 'platform_name_0': primary platform of each game.
            * 'name': game name.

    Raises:
        ValueError: If the number of numeric features of the processed games does
            not match the dimensionality of the cluster centres.

    Prints:
        * Each processed game with the values of its ``platform_*`` columns.
        * The assigned cluster number.
        * A recommendation (name and primary platform) from the same cluster.
        * "No suggestions found" when there is nothing to cluster against.
    """
    processed_game = format_clean_game(games)
    clusters = sorted(df["cluster"].unique())

    # Nothing to scale or compare: bail out instead of failing inside sklearn/numpy.
    if processed_game.empty or not clusters:
        print("No suggestions found")
        return

    numericals = processed_game.select_dtypes(np.number)
    # NOTE(review): the scaler is re-fitted on the query batch, so the result is
    # standardised relative to these games rather than to the data the model was
    # trained on (a single game scales to all zeros). The scaler fitted at training
    # time should be persisted and reused here - it is not available in this notebook.
    new_game_scaled = StandardScaler().fit_transform(numericals)

    centers = np.asarray(cluster_centers)
    if new_game_scaled.shape[1] != centers.shape[1]:
        raise ValueError(
            f"Processed games have {new_game_scaled.shape[1]} numeric features "
            f"but the cluster centres have {centers.shape[1]}"
        )

    # Compare against the full centre vector of every cluster. (The previous code
    # used a single coordinate of each centre, which made the distances meaningless.)
    distance_list = [
        np.linalg.norm(new_game_scaled - centers[label]) for label in clusters
    ]

    # Map the winning position back to the actual cluster label.
    cluster = int(clusters[int(np.argmin(distance_list))])

    for _, row in processed_game.iterrows():
        print(f'Found {row["name"].title()} on:')
        for col in processed_game.columns:
            if col.startswith("platform_"):
                print(row[col])

    print("Game belongs to cluster: ", cluster)

    # `cluster` was taken from df's own labels, so this selection is never empty.
    suggested_game = df[df['cluster'] == cluster]
    random_game = suggested_game.iloc[random.randrange(len(suggested_game))]

    print(f'We also recommend: {random_game["name"].title()} on {random_game["platform_name_0"]}')

def cluster_search(temp_df, game_selection, df):
    """Recommend a random *other* game from the same cluster as the selected game.

    The selected game is looked up in ``temp_df`` by exact name first; only if no
    exact match exists does it fall back to a prefix match. This keeps a
    selection such as "portal" from being resolved to "portal 2". The selected
    game itself is excluded from the candidates so it is never recommended back.

    Args:
        temp_df (pandas.DataFrame): Search results containing the selected game, with:
            * 'name': Column containing game names.
            * 'cluster': Column indicating cluster assignments.
        game_selection (str): The name of the selected game.
        df (pandas.DataFrame): The full catalogue, with:
            * 'name': Column containing game names.
            * 'cluster': Column indicating cluster assignments.
            * 'platform_name_0' (and optionally 'platform_name_1'..'platform_name_4'):
              platforms of each game; the placeholder "undefined" is skipped.

    Prints:
        * A recommendation for a similar game (name and platforms) from the same cluster.
        * "No suggestions found" if the selection cannot be found in ``temp_df`` or
          no other games exist within its cluster.
    """
    matches = temp_df[temp_df["name"] == game_selection]
    if matches.empty:
        # Fall back to the original prefix lookup when there is no exact match.
        matches = temp_df[temp_df["name"].str.startswith(game_selection)]
    if matches.empty:
        print("No suggestions found")
        return

    selected_name = matches["name"].iloc[0]
    cluster = matches["cluster"].iloc[0]

    # Same cluster, but never the game the user just picked.
    candidates = df[(df["cluster"] == cluster) & (df["name"] != selected_name)].reset_index(drop=True)
    if candidates.empty:
        print("No suggestions found")
        return

    # Pick one candidate row at random.
    random_game = candidates.iloc[random.randrange(len(candidates))]

    platform_columns = ["platform_name_0", "platform_name_1", "platform_name_2", "platform_name_3", "platform_name_4"]
    available_platforms = []
    for col in platform_columns:
        if col not in candidates.columns:
            continue
        platform = random_game[col]
        # Skip missing values as well as the "undefined" placeholder so join() gets only strings.
        if pd.notna(platform) and platform != "undefined":
            available_platforms.append(platform)

    print(f'We also recommend: {random_game["name"].title()} on {", ".join(available_platforms)}')

def _available_platforms(row, platform_columns):
    """Return the platform names in `row` that are present and not the "undefined" placeholder."""
    return [
        row[col]
        for col in platform_columns
        if col in row.index and pd.notna(row[col]) and row[col] != "undefined"
    ]


def get_suggestions(df, rawg_api_key=None):
    """Provides an interactive game search and recommendation experience.

    Prompts for a game name and looks for catalogue entries whose name starts with
    it. Several matches are listed so the user can pick one by index; a single
    match is used directly. In both cases ``cluster_search`` prints a
    recommendation. When nothing matches, the RAWG API is queried and the
    returned games are passed to ``get_cluster_for_game``.

    Args:
        df (pandas.DataFrame): The DataFrame containing game information, including:
            * 'name': Column containing (lower-cased) game names.
            * 'platform_name_0': Column indicating a game's primary platform.
            * 'released': Datetime column with the release date.
            * 'cluster': Column indicating cluster assignments (for recommendations).
        rawg_api_key (str, optional): API key for the RAWG game database
            (https://rawg.io/apidocs). When omitted, the module-level
            ``rawg_api_key`` global is used, as before.

    Requires:
        * The 'requests' library for making API calls.
        * The 'cluster_search' and 'get_cluster_for_game' functions.
    """
    platform_columns = ["platform_name_0", "platform_name_1", "platform_name_2", "platform_name_3", "platform_name_4"]
    separator = "--------------------------------------------------------------------------------------------"

    user_input = input("Enter a game:").strip().lower()
    if not user_input:
        # An empty prefix would match (and print) every game in the catalogue.
        print("Please enter a game name.")
        return

    temp_df = df[df['name'].str.startswith(user_input)].reset_index(drop=True)

    # Multiple records scenario
    if len(temp_df) > 1:
        print(f'Found {len(temp_df)} results:')
        for i, row in temp_df.iterrows():
            platforms = ", ".join(_available_platforms(row, platform_columns))
            print(f"{i})- {row['name'].title()} available in {platforms}")

        raw_index = input("Which one is the game?(Enter index number)")
        try:
            game_index = int(raw_index)
        except ValueError:
            print(f"'{raw_index}' is not a valid index number.")
            return

        if game_index not in temp_df.index:
            print(f"Index {game_index} is not one of the listed results.")
            return

        selected_game = temp_df.loc[game_index]
        platforms = ", ".join(_available_platforms(selected_game, platform_columns))

        print(separator)
        print(f'* Selected - {selected_game["name"].title()} available in {platforms}')
        print(separator)
        # Pass only the chosen row: cluster_search re-resolves the game by name, and
        # other results sharing the same prefix must not be picked instead.
        cluster_search(temp_df.loc[[game_index]], selected_game["name"], df)

    # One record scenario
    elif len(temp_df) == 1:
        game = temp_df.iloc[0]
        release_year = temp_df["released"].dt.year.to_string(index=False)
        print(f'Found {game["name"].title()} on {game["platform_name_0"]} released {release_year}')
        cluster_search(temp_df, game["name"], df)

    # No records scenario
    else:
        # Fall back to the notebook-level key when none was passed explicitly.
        api_key = rawg_api_key if rawg_api_key is not None else globals().get("rawg_api_key")
        if not api_key:
            print("Error: no RAWG API key available")
            return

        search_params = {
            "key": api_key,
            "search": user_input
        }
        BASE_URL = "https://api.rawg.io/api/"

        try:
            # A timeout keeps the notebook from hanging forever on a stalled connection.
            response = requests.get(BASE_URL + "games", params=search_params, timeout=10)
        except requests.RequestException as exc:
            print("Error:", exc)
            return

        if response.status_code != 200:
            print("Error:", response)
            return

        games = list(response.json().get("results", []))
        print("searching games... returned: ", len(games), "records")
        if not games:
            # Nothing to cluster; avoid failing inside get_cluster_for_game.
            print("No suggestions found")
            return

        get_cluster_for_game(games, df)
        

    

In [41]:
# Start the interactive search; this blocks while waiting for input at the prompt.
get_suggestions(df)