In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

### Dataset obtained from Kaggle

https://www.kaggle.com/datasets/trentenberam/metacritic-games-all-time

In [2]:
games = pd.read_csv("games.csv")
reviews = pd.read_csv("reviews.csv")

In [3]:
games.isnull().sum()

Unnamed: 0            0
title                 0
release_date          0
genre                 0
platforms             0
developer            19
esrb_rating        2115
ESRBs             11462
metascore             0
userscore             0
critic_reviews        0
user_reviews       1364
num_players          13
summary             118
dtype: int64

In [4]:
games["platforms"].unique()

array(['Xbox', 'Dreamcast', 'PlayStation 3', 'Xbox 360', 'PlayStation 2',
       'PC', 'GameCube', 'PlayStation', 'Nintendo 64', 'Xbox Series X',
       'Switch', 'DS', 'Wii U', 'Wii', 'PlayStation 4',
       'PlayStation Vita', 'PlayStation 5', 'Game Boy Advance',
       'Xbox One', '3DS', 'PSP', 'Stadia'], dtype=object)

In [5]:
# Platforms to discard. For the platform values present in this dataset, what
# remains is PC, Xbox Series X, Switch, PlayStation 4 and PlayStation 5.
plataformas_descartadas = [
    "Xbox", "Dreamcast", "PlayStation 3", "Xbox 360", "PlayStation 2",
    "GameCube", "PlayStation", "Nintendo 64", "DS", "Wii U", "Wii",
    "PlayStation Vita", "Game Boy Advance", "Xbox One", "3DS", "PSP", "Stadia",
]

# .copy() detaches the filtered frame from the one read from disk. Later cells
# mutate `games` in place (drop(..., inplace=True), .loc[...] = ..., column
# assignment); on a bare boolean-mask slice those writes can raise
# SettingWithCopyWarning under pandas' classic (non copy-on-write) semantics.
games = games[~games["platforms"].isin(plataformas_descartadas)].copy()

In [6]:
# Columns removed from the games table before the rest of the notebook runs;
# title, genre, platforms, esrb_rating and num_players are kept.
columnas_descartadas = [
    "Unnamed: 0",
    "release_date",
    "developer",
    "ESRBs",
    "metascore",
    "userscore",
    "critic_reviews",
    "user_reviews",
    "summary",
]
games.drop(columns=columnas_descartadas, inplace=True)

In [7]:
games.platforms.unique()

array(['PC', 'Xbox Series X', 'Switch', 'PlayStation 4', 'PlayStation 5'],
      dtype=object)

In [8]:
games.isnull().sum()

title             0
genre             0
platforms         0
esrb_rating    1980
num_players       6
dtype: int64

In [9]:
games.loc[games["esrb_rating"] == "K-A"]

Unnamed: 0,title,genre,platforms,esrb_rating,num_players
285,Sid Meier's Civilization II,"Strategy, Turn-Based, Historic, General, Histo...",PC,K-A,1 Player
608,Sid Meier's Gettysburg!,"Strategy, Real-Time, Wargame, Real-Time, General",PC,K-A,1-8 Players
1656,Obsidian,"Adventure, General, General",PC,K-A,1 Player
2280,Borderlands,"Adventure, 3D, First-Person, Fantasy, Fantasy",PC,K-A,Up to 12 Players
2760,Master of Orion II: Battle at Antares,"Strategy, Turn-Based, Sci-Fi, 4X",PC,K-A,Up to 8 Players
4543,Circle of Blood,"Adventure, General, General, Point-and-Click",PC,K-A,1 Player


In [10]:
# Manual ESRB overrides, applied by exact title match. The first six titles are
# the rows that carried the "K-A" rating in the listing above; "PaRappa the
# Rapper" was not in that listing, so its entry only has an effect if a row
# with that title exists.
correcciones_esrb = {
    "Sid Meier's Civilization II": "E",
    "Sid Meier's Gettysburg!": "E",
    "Obsidian": "E10+",
    "Borderlands": "M",
    "Master of Orion II: Battle at Antares": "E",
    "Circle of Blood": "M",
    "PaRappa the Rapper": "E",
}
for titulo, clasificacion in correcciones_esrb.items():
    games.loc[games["title"] == titulo, "esrb_rating"] = clasificacion

In [11]:
games["esrb_rating"].unique()

array(['M', 'T', 'E10+', nan, 'E', 'RP', 'AO'], dtype=object)

In [12]:
# Games without an ESRB rating are labelled "RP"; existing ratings are kept.
games["esrb_rating"] = games["esrb_rating"].where(games["esrb_rating"].notna(), "RP")

In [13]:
games["esrb_rating"].unique()

array(['M', 'T', 'E10+', 'RP', 'E', 'AO'], dtype=object)

In [14]:
# Binarise num_players: exactly "1 Player" stays as is; every other value,
# including missing ones, becomes "Multiplayer".
games["num_players"] = np.where(
    games["num_players"].eq("1 Player"),
    "1 Player",
    "Multiplayer",
)

In [15]:
games["num_players"].unique()

array(['Multiplayer', '1 Player'], dtype=object)

In [16]:
# The review text and the exported index column are not used further.
reviews.drop(["review", "Unnamed: 0"], axis="columns", inplace=True)

In [17]:
# Keep a single row per (ids, game) pair; the first occurrence wins.
reviews.drop_duplicates(["ids", "game"], keep="first", inplace=True)

In [18]:
games["genre"].unique()

array(['Role-Playing, Action RPG, Action RPG',
       'Adventure, General, General, 3D, Third-Person, Fantasy',
       'Action RPG, Role-Playing, Action RPG', ...,
       'Action Adventure, Adventure, Third-Person, Open-World, Modern, Modern',
       'Massively Multiplayer Online, Modern, Role-Playing, Massively Multiplayer',
       'Driving, Modern, Racing, Motorcycle, Motocross, Modern, Action Adventure, General'],
      dtype=object)

In [19]:
# Collapse the comma-separated genre tags into one coarse label per game.
# The list is ordered by priority: the first label found in the tag string
# wins (a game tagged both "Shooter" and "Role-Playing" becomes "Shooter").
# Games matching none of the labels fall back to "Miscellaneous".
prioridad_generos = [
    "Action Adventure",
    "Shooter",
    "Role-Playing",
    "Strategy",
    "Platformer",
    "Fighting",
    "Puzzle",
    "Sports",
    "Simulation",
    "Racing",
]

# Evaluate every condition against the original tag strings before the column
# is overwritten. na=False makes missing tags count as "no match" (what the
# previous `== True` comparison achieved implicitly); regex=False searches for
# the label as a plain substring.
etiquetas_originales = games["genre"]
condiciones_genero = [
    etiquetas_originales.str.contains(etiqueta, regex=False, na=False).to_numpy(dtype=bool)
    for etiqueta in prioridad_generos
]

# np.select takes the choice of the first condition that is True per row,
# replacing the ten-level nested np.where pyramid.
games["genre"] = np.select(condiciones_genero, prioridad_generos, default="Miscellaneous")

In [20]:
games.genre.value_counts()

genre
Miscellaneous       1785
Action Adventure    1576
Strategy            1226
Role-Playing        1158
Shooter             1111
Platformer           614
Simulation           545
Sports               377
Puzzle               227
Racing               197
Fighting             139
Name: count, dtype: int64

In [21]:
games["title"].value_counts()

title
Life is Strange: True Colors                                 5
Lost in Random                                               5
Hot Wheels Unleashed                                         5
Tony Hawk's Pro Skater 1 + 2                                 5
Balan Wonderworld                                            5
                                                            ..
The World Ends with You: Final Remix                         1
Peter Jackson's King Kong: The Official Game of the Movie    1
Men of War: Red Tide                                         1
Star Renegades                                               1
JoJo's Bizarre Adventure: Eyes of Heaven                     1
Name: count, Length: 6968, dtype: int64

In [22]:
# One row per title, keeping the first occurrence.
# NOTE(review): titles listed up to five times above look like one row per
# platform; if so, only the first platform of each game survives this step and
# later platform filters see just that one - confirm this is intended.
games.drop_duplicates(subset="title", keep="first", inplace=True)

In [23]:
games["title"].value_counts()

title
Diablo                                      1
Megaquarium                                 1
Ultima Online: Third Dawn                   1
The Matrix Online                           1
Exist Archive: The Other Side of the Sky    1
                                           ..
Austerlitz: Napoleon's Greatest Victory     1
SNK 40th Anniversary Collection             1
Life is Strange 2                           1
Fallout 3: The Pitt                         1
JoJo's Bizarre Adventure: Eyes of Heaven    1
Name: count, Length: 6968, dtype: int64

In [24]:
games.shape

(6968, 5)

In [25]:
# Left join: every game row is kept and paired with each review whose `game`
# equals the game's `title`; games without reviews get NaN review columns.
unified = pd.merge(
    games,
    reviews,
    how="left",
    left_on="title",
    right_on="game",
)

In [26]:
unified.shape

(301952, 8)

In [27]:
unified["rating"].isnull()

0         False
1         False
2         False
3         False
4         False
          ...  
301947    False
301948    False
301949    False
301950    False
301951    False
Name: rating, Length: 301952, dtype: bool

In [28]:
# Remove rows with no rating (e.g. games that matched no review in the join).
unified.dropna(axis="index", subset=["rating"], inplace=True)

In [29]:
unified["rating"].isnull().sum()

np.int64(0)

In [30]:
# Number of rating rows per game.
conteo = unified["game"].value_counts()

In [31]:
# True for rows whose game has more than 10 rating rows.
filtro = unified["game"].isin(conteo.index[conteo > 10])

In [32]:
# Keep only the rows selected by the boolean mask built in the previous cell.
unified = unified[filtro]

In [33]:
unified.shape

(289162, 8)

In [34]:
unified.nunique()

title            3956
genre              11
platforms           5
esrb_rating         6
num_players         2
ids            151971
game             3956
rating             11
dtype: int64

In [35]:
# Cast the user id and rating columns to integers.
for columna in ("ids", "rating"):
    unified[columna] = unified[columna].astype("int")

In [36]:
unified.dtypes

title          object
genre          object
platforms      object
esrb_rating    object
num_players    object
ids             int64
game           object
rating          int64
dtype: object

In [37]:
# Map user ids and titles to dense integer codes (category codes follow the
# sorted order of the distinct values) so they can serve as matrix coordinates.
user_codes, game_codes = (
    unified[columna].astype("category").cat.codes for columna in ("ids", "title")
)

# Sparse ratings matrix: one row per user code, one column per title code,
# stored value = rating. Pairs with no rating are implicit zeros.
matriz = csr_matrix((unified["rating"], (user_codes, game_codes)))

In [41]:
# User-based collaborative filtering: find the users closest to a hand-built
# rating profile and recommend the unrated games with the highest mean rating
# among those neighbours.

# numpy and csr_matrix are already imported in the first cell; only the
# NearestNeighbors import is new here.
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# Neighbour search over the rows of `matriz` (one row per user code), using
# cosine distance and returning 50 neighbours per query.
modelo = NearestNeighbors(
    n_neighbors = 50,
    metric="cosine"
)
modelo.fit(matriz)

# Dense rating profile for a hypothetical new user: one slot per column of
# `matriz` (one per title code); 0 means "not rated".
nuevo_usuario = np.zeros(matriz.shape[1])

# Ratings for three games, addressed by column position (title category code).
# NOTE(review): which titles 1936 / 2207 / 2980 correspond to is not shown in
# this cell and depends on the category ordering of the current data - look
# them up in `game_ids` before changing the dataset or these values.
nuevo_usuario[1936] = 10
nuevo_usuario[2207] = 8
nuevo_usuario[2980] = 6

# Convert the 1-D profile into a single-row sparse matrix for the query.
nuevo_usuario = csr_matrix(nuevo_usuario)

distancias, vecinos = modelo.kneighbors(nuevo_usuario)

# kneighbors returns one row of indices per query row; there is a single query.
vecinos_idx = vecinos[0]

# Rating rows of the neighbouring users.
tabla_vecinos = matriz[vecinos_idx]

# Column-wise mean over the neighbour rows, flattened to a 1-D array. Games a
# neighbour did not rate are stored as 0 and therefore count in the mean.
rating_vecinos = np.array(tabla_vecinos.mean(axis=0)).ravel()

# Mask of the games the new user has not rated.
usuario_ratings = nuevo_usuario.toarray().ravel()
filtro_no_jugados = usuario_ratings == 0

# Positions (within the unrated subset) of the 5 highest mean ratings,
# best first.
top_idx = np.argsort(
    rating_vecinos[filtro_no_jugados]
)[::-1][:5]

# Rebuild the title categories (same construction as `game_codes`) to map
# column positions back to titles; apply the same unrated mask before indexing
# because `top_idx` is relative to that subset.
game_ids = unified["title"].astype("category").cat.categories
juegos_recomendados = game_ids[filtro_no_jugados][top_idx]


print(juegos_recomendados)

Index(['Sonic Mania', 'The Legend of Zelda: Twilight Princess HD',
       'Super Mario Party', 'Blaster Master Zero', 'LEGO Worlds'],
      dtype='object')


In [None]:
def filtrador_recomendador(consola="", genero="", juegos=None, recomendados=None):
    """Restrict the recommended titles to a platform and/or a genre.

    Parameters
    ----------
    consola : str, optional
        Platform to keep, compared for equality with the ``platforms`` column.
        An empty value (``""`` or ``None``) means "do not filter by platform".
    genero : str, optional
        Genre to keep, compared for equality with the ``genre`` column.
        An empty value (``""`` or ``None``) means "do not filter by genre".
    juegos : pandas.DataFrame, optional
        Catalogue with ``title``, ``platforms`` and ``genre`` columns.
        Defaults to the notebook-level ``games`` frame.
    recomendados : pandas.Index or pandas.Series, optional
        Recommended titles to filter. Defaults to the notebook-level
        ``juegos_recomendados``.

    Returns
    -------
    Same type as ``recomendados``
        The recommended titles that satisfy every requested filter, in their
        original order. With no filter requested, the recommendations are
        returned unchanged.

    Notes
    -----
    Previously, calling the function with both filters left at their defaults
    compared ``platforms`` and ``genre`` against the empty string and so
    returned nothing.

    NOTE(review): if the catalogue holds a single row per title, the platform
    filter only matches the platform recorded on that row - confirm against
    how ``games`` is de-duplicated.
    """
    if juegos is None:
        juegos = games
    if recomendados is None:
        recomendados = juegos_recomendados

    # Nothing to filter on: every recommendation qualifies.
    if not consola and not genero:
        return recomendados

    # Start from "all rows pass" and narrow down with each requested filter.
    cumple = pd.Series(True, index=juegos.index, dtype=bool)
    if consola:
        cumple &= juegos["platforms"] == consola
    if genero:
        cumple &= juegos["genre"] == genero

    titulos_validos = juegos.loc[cumple, "title"]
    return recomendados[recomendados.isin(titulos_validos)]

In [None]:
filtrador_recomendador(genero ="Sports")