In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# We import the datasests 
dfPlay = pd.read_csv('datasets/play_time.csv')
dfGameGenre= pd.read_csv('datasets/game_genres.csv')
dfnameGame = pd.read_csv('datasets/name_games.csv')
dfGenre = pd.read_csv('datasets/genres.csv')
dfReviews = pd.read_csv('datasets/reviews.csv')

# Sistema de Recomendacion
### Similitud del Coseno : item - item

Se toma un item, en base a que tan similar esa ese ítem al resto, se recomiendan similares.</br>
Aquí el input es un juego y el output es una lista de juegos recomendados

In [3]:
# Para este SR de juegos consideraremos sus generos y el tiempo de juego de los usarios.
# Aquel juego que no tenga registro de horas jugadas, no sera considerado/recomendado

# Escogemos los dataframes con los que vamos a trabajar
print(dfGameGenre.head(1)) # Dataframe principal por la relacion juego - genero
print(dfnameGame.head(1))
print(dfGenre.head(1))
print(dfPlay.head(1))

   id_gameGenre  id_game  id_genre
0             1   761140         1
   id_game            name_game  anio
0   761140  LOST SUMMONER KITTY  2018
   id_genre   genre
0         1  Action
   id_game  playtime_forever            id_user
0       10                 6  76561197970982479


In [4]:
# Realizamos las uniones correspondientes para obtener el dataframe final
gameGenre = pd.merge(dfGameGenre, dfnameGame, on='id_game', how='left')
gameGenre = pd.merge(gameGenre, dfGenre, on='id_genre', how='left')


cols = ['id_game','name_game','genre']
gameGenre = gameGenre[cols]

In [5]:
gameGenre.head()

Unnamed: 0,id_game,name_game,genre
0,761140,LOST SUMMONER KITTY,Action
1,767400,DAN ZHA REN 2222,Action
2,773570,LOG CHALLENGE,Action
3,772540,BATTLE ROYALE TRAINER,Action
4,768570,UNCANNY ISLANDS,Action


In [6]:
gameGenre.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85487 entries, 0 to 85486
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id_game    85487 non-null  int64 
 1   name_game  85487 non-null  object
 2   genre      85487 non-null  object
dtypes: int64(1), object(2)
memory usage: 2.0+ MB


In [7]:
gameGenre.sample(5)

Unnamed: 0,id_game,name_game,genre
63229,328100,GUNSPELL - STEAM EDITION,Adventure
70793,292400,UNREST,Adventure
51079,483450,DEMOCRACY 3: ELECTIONEERING,Strategy
70480,298630,THE ESCAPISTS,Adventure
7807,406280,DESERTLAND 2115,Action


In [8]:
# En un analices anterior decidimos no eliminar valores de playtime_forever 
# Normalizamos la columna name_game
gameGenre['name_game'] = gameGenre['name_game'].apply(lambda x: re.sub(r'[^\w\s]', '', x.replace('-', ' ')).strip().upper())

In [9]:
# Revertimos un proceso, como en sus inicios.
# Agrupar el dataframe 'gameGenre' por la columna 'id_game' y 'name_game'
# Aplicamos una función lambda para obtener los valores únicos en la columna 'genre' para cada grupo.
gameGenre = gameGenre.groupby(['id_game','name_game'])['genre'].apply(lambda x: x.unique()).reset_index()
gameGenre.head()

Unnamed: 0,id_game,name_game,genre
0,10,COUNTER STRIKE,[Action]
1,20,TEAM FORTRESS CLASSIC,[Action]
2,30,DAY OF DEFEAT,[Action]
3,40,DEATHMATCH CLASSIC,[Action]
4,50,HALF LIFE OPPOSING FORCE,[Action]


In [10]:
# A continuación relacionamos el dataframe resultante con dfPlay
gameGenre = pd.merge(gameGenre, dfPlay, on='id_game', how='left' )

# Especificamos las columnas con las que vamos a relizar el SR
cols = ['id_game','name_game', 'genre', 'playtime_forever']
gameGenre = gameGenre[cols]

gameGenre.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4317005 entries, 0 to 4317004
Data columns (total 4 columns):
 #   Column            Dtype  
---  ------            -----  
 0   id_game           int64  
 1   name_game         object 
 2   genre             object 
 3   playtime_forever  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 131.7+ MB


In [11]:
gameGenre.head()

Unnamed: 0,id_game,name_game,genre,playtime_forever
0,10,COUNTER STRIKE,[Action],6.0
1,10,COUNTER STRIKE,[Action],0.0
2,10,COUNTER STRIKE,[Action],0.0
3,10,COUNTER STRIKE,[Action],93.0
4,10,COUNTER STRIKE,[Action],108.0


In [12]:
# Elimanos registros que no aporten valor al SR.

# Eliminamos registros con valores nulos
gameGenre = gameGenre.dropna()

#Filtramos el dataframe con playtime diferente a 0
gameGenre = gameGenre[gameGenre['playtime_forever'] != 0]

gameGenre.reset_index(drop=True, inplace=True)

In [13]:
gameGenre.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2868055 entries, 0 to 2868054
Data columns (total 4 columns):
 #   Column            Dtype  
---  ------            -----  
 0   id_game           int64  
 1   name_game         object 
 2   genre             object 
 3   playtime_forever  float64
dtypes: float64(1), int64(1), object(2)
memory usage: 87.5+ MB


In [14]:
gameGenre

Unnamed: 0,id_game,name_game,genre,playtime_forever
0,10,COUNTER STRIKE,[Action],6.0
1,10,COUNTER STRIKE,[Action],93.0
2,10,COUNTER STRIKE,[Action],108.0
3,10,COUNTER STRIKE,[Action],328.0
4,10,COUNTER STRIKE,[Action],6275.0
...,...,...,...,...
2868050,527510,THE LEGIONS OF ROME,"[Casual, Indie, Simulation, Strategy]",32.0
2868051,527810,DYNAMITE ALEX,"[Action, Indie]",2.0
2868052,527890,SHOP N SPREE SHOPPING PARADISE,"[Casual, Simulation]",1.0
2868053,527900,GRIM TALES THE BRIDE COLLECTORS EDITION,"[Casual, Adventure]",44.0


In [15]:
# Convertir la columna 'genre' en una cadena de texto separada por comas
gameGenre['genre_str'] = gameGenre['genre'].apply(lambda x: ', '.join(x))

cols = ['id_game','name_game', 'genre_str', 'playtime_forever']
gameGenre = gameGenre[cols]
gameGenre.head()

Unnamed: 0,id_game,name_game,genre_str,playtime_forever
0,10,COUNTER STRIKE,Action,6.0
1,10,COUNTER STRIKE,Action,93.0
2,10,COUNTER STRIKE,Action,108.0
3,10,COUNTER STRIKE,Action,328.0
4,10,COUNTER STRIKE,Action,6275.0


In [16]:
# Nuevamente....
# Agrupar el dataframe 'gameGenre' por la columna 'name_game' y 'genre'
# Para cada combinación única, calculamos la suma de los valores en la columna 'playtime_forever'.
gameGenre = gameGenre.groupby(['id_game','name_game', 'genre_str'])['playtime_forever'].sum().reset_index()
gameGenre.head()

Unnamed: 0,id_game,name_game,genre_str,playtime_forever
0,10,COUNTER STRIKE,Action,17386015.0
1,20,TEAM FORTRESS CLASSIC,Action,961702.0
2,30,DAY OF DEFEAT,Action,758991.0
3,40,DEATHMATCH CLASSIC,Action,154486.0
4,50,HALF LIFE OPPOSING FORCE,Action,734562.0


In [17]:
# Aquí les presento el dataframe final.
gameGenre.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8523 entries, 0 to 8522
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id_game           8523 non-null   int64  
 1   name_game         8523 non-null   object 
 2   genre_str         8523 non-null   object 
 3   playtime_forever  8523 non-null   float64
dtypes: float64(1), int64(1), object(2)
memory usage: 266.5+ KB


# Sistema de Recomendación

In [18]:
from sklearn.preprocessing import MinMaxScaler

# Normalizar la columna 'playtime_forever'
scaler = MinMaxScaler()
gameGenre['playtime_normalized'] = scaler.fit_transform(gameGenre['playtime_forever'].values.reshape(-1, 1))

In [19]:
gameGenre.head()

Unnamed: 0,id_game,name_game,genre_str,playtime_forever,playtime_normalized
0,10,COUNTER STRIKE,Action,17386015.0,0.022143
1,20,TEAM FORTRESS CLASSIC,Action,961702.0,0.001225
2,30,DAY OF DEFEAT,Action,758991.0,0.000967
3,40,DEATHMATCH CLASSIC,Action,154486.0,0.000197
4,50,HALF LIFE OPPOSING FORCE,Action,734562.0,0.000936


In [20]:
#gameGenre.to_csv('api_steam/data_render/gameRecom.csv', index=False)

In [21]:
from sklearn.feature_extraction.text import CountVectorizer

# Crear un objeto CountVectorizer para convertir texto en una matriz de términos-documentos
vectorizer = CountVectorizer(tokenizer=lambda x: x.split(', '))
matriz_generos = vectorizer.fit_transform(gameGenre['genre_str'])



In [22]:
# Calcular la similitud de coseno entre los juegos basados en el género
from sklearn.metrics.pairwise import cosine_similarity
cosine_sim = cosine_similarity(matriz_generos, matriz_generos)

In [23]:
# Asignar un peso para la similitud de género y para el tiempo de juego
genre_weight = 0.7
playtime_weight = 0.3

# Calcular la puntuación de similitud ponderada
weighted_similarity = (genre_weight * cosine_sim) + (playtime_weight * gameGenre['playtime_normalized'].values.reshape(-1, 1))


In [24]:
import pickle

# Guardar cosine_sim en un archivo usando pickle
'''with open('api_steam/data_render/weighted_similarity.pkl', 'wb') as file:
    pickle.dump(weighted_similarity, file)'''


"with open('api_steam/data_render/weighted_similarity.pkl', 'wb') as file:\n    pickle.dump(weighted_similarity, file)"

In [25]:
# Función para obtener recomendaciones basadas en la puntuación de similitud ponderada
def get_weighted_recommendations(game_name, weighted_similarity_scores):
    game_index = gameGenre[gameGenre['name_game'] == game_name].index[0]
    similar_games = list(enumerate(weighted_similarity_scores[game_index]))
    similar_games = sorted(similar_games, key=lambda x: x[1], reverse=True)
    similar_games = similar_games[1:11]  # Obtener las 10 recomendaciones principales
    game_indices = [index[0] for index in similar_games]
    return gameGenre['name_game'].iloc[game_indices]

In [26]:
gameGenre[gameGenre['name_game'].str.contains('CAR', case=False, na=False)]

Unnamed: 0,id_game,name_game,genre_str,playtime_forever,playtime_normalized
296,10270,DISCIPLES III REINCARNATION,"Strategy, RPG",13664.0,1.740101e-05
340,12360,FLATOUT ULTIMATE CARNAGE,Racing,161822.0,2.060930e-04
354,12560,RIDE CARNIVAL TYCOON,Simulation,1053.0,1.339813e-06
357,12590,SPRINT CARS ROAD TO KNOXVILLE,Racing,7472.0,9.514964e-06
555,24650,TRAINZ SETTLE AND CARLISLE,Simulation,1614.0,2.054295e-06
...,...,...,...,...,...
7673,454180,CRAZYCARS3D,"Action, Casual, Indie, Simulation, Adventure, ...",3994.0,5.085430e-06
8141,486850,CARD OF SPIRITS QIA LING,Casual,631.0,8.023594e-07
8159,488220,VALCARTA RISE OF THE DEMON,RPG,513.0,6.520762e-07
8165,488550,DREAM CAR BUILDER,"Indie, Simulation, Racing, Early Access",310.0,3.935382e-07


In [27]:
# Obtener recomendaciones ponderadas para un juego específico
game_name = 'DREAM CAR BUILDER'
weighted_recommendations = get_weighted_recommendations(game_name, weighted_similarity)
print(weighted_recommendations)

4041       HOMEBREW   VEHICLE SANDBOX
4737                        RACECRAFT
6690                          LIFTOFF
8338                     GI RACING 20
2912                      BEAMNGDRIVE
3927                     ZEN FISH SIM
4015                          SINARUN
4077    INFINITE GAME WORKS EPISODE 1
4707                          BESIEGE
6186              LABYRINTH SIMULATOR
Name: name_game, dtype: object
