In [14]:
import pandas as pd
import numpy as np
import os
import ast
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer


In [48]:
df_games = pd.read_csv('Datasets\games_limpios.csv')

In [49]:
df_games.head()

Unnamed: 0,genres,specs,price,early_access,id,release_year,publisher,app_name,title,developer
0,Action,['Single-player'],4.99,False,761140,2018,Kotoshiro,Lost Summoner Kitty,Lost Summoner Kitty,Kotoshiro
1,Casual,['Single-player'],4.99,False,761140,2018,Kotoshiro,Lost Summoner Kitty,Lost Summoner Kitty,Kotoshiro
2,Indie,['Single-player'],4.99,False,761140,2018,Kotoshiro,Lost Summoner Kitty,Lost Summoner Kitty,Kotoshiro
3,Simulation,['Single-player'],4.99,False,761140,2018,Kotoshiro,Lost Summoner Kitty,Lost Summoner Kitty,Kotoshiro
4,Strategy,['Single-player'],4.99,False,761140,2018,Kotoshiro,Lost Summoner Kitty,Lost Summoner Kitty,Kotoshiro


In [50]:
# Seleccionar las columnas necesarias
df = df_games.loc[:, ["specs", "id", "app_name"]]
df=df.sample(n=100)
df=df.rename(columns={'id': 'item_id'})
# Convertir 'item_id' a tipo entero
df["item_id"] = df["item_id"].astype(int)

# Limpiar la columna 'specs'
df['specs'] = df['specs'].apply(lambda x: str(x).replace('[', '').replace(']', '').replace("'", ''))

In [51]:
# Crear un vectorizador de texto
cv = CountVectorizer()
vectores = cv.fit_transform(df['specs']).toarray()

In [52]:
# Calcular la similitud del coseno entre vectores
similitud = cosine_similarity(vectores)

In [54]:
# Generar una función para obtener recomendaciones por título
def recomendacion(juego):
    #Se busca el índice del juego en el DataFrame original (df). Este índice es utilizado para acceder a la fila correspondiente en la matriz de similitud.
    indice_juego = df[df["item_id"] == juego].index[0]
    
    #Cálculo de Similitudes: Se obtienen las distancias de similitud entre el juego de entrada y todos los demás juegos en el conjunto de datos. 
    distances = similitud[indice_juego]
    
    #Las distancias se ordenan de manera descendente, y se seleccionan los cinco juegos más similares (excluyendo el juego de entrada)
    lista_juegos = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:6]
    
    #Salida. Devuelve la lista de títulos recomendados.
    recommended_titles = [df.iloc[i[0]]['app_name'] for i in lista_juegos]
    
    return recommended_titles

In [55]:
df

Unnamed: 0,specs,item_id,app_name
4023,"Single-player, Steam Achievements, Steam Leade...",272990,GAUGE
49641,"Single-player, Steam Trading Cards",499420,Star Fields
48790,"Single-player, Downloadable Content, Steam Ach...",526070,World of Guns: USSR Guns Pack #1
40195,"Single-player, Steam Achievements, Full contro...",525360,Shio
14042,"Multi-player, MMO, Downloadable Content, Steam...",479060,WildStar: Captain's Pack
...,...,...,...
4652,Single-player,310870,The Tower
29119,"Single-player, Steam Achievements",759940,前程似锦 Excellent Expectations
13250,"Single-player, Downloadable Content, Steam Ach...",433200,Goliath - Original Soundtrack
39486,"Single-player, Multi-player, Online Multi-Play...",342660,Brawlerz Nitro


In [56]:
# Asignar un nuevo índice al DataFrame
df = df.reset_index(drop=True)

In [57]:
df

Unnamed: 0,specs,item_id,app_name
0,"Single-player, Steam Achievements, Steam Leade...",272990,GAUGE
1,"Single-player, Steam Trading Cards",499420,Star Fields
2,"Single-player, Downloadable Content, Steam Ach...",526070,World of Guns: USSR Guns Pack #1
3,"Single-player, Steam Achievements, Full contro...",525360,Shio
4,"Multi-player, MMO, Downloadable Content, Steam...",479060,WildStar: Captain's Pack
...,...,...,...
95,Single-player,310870,The Tower
96,"Single-player, Steam Achievements",759940,前程似锦 Excellent Expectations
97,"Single-player, Downloadable Content, Steam Ach...",433200,Goliath - Original Soundtrack
98,"Single-player, Multi-player, Online Multi-Play...",342660,Brawlerz Nitro


In [58]:
# Aplicar la función a la columna 'item_id' y crear una nueva columna 'Recomendaciones'
df['Recomendaciones'] = df['item_id'].apply(recomendacion)

In [59]:
# Eliminar columnas innecesarias para disminuir el tamaño del archivo de salida
df.drop(columns=['app_name', 'specs'], inplace=True)
df

Unnamed: 0,item_id,Recomendaciones
0,272990,"[Pixel Traffic: Risky Bridge, Lucid, 前程似锦 Exce..."
1,499420,[Portal of Evil: Stolen Runes Collector's Edit...
2,526070,"[Botology - Map ""Barazin"" for Survival Mode, P..."
3,525360,"[Brigador: Up-Armored Edition, A Bastard's Tal..."
4,479060,"[Botology - Map ""Barazin"" for Survival Mode, W..."
...,...,...
95,310870,"[Story of the Survivor : Prisoner, FlatFatCat,..."
96,759940,"[前程似锦 Excellent Expectations, GAUGE, M.A.C.E. ..."
97,433200,"[Brigador: Up-Armored Edition, A Bastard's Tal..."
98,342660,"[Auto Age: Standoff, EMERGENCY 20, The HinterL..."


In [60]:
# Guardar el DataFrame resultante en un nuevo archivo CSV
df.to_csv('recomienda_item_item.csv', index=False)