In [1]:
import pandas as pd

# Leer el archivo CSV
steam_games = pd.read_csv('steam_games.csv')

# Mostrar las primeras filas del DataFrame
print(steam_games.head())

# Reemplazar los valores NaN en la columna 'price' con 0
steam_games['price'].fillna(0, inplace=True)

# Verificar valores nulos en las columnas relevantes
relevant_columns = ['title','id','tags', 'developer', 'price']
print("\nValores nulos en columnas relevantes:")
print(steam_games[relevant_columns].isnull().sum())

# Eliminar registros con valores nulos en columnas relevantes
steam_games = steam_games.dropna(subset=relevant_columns)

# Verificar valores duplicados en la columna 'id'
print("\nCantidad de valores duplicados en la columna 'id':", steam_games['id'].duplicated().sum())

# Eliminar registros duplicados en la columna 'id'
steam_games = steam_games.drop_duplicates(subset='id')

# Eliminar columnas no necesarias para el sistema de recomendación
columns_to_drop = ['genres','app_name', 'specs', 'year']
steam_games = steam_games.drop(columns=columns_to_drop)

# Guardar el DataFrame limpio en un nuevo archivo CSV
steam_games.to_csv('steam_games_clean.csv', index=False)

# Mostrar información del DataFrame limpio
print("\nInformación del DataFrame limpio:")
print(steam_games.info())


                                              genres                 app_name  \
0  ['Action', 'Casual', 'Indie', 'Simulation', 'S...      Lost Summoner Kitty   
1       ['Free to Play', 'Indie', 'RPG', 'Strategy']                Ironbound   
2  ['Casual', 'Free to Play', 'Indie', 'Simulatio...  Real Pool 3D - Poolians   
3                  ['Action', 'Adventure', 'Casual']                  弹炸人2222   
4              ['Action', 'Adventure', 'Simulation']    Battle Royale Trainer   

                     title                                               tags  \
0      Lost Summoner Kitty  ['Strategy', 'Action', 'Indie', 'Casual', 'Sim...   
1                Ironbound  ['Free to Play', 'Strategy', 'Indie', 'RPG', '...   
2  Real Pool 3D - Poolians  ['Free to Play', 'Simulation', 'Sports', 'Casu...   
3                  弹炸人2222                  ['Action', 'Adventure', 'Casual']   
4    Battle Royale Trainer  ['Action', 'Adventure', 'Simulation', 'FPS', '...   

                          

In [2]:
steam_games.info()

<class 'pandas.core.frame.DataFrame'>
Index: 28681 entries, 0 to 30066
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   title      28681 non-null  object 
 1   tags       28681 non-null  object 
 2   price      28681 non-null  float64
 3   id         28681 non-null  float64
 4   developer  28681 non-null  object 
dtypes: float64(2), object(3)
memory usage: 1.3+ MB


In [3]:
from sklearn.preprocessing import MultiLabelBinarizer

# Cargar el dataframe
df = steam_games

# Convertir las listas de etiquetas a un formato adecuado para MultiLabelBinarizer
tags_lists = [eval(tags) if isinstance(tags, str) else [] for tags in df['tags']]

# Inicializar y ajustar MultiLabelBinarizer
mlb = MultiLabelBinarizer()
tags_encoded = mlb.fit_transform(tags_lists)

# Crear un nuevo dataframe con las etiquetas binarizadas
tags_df = pd.DataFrame(tags_encoded, columns=mlb.classes_)

# Concatenar el nuevo dataframe con el dataframe original y eliminar la columna 'tags' original
df = pd.concat([df, tags_df], axis=1)
df.drop(columns=['tags'], inplace=True)

# Ver el resultado
print(df.head())


                     title  price        id            developer  1980s  \
0      Lost Summoner Kitty   4.99  761140.0            Kotoshiro    0.0   
1                Ironbound   0.00  643980.0     Secret Level SRL    0.0   
2  Real Pool 3D - Poolians   0.00  670290.0         Poolians.com    0.0   
3                  弹炸人2222   0.99  767400.0                 彼岸领域    0.0   
4    Battle Royale Trainer   3.99  772540.0  Trickjump Games Ltd    0.0   

   1990's  2.5D   2D  2D Fighter  3D Platformer  ...  Warhammer 40K  \
0     0.0   0.0  0.0         0.0            0.0  ...            0.0   
1     0.0   0.0  1.0         0.0            0.0  ...            0.0   
2     0.0   0.0  0.0         0.0            0.0  ...            0.0   
3     0.0   0.0  0.0         0.0            0.0  ...            0.0   
4     0.0   0.0  0.0         0.0            0.0  ...            0.0   

   Web Publishing  Werewolves  Western  Word Game  World War I  World War II  \
0             0.0         0.0      0.0    

In [4]:
df

Unnamed: 0,title,price,id,developer,1980s,1990's,2.5D,2D,2D Fighter,3D Platformer,...,Warhammer 40K,Web Publishing,Werewolves,Western,Word Game,World War I,World War II,Wrestling,Zombies,e-sports
0,Lost Summoner Kitty,4.99,761140.0,Kotoshiro,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Ironbound,0.00,643980.0,Secret Level SRL,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Real Pool 3D - Poolians,0.00,670290.0,Poolians.com,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,弹炸人2222,0.99,767400.0,彼岸领域,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Battle Royale Trainer,3.99,772540.0,Trickjump Games Ltd,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27978,,,,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
27987,,,,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28021,,,,,0.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
28251,,,,,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [5]:
# Verificar valores nulos en las columnas relevantes
relevant_columns = ['title','id', 'developer', 'price']

# Eliminar registros con valores nulos en columnas relevantes
df = df.dropna(subset=relevant_columns)

# Eliminar registros duplicados en la columna 'id'
df = df.drop_duplicates(subset='id')

In [6]:
# Guardar el DataFrame como un archivo CSV
df.to_csv('def_recommend_games.csv', index=False)

In [7]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

def recomendacion_juego(input_id, num_recommendations=5):
    # Cargar el DataFrame
    df = pd.read_csv('def_recommend_games.csv')

    # Llenar valores NaN con 0
    df_filled = df.fillna(0)

    # Encontrar el índice del juego de entrada en el DataFrame
    input_index = df.index[df['id'] == input_id].tolist()[0]

    # Calcular la similitud del coseno entre el juego de entrada y todos los demás juegos
    similarity_scores = cosine_similarity(df_filled.iloc[input_index, 5:].values.reshape(1, -1), df_filled.iloc[:, 5:])

    # Obtener los índices de los juegos más similares
    similar_game_indices = similarity_scores.argsort()[0][-num_recommendations-1:-1][::-1]

    # Obtener los títulos de los juegos más similares
    recommendations = df.iloc[similar_game_indices]['title'].tolist()

    return recommendations





In [8]:
# Ejemplo de uso
input_game_id = 761140.0  # Aquí debes especificar el ID del juego de entrada
recommendations = recomendacion_juego(input_game_id)
print("Juegos recomendados:")
for i, game in enumerate(recommendations, 1):
    print(f"{i}. {game}")

Juegos recomendados:
1. OMSI 2 Add-On HafenCity - Hamburg modern
2. LeftWay
3. BLUE REFLECTION - Summer Clothes Set C (Lime, Fumio, Chihiro)
4. Realms of Arkania: Star Trail
5. HYPERNOVA: Escape from Hadea


In [9]:
recommendations

['OMSI 2 Add-On HafenCity - Hamburg modern',
 'LeftWay',
 'BLUE REFLECTION - Summer Clothes Set C (Lime, Fumio, Chihiro)',
 'Realms of Arkania: Star Trail',
 'HYPERNOVA: Escape from Hadea']

In [29]:
import sklearn
print(sklearn.__version__)

1.3.2
