In [1]:
import pandas as pd
import numpy as np
import csv
import string
import re
import seaborn as sn
import matplotlib.pyplot as plt
from textblob import TextBlob
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Modelo de recomendación

```recomendacion_juego```

_Ingresando el id de producto, deberíamos recibir una lista con 5 juegos recomendados similares al ingresado._

In [2]:
# Se carga el dataframe steam_games

df_steam_games = pd.read_csv('/Users/mlucchesi/Henry/PI/data/csv/procesados/steam_games_procesado.csv')
df_steam_games.head()

Unnamed: 0.1,Unnamed: 0,publisher,genres,app_name,title,url,release_date,tags,reviews_url,specs,price,early_access,id,developer
0,88310,Kotoshiro,"['Action', 'Casual', 'Indie', 'Simulation', 'S...",Lost Summoner Kitty,Lost Summoner Kitty,http://store.steampowered.com/app/761140/Lost_...,2018-01-04,"['Strategy', 'Action', 'Indie', 'Casual', 'Sim...",http://steamcommunity.com/app/761140/reviews/?...,['Single-player'],4.99,0.0,761140.0,Kotoshiro
1,88311,"Making Fun, Inc.","['Free to Play', 'Indie', 'RPG', 'Strategy']",Ironbound,Ironbound,http://store.steampowered.com/app/643980/Ironb...,2018-01-04,"['Free to Play', 'Strategy', 'Indie', 'RPG', '...",http://steamcommunity.com/app/643980/reviews/?...,"['Single-player', 'Multi-player', 'Online Mult...",0.0,0.0,643980.0,Secret Level SRL
2,88312,Poolians.com,"['Casual', 'Free to Play', 'Indie', 'Simulatio...",Real Pool 3D - Poolians,Real Pool 3D - Poolians,http://store.steampowered.com/app/670290/Real_...,2017-07-24,"['Free to Play', 'Simulation', 'Sports', 'Casu...",http://steamcommunity.com/app/670290/reviews/?...,"['Single-player', 'Multi-player', 'Online Mult...",0.0,0.0,670290.0,Poolians.com
3,88313,彼岸领域,"['Action', 'Adventure', 'Casual']",弹炸人2222,弹炸人2222,http://store.steampowered.com/app/767400/2222/,2017-12-07,"['Action', 'Adventure', 'Casual']",http://steamcommunity.com/app/767400/reviews/?...,['Single-player'],0.99,0.0,767400.0,彼岸领域
4,88314,,,Log Challenge,,http://store.steampowered.com/app/773570/Log_C...,,"['Action', 'Indie', 'Casual', 'Sports']",http://steamcommunity.com/app/773570/reviews/?...,"['Single-player', 'Full controller support', '...",2.99,0.0,773570.0,


In [6]:
# Se extrae en otro df, unincamente las columnas necesarias para el modelo

df_games_filtered = df_steam_games[['app_name', 'tags']]
df_games_filtered.head()

Unnamed: 0,app_name,tags
0,Lost Summoner Kitty,"['Strategy', 'Action', 'Indie', 'Casual', 'Sim..."
1,Ironbound,"['Free to Play', 'Strategy', 'Indie', 'RPG', '..."
2,Real Pool 3D - Poolians,"['Free to Play', 'Simulation', 'Sports', 'Casu..."
3,弹炸人2222,"['Action', 'Adventure', 'Casual']"
4,Log Challenge,"['Action', 'Indie', 'Casual', 'Sports']"


In [7]:
# Se toma una muestra para el modelo

df_games_filtered = df_games_filtered.sample(frac=0.1, random_state=777)

df_games_filtered = df_games_filtered.reset_index(drop=True)

In [9]:
df_games_filtered.head()

Unnamed: 0,app_name,tags
0,DW8XLCE - DW7 ORIGINAL COSTUME PACK 2,['Action']
1,Dungeon Defenders - Quest for the Lost Eterni...,"['RPG', 'Indie']"
2,Raiders of the Broken Planet - Alien Myths Cam...,"['Action', 'Adventure', 'Violent', 'Indie', 'M..."
3,Rocksmith® 2014 Edition - Remastered – Kenny L...,"['Casual', 'Simulation']"
4,Strip Club Massacre,"['Nudity', 'Sexual Content', 'Movie', 'Violent..."


In [12]:
# Se crea una nueva columna que concatena todos los tags, separados unicamente de un espacio

df_games_filtered['tags_concat'] = df_games_filtered['tags'].str.replace('[{}]'.format(string.punctuation), '')

# Dropeo la columna de tags, ya que no se necesita más

df_games_filtered.drop(['tags'], axis=1, inplace=True)

# Muestro el resultado

df_games_filtered.head()

Unnamed: 0,app_name,tags_concat
0,DW8XLCE - DW7 ORIGINAL COSTUME PACK 2,['Action']
1,Dungeon Defenders - Quest for the Lost Eterni...,"['RPG', 'Indie']"
2,Raiders of the Broken Planet - Alien Myths Cam...,"['Action', 'Adventure', 'Violent', 'Indie', 'M..."
3,Rocksmith® 2014 Edition - Remastered – Kenny L...,"['Casual', 'Simulation']"
4,Strip Club Massacre,"['Nudity', 'Sexual Content', 'Movie', 'Violent..."


In [13]:
# Se instancia un objeto vectorizador usando TfidfVectorizer, con stop words en inglés.
vectorizador = TfidfVectorizer(stop_words='english')

In [18]:
# Los valores nulos en la columna 'tags_concat' se reemplazan con una cadena vacía.
df_games_filtered['tags_concat'] = df_games_filtered['tags_concat'].fillna('')

In [17]:
# Se realiza la transformación TF-IDF en los datos de la columna 'tags_concat'.
vector_matrix = vectorizador.fit_transform(df_games_filtered['tags_concat'])

In [19]:
# Se calcula la similitud del coseno

simil_coseno = linear_kernel(vector_matrix, vector_matrix)

In [20]:
# Se crea una serie de indices

index = pd.Series(df_games_filtered.index, index=df_games_filtered['app_name']).drop_duplicates()

In [30]:
# Defino la función
def recomendacion_juego(juego, simil_coseno):
    try:
        indice = index[juego]
        similitud = list(enumerate(simil_coseno[indice]))
        similitud = sorted(similitud, key=lambda x: x[1], reverse=True)
        idx_game_recommended = [i[0] for i in similitud[1:6]]
        resultado_recomendacion = df_games_filtered['app_name'].iloc[idx_game_recommended]

        print('Si te gustó:', juego, '\n')
        print('Te recomendamos:')
        for juego_recomendado in resultado_recomendacion:
            print('-', juego_recomendado)

    except KeyError:
        print(juego, 'no se encuentra en nuestra base.')

In [31]:
# Pruebo la función

recomendacion_juego('Strip Club Massacre', simil_coseno)

Si te gustó: Strip Club Massacre 

Te recomendamos:
- Skeleton Key 3 Part 2
- Mystic Destinies: Serendipity of Aeons - Takumi
- Berserk
- Crystal City
- Libra of the Vampire Princess
