In [2]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
import joblib

In [3]:
#carga del df
df = pd.read_csv(r'./datasets/game_profile.csv')

In [4]:
df[['id', 'title']].to_csv(r'./dataquery/game_profile.csv',index=False)

In [5]:
#Primer modelo con el profile mas extenso
import gc
gc.collect()

# Create a TF-IDF Vectorizer to transform the 'profile' into numerical data
tfidf = TfidfVectorizer(stop_words='english',max_features=5000)
tfidf_matrix = tfidf.fit_transform(df['profile'])

joblib.dump(tfidf_matrix, r'./dataquery/tfidf_matrix.pkl')



# Use k-NN to find similar games
knn = NearestNeighbors(n_neighbors=6, metric='cosine', algorithm='brute')  # Using 6 neighbors to include the game itself
knn.fit(tfidf_matrix)
joblib.dump(knn, r'./dataquery/modelo_knn.pkl')

# Create a function to get recommendations
def recommend(game_id):
    knn_load = joblib.load(r'./dataquery/modelo_knn.pkl')
    idx = df.index[df['id'] == game_id].tolist()[0]
    name = df['title'].loc[idx]
    print(f'Porque te gusto "{name}" te recomendamos:')
    values , indices = knn.kneighbors(tfidf_matrix[idx])
    
    # Exclude the first (index 0) since it'll be the game itself
    game_indices = indices[0][1:]
    return df['title'].iloc[game_indices]

In [6]:
tfidf_matrix


<30085x5000 sparse matrix of type '<class 'numpy.float64'>'
	with 508615 stored elements in Compressed Sparse Row format>

In [4]:
# segundo test como el profile solo con genero, developer y año
import gc
gc.collect()

tfidf_s = TfidfVectorizer(stop_words='english',max_features=5000)
tfidf_matrix_s = tfidf_s.fit_transform(df['profile_simple'])

knn_s = NearestNeighbors(n_neighbors=6, metric='cosine', algorithm='brute')  # Using 6 neighbors to include the game itself
knn_s.fit(tfidf_matrix_s)

joblib.dump(knn, r'./dataquery/modelo_knn_simple.pkl')

# Create a function to get recommendations
def recommend_s(game_id):
    knn_load = joblib.load(r'./dataquery/modelo_knn_simple.pkl')
    idx = df.index[df['id'] == game_id].tolist()[0]
    name = df['title'].loc[idx]
    print(f'Porque te gusto "{name}" te recomendamos:')
    values , indices = knn_s.kneighbors(tfidf_matrix_s[idx])
    
    # Exclude the first (index 0) since it'll be the game itself
    game_indices = indices[0][0:]
    return df['title'].iloc[game_indices]

In [7]:
print(recommend(47810))


Porque te gusto "Dragon Age: Origins - Ultimate Edition" te recomendamos:
29453    Dragon Age™: Origins Awakening
331                 Dragon Age: Origins
29742                       Mass Effect
29865     Jade Empire™: Special Edition
29485                     Mass Effect 2
Name: title, dtype: object


In [6]:
print(recommend_s(47810))

Porque te gusto "Dragon Age: Origins - Ultimate Edition" te recomendamos:
29453            Dragon Age™: Origins Awakening
29488      Mass Effect 2 Digital Deluxe Edition
29329    Dragon Age: Origins - Ultimate Edition
29485                             Mass Effect 2
331                         Dragon Age: Origins
29742                               Mass Effect
Name: title, dtype: object
