# **Filtrado basado en contenido**

https://www.kaggle.com/datasets/nikdavis/steam-store-games?resource=download

In [14]:
import time
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_distances
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

In [8]:
raw_games = pd.read_csv('data/steam_games/steam.csv')
raw_descriptions = pd.read_csv('data/steam_games/steam_description_data.csv')
raw_descriptions.rename(columns={'steam_appid': 'appid'}, inplace=True)

games_df = raw_games.join(raw_descriptions, on='appid', rsuffix='_desc')
games_df['content'] = games_df['name'].fillna("") + " " + games_df['detailed_description'].fillna("")
games_df.head()

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,...,negative_ratings,average_playtime,median_playtime,owners,price,appid_desc,detailed_description,about_the_game,short_description,content
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,3339,17612,317,10000000-20000000,7.19,240.0,THE NEXT INSTALLMENT OF THE WORLD'S # 1 ONLINE...,THE NEXT INSTALLMENT OF THE WORLD'S # 1 ONLINE...,"Just updated to include player stats, achievem...",Counter-Strike THE NEXT INSTALLMENT OF THE WOR...
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,633,277,62,5000000-10000000,3.99,500.0,"From Valve (the creators of Counter-Strike, Ha...","From Valve (the creators of Counter-Strike, Ha...","From Valve (the creators of Counter-Strike, Ha...",Team Fortress Classic From Valve (the creators...
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,...,398,187,34,5000000-10000000,3.99,1500.0,Combining fast-paced action with strategic bat...,Combining fast-paced action with strategic bat...,Combining fast-paced action with strategic bat...,Day of Defeat Combining fast-paced action with...
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,...,267,258,184,5000000-10000000,3.99,1700.0,This critically acclaimed first-person RPG tak...,This critically acclaimed first-person RPG tak...,This critically acclaimed first-person RPG tak...,Deathmatch Classic This critically acclaimed f...
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,...,288,624,415,5000000-10000000,3.99,2300.0,Let the Obsession begin. Again.<br />\r\n\t\t\...,Let the Obsession begin. Again.<br />\r\n\t\t\...,"Let the Obsession begin. Again. This time, the...",Half-Life: Opposing Force Let the Obsession be...


## **Game search**

In [4]:
games_df[games_df['name'].str.contains('zomboid', case=False)]

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,...,negative_ratings,average_playtime,median_playtime,owners,price,appid_desc,detailed_description,about_the_game,short_description,content
1133,108600,Project Zomboid,2013-11-08,1,The Indie Stone,The Indie Stone,windows;mac;linux,0,Single-player;Multi-player;Co-op;Shared/Split ...,Indie;RPG;Simulation;Early Access,...,2606,658,701,500000-1000000,9.99,,,,,Project Zomboid


In [9]:
tfidf = TfidfVectorizer(stop_words='english', max_features=15000, ngram_range=(1,2)).fit_transform(games_df['content'])
print(tfidf.shape)

cosine_matrix = cosine_distances(tfidf)

(27075, 15000)


## **Similar games to another game**

In [10]:
print(f"top 10 juegos más parecidos a *** {games_df.iloc[1133]['name']} ***")
idxs = sorted(enumerate(cosine_matrix[1133]), key=lambda x: x[1], reverse=False)

for i, score in idxs[:10]:
    print(f"{games_df.iloc[i]['name']}: {score: .5f}")

top 10 juegos más parecidos a *** Project Zomboid ***
Project Zomboid:  0.00000
The Troma Project:  0.00000
The Solus Project:  0.00000
Project: Gorgon:  0.00000
The Apotheosis Project:  0.00000
Project Tarvotan:  0.00000
Project Highrise:  0.00000
Project Pulsation:  0.00000
Project Graviton:  0.00000
Project G:  0.00000


## **Recommending games to a person**

In [12]:
player_ids = [1223, 1334, 1445, 1556]
d = tfidf[player_ids].sum(axis=0)
d = np.asarray(d).reshape(1, -1)
d = cosine_distances(d, tfidf)[0]

In [13]:
print(f"Top 10 juegos más parecidos a *** jugador misterioso ***")
idxs = sorted(enumerate(d), key=lambda x: x[1], reverse=False)
for i, score in [x for x in idxs if x[0] not in player_ids][:10]:
    print(f"{games_df.iloc[i]['name']}: {score: .5f}")

Top 10 juegos más parecidos a *** jugador misterioso ***
Street Fighter V:  0.59221
Don Bradman Cricket 17:  0.61451
Don Bradman Cricket 17 Demo:  0.63446
Ultra Street Fighter® IV:  0.67715
Arkhangel: The House of the Seven Stars:  0.70572
TEKKEN 7:  0.71067
The Race for the White House:  0.72094
J.U.L.I.A.: Among the Stars:  0.72823
Devouring Stars:  0.72823
Into the Stars:  0.72823


## **Using a Sentence Transformer**

In [18]:
model = SentenceTransformer('all-MiniLM-L12-v2')
corpus = list(games_df['content'])

start = time.time()
embedding = model.encode(corpus, batch_size=64, show_progress_bar=True, convert_to_tensor=True)
mm = cos_sim(embedding, embedding)
mm.shape

Batches: 100%|██████████| 424/424 [00:45<00:00,  9.34it/s]


torch.Size([27075, 27075])

In [16]:
print(f"top 10 juegos más parecidos a *** {games_df.iloc[1133]['name']} ***")
x = mm[1133].sort()
idxs = sorted(zip(x.indices, x.values), key=lambda x: x[1], reverse=True)
for i, score in idxs[:10]:
    print(f"{games_df.iloc[int(i)]['name']}: {score: .5f}")

top 10 juegos más parecidos a *** Project Zomboid ***
Project Zomboid:  1.00000
ZOMBI:  0.70454
ZombVR:  0.63422
Zombotron:  0.62776
Zimbo:  0.61906
PROJECT AZRIEL:  0.61701
Zombillie:  0.60557
Zombo Buster Rising:  0.59703
Project X:  0.59168
Project Nimbus:  0.59053
