# Steam Recommender System

Julia Tsaltas / Aug 2021

Data from Kaggle: https://www.kaggle.com/tamber/steam-video-games

## Part 2: Recommender Based on Play Time Comparison

In [13]:
import pandas as pd
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity

In [3]:
steam = pd.read_csv('datasets/steam_clean.csv')

In [4]:
steam.head()

Unnamed: 0,user,game,purchase,play_time
0,151603712,Fallout 4,1.0,87.0
1,151603712,Spore,1.0,14.9
2,151603712,Fallout New Vegas,1.0,12.1
3,151603712,Left 4 Dead 2,1.0,8.9
4,151603712,HuniePop,1.0,8.5


In [8]:
# Check unique number of users
len(steam['user'].unique())

11350

In [32]:
# How many games
len(steam['game'].unique())

3600

### Make a pivot table showing play time for each game for each player

In [49]:
pivot = pd.pivot_table(steam, index='game', columns='user', values='play_time')

In [50]:
pivot

user,5250,76767,86540,144736,181212,229911,298950,381543,547685,554278,...,309228590,309255941,309262440,309265377,309404240,309434439,309554670,309626088,309824202,309903146
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 Legends,,,,,,,,,,,...,,,,,,,,,,
0RBITALIS,,,,,,,,,,,...,,,,,,,,,,
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),,,,,,,,,,,...,,,,,,,,,,
10 Second Ninja,,,,,,,,,,,...,,,,,,,,,,
10000000,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
rymdkapsel,,,,,,,,,,,...,,,,,,,,,,
sZone-Online,,,,,,,,,,,...,,,,,,,,,,
the static speaks my name,,,,,,,,,,,...,,,,,,,,,,
theHunter,,,,,,,,,0.2,,...,,,,,,,,,,


PSA: Please, drop beats, not ugly babies.

### Create a Sparse Matrix

In [51]:
sparse_pivot = sparse.csr_matrix(pivot.fillna(0))

### Calculate Cosine Similarity

In [52]:
recommender = cosine_similarity(sparse_pivot)

### Create DataFrame for Readability

In [53]:
recommender_df = pd.DataFrame(recommender, columns=pivot.index, index=pivot.index)

recommender_df

game,007 Legends,0RBITALIS,1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),10 Second Ninja,"10,000,000",100% Orange Juice,1000 Amps,12 Labours of Hercules,12 Labours of Hercules II The Cretan Bull,12 Labours of Hercules III Girl Power,...,rFactor,rFactor 2,realMyst,realMyst Masterpiece Edition,resident evil 4 / biohazard 4,rymdkapsel,sZone-Online,the static speaks my name,theHunter,theHunter Primal
game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 Legends,1.0,0.000000,0.000000,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
0RBITALIS,0.0,1.000000,0.000000,0.07528,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.003915,0.000000
1... 2... 3... KICK IT! (Drop That Beat Like an Ugly Baby),0.0,0.000000,1.000000,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.001579,0.000000,0.000000,0.000000
10 Second Ninja,0.0,0.075280,0.000000,1.00000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
10000000,0.0,0.000000,0.000000,0.00000,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
rymdkapsel,0.0,0.000000,0.000000,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,1.0,0.000000,0.000000,0.000000,0.000000
sZone-Online,0.0,0.000000,0.001579,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,1.000000,0.039860,0.041457,0.000000
the static speaks my name,0.0,0.000000,0.000000,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000000,0.0,0.039860,1.000000,0.002558,0.000000
theHunter,0.0,0.003915,0.000000,0.00000,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.000087,0.0,0.041457,0.002558,1.000000,0.029732


### Search for your fave Games. There are 3600 to choose from!

In [56]:
search = "Thomas"
for title in steam.loc[steam['game'].str.contains(search), 'game']:
    print(title)
    print(f"Average Play Time (Hrs): {round(pivot.loc[title, :].mean(), 2)}")
    print(f"Number of Players: {pivot.T[title].count()}")
    print('')
    print("10 closest titles:")
    print(round(recommender_df[title].sort_values(ascending=False)[1:11], 2))
    print('-----------------------------------------------')
    print('')

Thomas Was Alone
Average Play Time (Hrs): 2.24
Number of Players: 27

10 closest titles:
game
Portal 2 - The Final Hours                   0.52
Oozi Earth Adventure                         0.52
Cloud Chamber                                0.52
Return to Mysterious Island 2                0.52
Critical Mass                                0.52
Mission Control NanoMech                     0.52
Fester Mudd Curse of the Gold - Episode 1    0.52
GEARCRACK Arena                              0.52
Defense Grid 2 A Matter of Endurance         0.52
Dreamfall Chapters                           0.49
Name: Thomas Was Alone, dtype: float64
-----------------------------------------------

Thomas Was Alone
Average Play Time (Hrs): 2.24
Number of Players: 27

10 closest titles:
game
Portal 2 - The Final Hours                   0.52
Oozi Earth Adventure                         0.52
Cloud Chamber                                0.52
Return to Mysterious Island 2                0.52
Critical Mass           