# Content-Based Recommendation System Based on Genres

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the genre table from disk and preview its first rows.
vg_dumm = pd.read_csv('./data/genres_dumm.csv')
vg_dumm.head()

Unnamed: 0.1,Unnamed: 0,name,summary,genre,Adventure,Arcade,Card & Board Game,Fighting,Hack and slash/Beat 'em up,Indie,...,Racing,Real Time Strategy (RTS),Role-playing (RPG),Shooter,Simulator,Sport,Strategy,Tactical,Turn-based strategy (TBS),Visual Novel
0,0,Batman: Arkham City,After the events of Batman: Arkham Asylum (200...,"[""Hack and slash/Beat 'em up"", 'Adventure']",1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,1,Brink,Brink is an immersive first-person shooter tha...,['Shooter'],0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,2,FIFA 11,The best-selling and most critically-acclaimed...,"['Simulator', 'Sport']",0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
3,3,Fable III,"Set 50 years after the events of Fable II, the...","['Role-playing (RPG)', ""Hack and slash/Beat 'e...",1,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0
4,4,Hunted: The Demon's Forge,It will take the combined efforts of two great...,"['Role-playing (RPG)', ""Hack and slash/Beat 'e...",1,0,0,0,1,0,...,0,0,1,0,0,0,0,0,0,0


In [4]:
# Inspect the column labels to spot the non-genre columns.
vg_dumm.columns

Index(['Unnamed: 0', 'name', 'summary', 'genre', 'Adventure', 'Arcade',
       'Card & Board Game', 'Fighting', 'Hack and slash/Beat 'em up', 'Indie',
       'MOBA', 'Music', 'Pinball', 'Platform', 'Point-and-click', 'Puzzle',
       'Quiz/Trivia', 'Racing', 'Real Time Strategy (RTS)',
       'Role-playing (RPG)', 'Shooter', 'Simulator', 'Sport', 'Strategy',
       'Tactical', 'Turn-based strategy (TBS)', 'Visual Novel'],
      dtype='object')

In [5]:
# Strip the non-genre columns and index the table by game title in a single,
# re-runnable step. The original four in-place cells raised KeyError as soon
# as any of them was executed a second time; errors='ignore' and the
# index-name guard below make re-execution a no-op instead.
vg_dumm = vg_dumm.drop(columns=['Unnamed: 0', 'summary', 'genre'], errors='ignore')
if vg_dumm.index.name != 'name':
    vg_dumm = vg_dumm.set_index('name')

In [9]:
# Preview the table after the cleanup steps.
vg_dumm.head()

Unnamed: 0_level_0,Adventure,Arcade,Card & Board Game,Fighting,Hack and slash/Beat 'em up,Indie,MOBA,Music,Pinball,Platform,...,Racing,Real Time Strategy (RTS),Role-playing (RPG),Shooter,Simulator,Sport,Strategy,Tactical,Turn-based strategy (TBS),Visual Novel
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Batman: Arkham City,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Brink,0,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,0
FIFA 11,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,0,0,0,0
Fable III,1,0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
Hunted: The Demon's Forge,1,0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0


In [10]:
# Genre indicators for Fable III (row lookup by game title).
vg_dumm.loc['Fable III']

Adventure                     1
Arcade                        0
Card & Board Game             0
Fighting                      0
Hack and slash/Beat 'em up    1
Indie                         0
MOBA                          0
Music                         0
Pinball                       0
Platform                      0
Point-and-click               0
Puzzle                        0
Quiz/Trivia                   0
Racing                        0
Real Time Strategy (RTS)      0
Role-playing (RPG)            1
Shooter                       0
Simulator                     0
Sport                         0
Strategy                      0
Tactical                      0
Turn-based strategy (TBS)     0
Visual Novel                  0
Name: Fable III, dtype: int64

In [11]:
# Genre indicators for Hunted: The Demon's Forge (row lookup by game title).
vg_dumm.loc["Hunted: The Demon's Forge"]

Adventure                     1
Arcade                        0
Card & Board Game             0
Fighting                      0
Hack and slash/Beat 'em up    1
Indie                         0
MOBA                          0
Music                         0
Pinball                       0
Platform                      0
Point-and-click               0
Puzzle                        0
Quiz/Trivia                   0
Racing                        0
Real Time Strategy (RTS)      0
Role-playing (RPG)            1
Shooter                       0
Simulator                     0
Sport                         0
Strategy                      0
Tactical                      0
Turn-based strategy (TBS)     0
Visual Novel                  0
Name: Hunted: The Demon's Forge, dtype: int64

In [12]:
# Import the Jaccard similarity metric.
from sklearn.metrics import jaccard_score

# Genre indicator vectors for the two games under comparison.
fable_row = vg_dumm.loc['Fable III']
hunted_row = vg_dumm.loc["Hunted: The Demon's Forge"]
fable_values = fable_row.values
hunted_values = hunted_row.values

In [13]:
# Show the extracted genre vector for Fable III.
fable_values

array([1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0], dtype=int64)

In [14]:
# Jaccard similarity of Fable III against Hunted.
fable_vs_hunted = jaccard_score(fable_values, hunted_values)
print('Similarity between Fable and Hunted:', fable_vs_hunted)

# Same comparison against Batman: Arkham City.
batman_row = vg_dumm.loc['Batman: Arkham City']
batman_values = batman_row.values
fable_vs_batman = jaccard_score(fable_values, batman_values)
print('Similarity between Fable and Batman:', fable_vs_batman)

Similarity between Fable and Hunted: 1.0
Similarity between Fable and Batman: 0.6666666666666666


In [15]:
# Work on a reproducible random subset of the games: the number of pairwise
# comparisons grows quadratically with the row count, so the full table
# would not fit comfortably in memory.
SAMPLE_FRACTION = 0.05
SAMPLE_SEED = 42

vg_sample = vg_dumm.sample(frac=SAMPLE_FRACTION, random_state=SAMPLE_SEED)

In [16]:
# Pairwise-distance helpers from scipy.
from scipy.spatial.distance import pdist, squareform

# Condensed vector of Jaccard distances: one entry per unordered pair of
# sampled games.
jaccard_distances = pdist(vg_sample.values, metric='jaccard')

# Expand to a square matrix and flip distance into similarity
# (1 means identical genre sets).
jaccard_similarity_array = 1 - squareform(jaccard_distances)

# Label both axes with the game titles so rows can be looked up by name.
game_titles = vg_sample.index
jaccard_similarity_df = pd.DataFrame(
    jaccard_similarity_array,
    index=game_titles,
    columns=game_titles,
)

# Preview the first five rows.
jaccard_similarity_df.head()

name,Forests Are For Trees,Green Moon 2,HA/CK,Oakshade Acres,Gambler Densetsu Tetsuya: Yomigaeru Densetsu,FRAMED,Dance School Stories,Men in Black: The Series – Crashdown,Tiger Tank 59 Ⅰ: Break The Fog MP086,Cosmic Fantasy: Bouken Shounen Yuu,...,Overcraft,Tetris Gems,Killing Floor,Gateball VR,Disney's Bolt,Queen's Quest 3: The End of Dawn,Slavistan 2,Galaxy Wars,Forced Alliance: The Glarious Mandate,SD Gundam Gaiden: Knight Gundam Monogatari
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Forests Are For Trees,1.0,1.0,0.0,0.333333,0.0,0.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.5,0.333333,0.5,0.0,0.0,0.0
Green Moon 2,1.0,1.0,0.0,0.333333,0.0,0.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.5,0.333333,0.5,0.0,0.0,0.0
HA/CK,0.0,0.0,1.0,0.333333,0.0,0.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0
Oakshade Acres,0.333333,0.333333,0.333333,1.0,0.0,0.25,0.25,0.0,0.333333,0.0,...,0.333333,0.0,0.0,0.0,0.25,0.2,0.666667,0.0,0.333333,0.0
Gambler Densetsu Tetsuya: Yomigaeru Densetsu,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Number of unordered game pairs held in the condensed distance vector.
jaccard_distances.shape[0]

18908175

In [18]:
# Shape of the square pairwise matrix. Read it from the array that already
# exists instead of calling squareform() again, which would allocate a second
# full-size matrix only to discard it.
jaccard_similarity_array.shape

(6150, 6150)

In [23]:
# Similarity of every sampled game to Disney's Bolt.
jaccard_similarity_series = jaccard_similarity_df.loc["Disney's Bolt"]

# Rank from most to least similar.
ordered_similarities = jaccard_similarity_series.sort_values(ascending=False)

# Show the heading followed by the ranked scores.
print('Recomendacioens para Disney Bolt')
print(ordered_similarities)

Recomendacioens para Disney Bolt
name
L'intrépide, le futur de l'imparfait          1.0
Toy Story 3: The Video Game                   1.0
Kirby: Squeak Squad                           1.0
Disney Presents: Tigger's Honey Hunt          1.0
Blaster Master Zero 2                         1.0
                                             ... 
AFTERGRINDER                                  0.0
FIFA 10                                       0.0
Brayan Odleys Numbers                         0.0
DriveCrazy                                    0.0
SD Gundam Gaiden: Knight Gundam Monogatari    0.0
Name: Disney's Bolt, Length: 6150, dtype: float64


In [21]:
# Function to get recommendations based on a game you like.

def recommend(game=None, n=5, similarity_df=None):
    """Return the games most similar to ``game`` by genre similarity.

    Parameters
    ----------
    game : str, optional
        Title to look up. When omitted, the title is read from standard
        input, which is the original interactive behaviour.
    n : int, default 5
        Maximum number of recommendations to return.
    similarity_df : pandas.DataFrame, optional
        Square similarity matrix labelled by game title on both axes.
        Defaults to the notebook-level ``jaccard_similarity_df``.

    Returns
    -------
    pandas.Series
        Up to ``n`` similarity scores sorted from highest to lowest and
        indexed by game title. The queried game itself is excluded.

    Raises
    ------
    KeyError
        If ``game`` is not a row label of the similarity matrix.
    """
    if similarity_df is None:
        similarity_df = jaccard_similarity_df
    if game is None:
        game = input()

    # Tolerate stray whitespace around a typed title before giving up.
    if game not in similarity_df.index and isinstance(game, str):
        game = game.strip()
    if game not in similarity_df.index:
        raise KeyError(f'{game!r} is not in the similarity matrix')

    scores = similarity_df.loc[game]
    # A title that appears more than once yields several rows; use the first.
    if isinstance(scores, pd.DataFrame):
        scores = scores.iloc[0]

    # A game always matches itself perfectly, so it is not a useful suggestion.
    scores = scores.drop(labels=game, errors='ignore')

    ordered_recomm = scores.sort_values(ascending=False)
    print('También podrían interesarte: ')
    return ordered_recomm.iloc[:n]

In [24]:
# Reads a game title from standard input and returns its top matches.
recommend()

 FIFA 10


También podrían interesarte: 


name
Snooker Stars                 1.0
NBA Give 'n Go                1.0
Pro Evolution Soccer 2009     1.0
Major League Baseball 2K10    1.0
Tennis                        1.0
Name: FIFA 10, dtype: float64