# Content-Based Recommendations System based on Plots.

In [227]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [228]:
vg_plots = pd.read_csv('./data/vg_plots.csv')

In [229]:
vg_plots.head()

Unnamed: 0.1,Unnamed: 0,name,summary
0,0,Batman: Arkham City,After the events of Batman: Arkham Asylum (200...
1,1,Brink,Brink is an immersive first-person shooter tha...
2,2,FIFA 11,The best-selling and most critically-acclaimed...
3,3,Fable III,"Set 50 years after the events of Fable II, the..."
4,4,Hunted: The Demon's Forge,It will take the combined efforts of two great...


In [230]:
vg_plots.drop(columns = ['Unnamed: 0'], inplace = True)

In [231]:
vg_plots.isna().sum()

name       0
summary    0
dtype: int64

In [232]:
# Creating a smaller sample.
vg_sample = vg_plots.sample(frac=0.1, random_state=42)

In [233]:
vg_sample.shape

(12301, 2)

In [234]:
# Instantiate the vectorizer object to the vectorizer variable.
vectorizer = TfidfVectorizer(min_df = 2, max_df = 7)

# Fit and transform the plot column.
vectorized_data = vectorizer.fit_transform(vg_sample['summary'])

# Look at the features generated.
print(vectorizer.get_feature_names())



In [235]:
vectorized_data.shape

(12301, 9751)

In [236]:
# Create Dataframe from TF-IDFarray. The tfidf_df DataFrame contains the videogames and their TF-IDF features.
tfidf_df = pd.DataFrame(vectorized_data.toarray(), columns = vectorizer.get_feature_names())

# Assign the videogames titles to the index and inspect.
tfidf_df.index = vg_sample['name']
tfidf_df.head()

Unnamed: 0_level_0,00,001,002,007,010,011,012,014,016,02,...,ведь,не,от,происходит,то,что,этом,おんたま,おんぷ島へん,これは
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Forests Are For Trees,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Green Moon 2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HA/CK,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Oakshade Acres,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gambler Densetsu Tetsuya: Yomigaeru Densetsu,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [237]:
# Create the array of cosine similarity values.
cosine_similarity_array = cosine_similarity(tfidf_df)

# Wrap the array in a pandas DataFrame.
cosine_similarity_df = pd.DataFrame(cosine_similarity_array, index=tfidf_df.index, columns=tfidf_df.index)

cosine_similarity_df.head()

name,Forests Are For Trees,Green Moon 2,HA/CK,Oakshade Acres,Gambler Densetsu Tetsuya: Yomigaeru Densetsu,FRAMED,Dance School Stories,Men in Black: The Series – Crashdown,Tiger Tank 59 Ⅰ: Break The Fog MP086,Cosmic Fantasy: Bouken Shounen Yuu,...,BUCKLER 2,ArcBall 3: Infinity,Rebellion Again,Hill Quest,Psycho Killer,Life Is Strange: Before the Storm - Episode 2: Brave New World,Catch the Bowling Balls,Dungeons Again,Girls Dance,SnowNight
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Forests Are For Trees,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Green Moon 2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
HA/CK,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Oakshade Acres,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Gambler Densetsu Tetsuya: Yomigaeru Densetsu,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [238]:
# Find the values for the game Life is Strange 2.
cosine_similarity_series = cosine_similarity_df.loc['Life Is Strange: Before the Storm - Episode 2: Brave New World']

# Sort these values highest to lowest.
ordered_similarities = cosine_similarity_series.sort_values(ascending=False)

# Print the results.
print('Recomendaciones para Life is Strange 2')
print(ordered_similarities)

Recomendaciones para Life is Strange 2
name
Life Is Strange: Before the Storm - Episode 2: Brave New World    1.000000
Life is Strange Remastered                                        1.000000
Life is Strange: Before the Storm - Episode 1: Awake              0.580789
Size DOES Matter                                                  0.282756
The Witcher 2: Assassins Of Kings - Dark Edition                  0.113282
                                                                    ...   
Hellraid: The Escape                                              0.000000
Barbaros                                                          0.000000
Hei                                                               0.000000
Super Bear Adventure                                              0.000000
SnowNight                                                         0.000000
Name: Life Is Strange: Before the Storm - Episode 2: Brave New World, Length: 12301, dtype: float64


In [239]:
# Function to get recommendations from a short description (user's input).

add = (len(vg_sample)) + 1
vg_samples = vg_sample.copy()
def recommend():
    wish_game = input(str)
    vg_samples.loc[add] = ('Wished game by user', wish_game)
    vectorizerr = TfidfVectorizer(min_df = 2, max_df = 7)
    vectorized_dataa = vectorizerr.fit_transform(vg_samples['summary'])
    tfidf_dff = pd.DataFrame(vectorized_dataa.toarray(), columns = vectorizerr.get_feature_names())
    tfidf_dff.index = vg_samples['name']
    cosine_similarity_arrayy = cosine_similarity(tfidf_dff)
    cosine_similarity_dff = pd.DataFrame(cosine_similarity_arrayy, index=tfidf_dff.index, columns=tfidf_dff.index)
    cosine_similarity_seriess = cosine_similarity_dff.loc['Wished game by user']
    ordered_recomm = cosine_similarity_seriess.sort_values(ascending=False)
    print('You could enjoy this games: ')
    return ordered_recomm[:5]

In [241]:
recommend()

<class 'str'> DANI NO ME SUSPENDAS POR FAVOR


You could enjoy this games: 


name
Wished game by user       1.000000
Charlie Chaplin           0.310431
Little Computer People    0.241792
Jet Set Willy             0.221071
Milkyway Funland          0.000000
Name: Wished game by user, dtype: float64