### Import Libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### Loading Data

In [14]:
# Get games data from CSV
# The path is relative, so it is resolved against the notebook's working directory.
# NOTE(review): the following cell may reassign dataGames from a database cursor.
dataGames = pd.read_csv('data/cleaned/processed_games_for_content-based.csv')

In [3]:
# Creating the dataframe
# NOTE(review): `mycol` (presumably a pymongo collection) is never defined in this
# notebook, so on a fresh kernel this cell used to fail with NameError. The frame is
# only rebuilt from the database when that handle exists; otherwise the frame read
# from CSV in the previous cell is kept as-is.
if 'mycol' in globals():
    cursor = mycol.find()
    # Build the frame from the fetched records and drop their '_id' column
    # (returns a new frame instead of deleting the column in place).
    dataGames = pd.DataFrame(list(cursor)).drop(columns='_id')
dataGames.head(5)

Unnamed: 0,name,developer,publisher,popular_tags,game_details,genre,original_price,discount_price,review_qualification,percentage_positive_review,ID,genre_publisher_developer
0,DOOM,idSoftware,"BethesdaSoftworks,BethesdaSoftworks","FPS,Gore,Action,Demons,Shooter,First-Person,Gr...","Single-player,Multi-player,Co-op,SteamAchievem...",Action,$19.99,$14.99,Very Positive,92,doom,"Action,idSoftware,BethesdaSoftworks"
1,PLAYERUNKNOWN'S BATTLEGROUNDS,PUBGCorporation,"PUBGCorporation,PUBGCorporation","Survival,Shooter,Multiplayer,BattleRoyale,PvP,...","Multi-player,OnlineMulti-Player,Stats","Action,Adventure,MassivelyMultiplayer",$29.99,,Mixed,49,playerunknownsbattlegrounds,"Action,Adventure,MassivelyMultiplayer,PUBGCorp..."
2,BATTLETECH,HarebrainedSchemes,"ParadoxInteractive,ParadoxInteractive","Mechs,Strategy,Turn-Based,Turn-BasedTactics,Sc...","Single-player,Multi-player,OnlineMulti-Player,...","Action,Adventure,Strategy",$39.99,,Mostly Positive,71,battletech,"Adventure,Action,ParadoxInteractive,Strategy,H..."
3,DayZ,BohemiaInteractive,"BohemiaInteractive,BohemiaInteractive","Survival,Zombies,OpenWorld,Multiplayer,PvP,Mas...","Multi-player,OnlineMulti-Player,SteamWorkshop,...","Action,Adventure,MassivelyMultiplayer",$44.99,,Mixed,61,dayz,"Action,Adventure,BohemiaInteractive,MassivelyM..."
4,EVE Online,CCP,"CCP,CCP","Space,MassivelyMultiplayer,Sci-fi,Sandbox,MMOR...","Multi-player,OnlineMulti-Player,MMO,Co-op,Onli...","Action,FreetoPlay,MassivelyMultiplayer,RPG,Str...",Free,,Mostly Positive,74,eveonline,"Action,MassivelyMultiplayer,CCP,RPG,Strategy,F..."


In [4]:
# Print column names, non-null counts and dtypes of the loaded frame
dataGames.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40833 entries, 0 to 40832
Data columns (total 12 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   name                        40833 non-null  object
 1   developer                   40833 non-null  object
 2   publisher                   40833 non-null  object
 3   popular_tags                40833 non-null  object
 4   game_details                40833 non-null  object
 5   genre                       40833 non-null  object
 6   original_price              35522 non-null  object
 7   discount_price              14543 non-null  object
 8   review_qualification        17363 non-null  object
 9   percentage_positive_review  40833 non-null  int64 
 10  ID                          40833 non-null  object
 11  genre_publisher_developer   40833 non-null  object
dtypes: int64(1), object(11)
memory usage: 3.7+ MB


In [5]:
# Construct a reverse map of indices and game names
# (Series keyed by game name whose values are the row labels of dataGames).
# NOTE(review): Series.drop_duplicates() de-duplicates the *values* (the row labels),
# not the name index, so repeated game names stay in the map. get_recommendations
# depends on this: a lookup of a repeated name yields a Series, which it rejects.
indices = pd.Series(dataGames.index, index=dataGames['name']).drop_duplicates()

In [6]:
# get list of games we have info about
# (distinct names only, so the count below can be smaller than the number of rows
# when several rows share a name)
listGames = dataGames['name'].unique()
len(listGames)

40750

### Content-based Recommendation

In [7]:
# Column layout for a recommendations table: "user_id" followed by one
# column per rank, labelled "1" .. str(n_recommendation)
n_recommendation = 20
col_names = ["user_id"] + [str(rank) for rank in range(1, n_recommendation + 1)]

In [8]:
# Function that takes in game name and Cosine Similarity matrix as input and outputs most similar games
def get_recommendations(title, cosine_sim):
    """Return the names of the games most similar to ``title``.

    Parameters
    ----------
    title : str
        Game name, looked up in the module-level ``listGames`` and ``indices``.
    cosine_sim : 2-D indexable (e.g. a numpy array)
        Pairwise similarity matrix; ``cosine_sim[i]`` must yield the scores of
        row ``i`` of ``dataGames`` against every row, in row order.

    Returns
    -------
    list of str
        At most ``n_recommendation`` game names, most similar first. The list
        is empty when the title is unknown or when several rows share the name.
    """
    if title not in listGames:
        return []

    # Get the index of the game that matches the name
    idx = indices[title]

    # If two or more games share the name (e.g. RUSH) the lookup returns a
    # Series; the query is ambiguous, so no recommendation is made.
    if isinstance(idx, pd.Series):
        return []

    # Pair every *other* game with its similarity score. The queried game is
    # excluded by position instead of assuming it sorts first: the sort below is
    # stable, so a game with an identical score at a lower row would take the
    # first slot and the queried game would be recommended to itself.
    sim_scores = [
        (i, score) for i, score in enumerate(cosine_sim[idx]) if i != idx
    ]

    # Sort the games based on the similarity scores (highest first)
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Keep the row positions of the top matches
    game_indices = [i for i, _ in sim_scores[:n_recommendation]]

    # Return the top most similar games
    return dataGames['name'].iloc[game_indices].tolist()

In [9]:
def generate_recommendation_output(column_name):
    """Build the pairwise cosine-similarity matrix for one text column of ``dataGames``.

    Missing values in ``dataGames[column_name]`` are replaced by '' (the
    module-level frame itself is updated), the column is converted to token
    counts with English stop words removed, and the rows of that count matrix
    are compared with each other.

    Returns the result of ``cosine_similarity`` on the count matrix with itself.
    """
    # Make sure the column holds no NaN before it is vectorized
    dataGames[column_name] = dataGames[column_name].fillna('')

    # Token counts per game for the chosen column
    vectorizer = CountVectorizer(stop_words='english')
    token_counts = vectorizer.fit_transform(dataGames[column_name])

    # Similarity of every row against every row
    return cosine_similarity(token_counts, token_counts)

In [10]:
# Similarity matrix built from the combined genre/publisher/developer column.
# NOTE(review): the result has one row and one column per game, (40833, 40833) per
# the shape printed below; that is roughly 13 GB if stored as dense float64. Confirm
# the memory budget before re-running.
similarity_matrix = generate_recommendation_output('genre_publisher_developer')

In [22]:
# Shape of the similarity matrix as (rows, columns)
similarity_matrix.shape

(40833, 40833)

In [None]:
# NOTE(review): Binary is imported but not used in this cell.
from bson.binary import Binary
# Write the similarity matrix to 'test.npy' (relative path) with np.save
with open('test.npy', 'wb') as f:
    np.save(f, similarity_matrix)

In [None]:
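# TODO(review): unfinished draft, presumably for storing the matrix in the database.
# Not runnable as-is: `mydb` and `record` are not defined in this notebook.
# mycol = mydb["recommendation_matrix"]
# record['name'] = "Steam Content Based"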

### Similar games

#### Example 1

In [21]:
# Most similar games to 'Dota 2' (an empty list if the name is unknown or shared by several rows)
recommendations = get_recommendations('Dota 2', similarity_matrix)

In [22]:
# Show the row(s) of the queried game for comparison with its recommendations
dataGames[dataGames['name'] == 'Dota 2']

Unnamed: 0,name,developer,publisher,popular_tags,game_details,genre,original_price,discount_price,review_qualification,percentage_positive_review,ID,genre_publisher_developer
419,Dota 2,Valve,"Valve,Valve","FreetoPlay,MOBA,Multiplayer,Strategy,e-sports,...","Multi-player,Co-op,SteamTradingCards,SteamWork...","Action,FreetoPlay,Strategy",Free to Play,$65.70,Very Positive,85,dota2,"Action,FreetoPlay,Strategy,Valve"


In [23]:
# Show the rows of the recommended games.
# NOTE: isin() keeps the frame's own row order, so the rows are not listed by
# similarity rank, and every row sharing a recommended name is included.
dataGames[dataGames['name'].isin(recommendations)]

Unnamed: 0,name,developer,publisher,popular_tags,game_details,genre,original_price,discount_price,review_qualification,percentage_positive_review,ID,genre_publisher_developer
26,Portal,Valve,"Valve,Valve","Puzzle,First-Person,Singleplayer,Sci-fi,Comedy...","Single-player,SteamAchievements,Captionsavaila...",Action,$9.99,$14.98,Overwhelmingly Positive,97,portal,"Action,Valve"
42,Counter-Strike: Source,Valve,"Valve,Valve","Shooter,Action,FPS,Multiplayer,Team-Based,Firs...","Multi-player,Cross-PlatformMultiplayer,SteamAc...",Action,$9.99,$65.70,Overwhelmingly Positive,95,counterstrikesource,"Action,Valve"
271,Left 4 Dead 2,Valve,"Valve,Valve","Zombies,Co-op,FPS,Multiplayer,Shooter,Action,O...","Single-player,Multi-player,Co-op,SteamAchievem...",Action,$9.99,$14.98,Overwhelmingly Positive,96,left4dead2,"Action,Valve"
386,Team Fortress 2,Valve,"Valve,Valve","FreetoPlay,Multiplayer,FPS,Shooter,Action,Clas...","Multi-player,Cross-PlatformMultiplayer,SteamAc...","Action,FreetoPlay",Free to Play,$65.70,Very Positive,93,teamfortress2,"Action,FreetoPlay,Valve"
422,Counter-Strike: Global Offensive,"Valve,HiddenPathEntertainment","Valve,Valve","FPS,Shooter,Multiplayer,Competitive,Action,Tea...","Multi-player,SteamAchievements,Fullcontrollers...","Action,FreetoPlay",Free to Play,$65.70,Very Positive,86,counterstrikeglobaloffensive,"Action,FreetoPlay,HiddenPathEntertainment,Valve"
593,Half-Life 2,Valve,"Valve,Valve","FPS,Action,Sci-fi,Classic,Singleplayer,Masterp...","Single-player,SteamAchievements,SteamTradingCa...",Action,$9.99,$65.70,Overwhelmingly Positive,96,halflife2,"Action,Valve"
596,Half-Life,Valve,"Valve,Valve","FPS,Classic,Action,Sci-fi,Singleplayer,Masterp...","Single-player,Multi-player,OnlineMulti-Player,...",Action,$9.99,$65.70,Overwhelmingly Positive,95,halflife,"Action,Valve"
767,Left 4 Dead Bundle,Valve,Valve,,"Single-player,Multi-player,Co-op,SteamAchievem...",Action,,$14.98,,-1,left4deadbundle,"Action,Valve"
990,Bloons TD Battles,NinjaKiwi,"NinjaKiwi,NinjaKiwi","FreetoPlay,TowerDefense,Multiplayer,Strategy,A...","Cross-PlatformMultiplayer,SteamAchievements,St...","Action,FreetoPlay,Strategy",Free to Play,,Very Positive,84,bloonstdbattles,"Action,NinjaKiwi,FreetoPlay,Strategy"
1208,The Orange Box,Valve,Valve,,"Single-player,Multi-player,Cross-PlatformMulti...","Action,FreetoPlay",$19.99,$9.99,,-1,theorangebox,"Action,FreetoPlay,Valve"


#### Example 2

In [24]:
# Most similar games to 'Starbound' (an empty list if the name is unknown or shared by several rows)
recommendations = get_recommendations('Starbound', similarity_matrix)

In [25]:
# Show the row(s) of the queried game for comparison with its recommendations
dataGames[dataGames['name'] == 'Starbound']

Unnamed: 0,name,developer,publisher,popular_tags,game_details,genre,original_price,discount_price,review_qualification,percentage_positive_review,ID,genre_publisher_developer
303,Starbound,Chucklefish,"Chucklefish,Chucklefish","Sandbox,Survival,Adventure,Space,Crafting,Indi...","Single-player,Multi-player,OnlineMulti-Player,...","Action,Adventure,Casual,Indie,RPG",$14.99,$79.84,Very Positive,90,starbound,"Adventure,Action,Chucklefish,Indie,RPG,Casual"


In [26]:
# Show the rows of the recommended games.
# NOTE: isin() keeps the frame's own row order, so the rows are not listed by
# similarity rank, and every row sharing a recommended name is included.
dataGames[dataGames['name'].isin(recommendations)]

Unnamed: 0,name,developer,publisher,popular_tags,game_details,genre,original_price,discount_price,review_qualification,percentage_positive_review,ID,genre_publisher_developer
73,Castle Crashers®,TheBehemoth,"TheBehemoth,TheBehemoth","Co-op,Action,Multiplayer,Adventure,LocalCo-Op,...","Single-player,Multi-player,OnlineMulti-Player,...","Action,Adventure,Casual,Indie,RPG",$14.99,$29.56,Overwhelmingly Positive,96,castlecrashers,"Adventure,Action,Indie,RPG,Casual,TheBehemoth"
964,Bladed Fury,NEXTStudios,"NEXTStudios,NEXTStudios","Action,FemaleProtagonist,Indie,Violent,Adventu...","Single-player,SteamAchievements,Fullcontroller...","Action,Adventure,Casual,Indie,RPG",$9.99,$6.99,Very Positive,84,bladedfury,"Adventure,Action,Indie,RPG,NEXTStudios,Casual"
1161,Timespinner,LunarRayGames,"Chucklefish,Chucklefish","Metroidvania,Indie,RPG,Adventure,Action,Female...","Single-player,LocalCo-op,SteamAchievements,Ful...","Action,Adventure,Indie,RPG",$19.99,$79.84,Very Positive,85,timespinner,"Adventure,Action,Chucklefish,Indie,RPG,LunarRa..."
2308,Book of Demons,ThingTrunk,"ThingTrunk,ThingTrunk","DungeonCrawler,HackandSlash,Singleplayer,RPG,I...","Single-player,SteamAchievements,Fullcontroller...","Action,Adventure,Casual,Indie,RPG",$24.99,$70.28,Very Positive,92,bookofdemons,"Adventure,ThingTrunk,Action,Indie,RPG,Casual"
3432,Ages of Mages: The last keeper,YFCgames,"YFCgames,YFCgames","RPG,Indie,Action,Adventure,Casual,LocalCo-Op,L...","Single-player,LocalMulti-Player,LocalCo-op,Sha...","Action,Adventure,Casual,Indie,RPG",$11.99,,Very Positive,81,agesofmagesthelastkeeper,"YFCgames,Adventure,Action,Indie,RPG,Casual"
4016,Ara Fell,StegosoftGames,"StegosoftGames,StegosoftGames","RPG,RPGMaker,Indie,Adventure,JRPG,FemaleProtag...","Single-player,Fullcontrollersupport,SteamTradi...","Action,Adventure,Casual,Indie,RPG",$9.99,,Very Positive,95,arafell,"StegosoftGames,Adventure,Action,Indie,RPG,Casual"
4457,Dabman: When the Haters Dab Back,BmcStudio,"BmcStudio,BmcStudio","Casual,Action,Adventure,PsychologicalHorror,Me...","Single-player,ProfileFeaturesLimited \r\n\t\t\...","Action,Adventure,Casual,Indie,RPG",$36.87,$32.56,Very Positive,92,dabmanwhenthehatersdabback,"Adventure,Action,Indie,RPG,Casual,BmcStudio"
4481,Chucklefish & Friends Bundle,Robotality,Chucklefish,,"Single-player,Multi-player,OnlineMulti-Player,...","Action,Indie,RPG,Strategy,Adventure,Casual",,$79.84,,-1,chucklefishfriendsbundle,"Adventure,Action,Chucklefish,Indie,RPG,Robotal..."
5886,Loot Hero DX,VaragtP,"VaragtP,VaragtP","Casual,Indie,RPG,Action,Adventure,Clicker,Pixe...","Single-player,SteamAchievements,Fullcontroller...","Action,Adventure,Casual,Indie,RPG",$2.99,$5.70,Mixed,68,lootherodx,"Adventure,Action,VaragtP,Indie,RPG,Casual"
5951,XGen Studios Bundle,XGenStudios,XGenStudios,,"Single-player,Shared/SplitScreen,SteamAchievem...","Adventure,Casual,Indie,RPG,Action",,$54.96,,-1,xgenstudiosbundle,"Adventure,XGenStudios,Action,Indie,RPG,Casual"
