In [1]:
import pandas as pd
import numpy as np
import scipy

In [2]:
#load dataset

import os.path

datasetdir = os.path.join('.', 'dataset')
filename = 'games.clean.csv'
path = os.path.join(datasetdir, filename)

data = pd.read_csv(path, index_col='id')

In [3]:
for idx in data.index:
    if pd.isna(data.loc[idx, 'genres']):
        data.loc[idx, 'genres'] = str(idx)
    if pd.isna(data.loc[idx, 'keywords']):
        data.loc[idx, 'keywords'] = str(idx)
    if pd.isna(data.loc[idx, 'themes']):
        data.loc[idx, 'themes'] = str(idx)
    if pd.isna(data.loc[idx, 'player_perspectives']):
        data.loc[idx, 'player_perspectives'] = str(idx)
    if pd.isna(data.loc[idx, 'game_modes']):
        data.loc[idx, 'game_modes'] = idx



 N   Column               Non-Null Count   Dtype      Type    Points  
---  ------               --------------   -----      ----    ------
 0   rating               103536 non-null  float64     S
 1   aggregated_rating    103536 non-null  float64     S
 2   follows              103536 non-null  float64     S
 3   game_modes           103536 non-null  object      N        20
 4   genres               93346 non-null   object      N        16
 5   involved_companies   103536 non-null  object      D
 6   keywords             56122 non-null   object      N        12
 7   summary              97520 non-null   object      N        6
 8   storyline            12658 non-null   object      N        4
 9   name                 103536 non-null  object      R
 10  platforms            103536 non-null  object      S
 11  player_perspectives  46982 non-null   object      N        6
 12  themes               63848 non-null   object      N        12
 13  campaigncoop         103536 non-null  bool        N        1
 14  dropin               103536 non-null  bool        N        1
 15  lancoop              103536 non-null  bool        N        1
 16  offlinecoop          103536 non-null  bool        N        1
 17  onlinecoop           103536 non-null  bool        N        1
 18  splitscreen          103536 non-null  bool        N        1
 19  year                 103536 non-null  int64       S

In [4]:
import stopwords
stopw = stopwords.ENGLISH_STOP_WORDS

In [5]:
from sklearn.feature_extraction.text import CountVectorizer
    
vectorizer = CountVectorizer(stop_words=stopw)
keywordsbag = vectorizer.fit_transform(data['keywords'])
sum_words = keywordsbag.sum(axis=0)

words_freq = [(word, sum_words[0, idx]) for word, idx in vectorizer.vocabulary_.items()]
words_freq = sorted(words_freq, key = lambda x: x[1], reverse=True)

words_freq

[('game', 58586),
 ('play', 13569),
 ('world', 13415),
 ('player', 12947),
 ('adventure', 12234),
 ('puzzle', 11519),
 ('time', 10499),
 ('level', 9890),
 ('character', 9569),
 ('steam', 9301),
 ('new', 7920),
 ('base', 7470),
 ('shooter', 7374),
 ('protagonist', 7329),
 ('action', 7103),
 ('enemy', 7057),
 ('fight', 7049),
 ('digital', 7021),
 ('strategy', 6563),
 ('fantasy', 6519),
 ('person', 6386),
 ('distribution', 6031),
 ('combat', 5987),
 ('weapon', 5942),
 ('use', 5697),
 ('battle', 5553),
 ('gun', 5382),
 ('make', 5321),
 ('fi', 5302),
 ('3d', 5192),
 ('race', 5163),
 ('sci', 5145),
 ('art', 5085),
 ('achievement', 5049),
 ('control', 5000),
 ('jump', 4967),
 ('platformer', 4929),
 ('role', 4866),
 ('simulation', 4810),
 ('story', 4718),
 ('based', 4703),
 ('anime', 4627),
 ('real', 4590),
 ('playstation', 4581),
 ('war', 4575),
 ('experience', 4574),
 ('turn', 4422),
 ('space', 4356),
 ('point', 4355),
 ('card', 4297),
 ('screen', 4284),
 ('mode', 4280),
 ('support', 4277),


In [6]:
for e in ['game', 'steam', 'protagonist','digital',
          'distribution', 'character', 'play', 'player', 
          'new', 'world', 'use', 'level', 'gameplay']:
    stopw.add(e)

In [7]:
from sklearn.feature_extraction.text import CountVectorizer

tovectorize = ['game_modes',
               'genres',
               'keywords',
               'storyline',
               'player_perspectives',
               'themes']
def addzero(s):
    zeroedstring = []
    for a in s.split():
        zeroedstring.append('0' + a)
    return ' '.join(zeroedstring)

data['player_perspectives'] = data['player_perspectives'].apply(addzero)
data['game_modes'] = data['game_modes'].apply(str).apply(addzero)

vectorizers = {feature: CountVectorizer(stop_words=stopw) for feature in tovectorize}
bagsofwords = {feature: vectorizers[feature].fit_transform(data[feature]) for feature in tovectorize}

In [8]:
data

Unnamed: 0_level_0,rating,aggregated_rating,follows,game_modes,genres,keywords,storyline,name,platforms,player_perspectives,themes,campaigncoop,dropin,lancoop,offlinecoop,onlinecoop,splitscreen,year
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
56115,70.0,70.0,0.0,056115,31,modern warfare exploration jet first person sh...,an early 3d flight simulator game release by s...,Jet Rocket,52,01,56115,False,False,False,False,False,False,1970
11304,70.0,70.0,0.0,01,11304,western text-based text adventure,highnoon be basic game develop by christopher ...,Highnoon,82 107,05,11304,False,False,False,False,False,False,1970
76506,70.0,70.0,0.0,01,14,game life life british game evolution state re...,the game of life also know simply a life be ce...,Game of Life,6 69 91 103,07,34 35,False,False,False,False,False,False,1970
11485,50.0,50.0,0.0,01,11485,pinball spaceship sci-fi shooter discrete logic,star trek be text-based computer game that put...,Star Trek,104 106,05,39,False,False,False,False,False,False,1971
11396,70.0,70.0,3.0,02,11396,spaceship sci-fi shooter record breaker,galaxy game be one of the early know coin-oper...,Galaxy Game,108,03,11396,False,False,False,False,False,False,1971
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217623,70.0,70.0,0.0,01,12 25 31,like dragon yakuza series feature dual protago...,like dragon be the next installment in the yak...,Like a Dragon 8,6 48 49 167 169,02,1 31,False,False,False,False,False,False,2024
148057,70.0,70.0,0.0,01,2 9 31 32 34,indie retro point and click point and click ad...,pandorex nobody really know what be up with it...,Pandorex,6,04,17 27 43,False,False,False,False,False,False,2025
214912,70.0,70.0,0.0,01,12,blood alternative history horror jrpg spiritua...,penny blood be alternative history horror jrpg...,Penny Blood,6 167 169,02,17 19 22 43,False,False,False,False,False,False,2025
83563,70.0,70.0,0.0,01 02,5 32,fps space 6dof,neptunegl be fast-paced three dimensional six ...,NeptuneGL,6,01,1,False,False,False,False,False,False,2030


In [9]:
tosparse = ['campaigncoop',
            'dropin',
            'lancoop',
            'offlinecoop',
            'onlinecoop',
            'splitscreen']

bagsofwords['multiplayermodes'] = scipy.sparse.csr_matrix(data[tosparse].astype(np.int64).values)

In [10]:
weights = {'game_modes': 10,
           'genres': 8,
           'keywords':7,
           'storyline':5,
           'player_perspectives':9,
           'themes':6,
           'multiplayermodes':1}

for i in bagsofwords:
    bagsofwords[i] = bagsofwords[i].multiply(weights[i])

X = scipy.sparse.hstack(list(bagsofwords.values()))

In [11]:
from sklearn.neighbors import NearestNeighbors

model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(X)

In [13]:
gameid = 19164
print(data.loc[gameid])
distances, indices = model_knn.kneighbors(X.getrow(data.index.get_loc(gameid)), n_neighbors = 100)
result = data.iloc[indices[0]][(data.iloc[indices[0]]['platforms'] == '6')]
result

rating                                                              70.0
aggregated_rating                                                   70.0
follows                                                            138.0
game_modes                                                      01 02 03
genres                                                                 5
keywords                                                   unreal engine
storyline              the original shooter-looter return pack bazill...
name                                                       Borderlands 3
platforms                                         6 14 48 49 167 169 170
player_perspectives                                                   01
themes                                                        1 17 27 38
campaigncoop                                                        True
dropin                                                              True
lancoop                                            

Unnamed: 0_level_0,rating,aggregated_rating,follows,game_modes,genres,keywords,storyline,name,platforms,player_perspectives,themes,campaigncoop,dropin,lancoop,offlinecoop,onlinecoop,splitscreen,year
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
172936,70.0,70.0,0.0,01 02 03,31 32,base building,undarkened be wizarding survival-adventure gam...,Undarkened: Revival,6,01,21,True,False,False,False,True,False,2021
8542,70.0,70.0,0.0,01 02,5,paintball,paintball game,Extreme Paintbrawl 4,6,01,8542,False,False,False,False,False,False,2002
25446,70.0,70.0,1.0,01 02,5,paintball,action game,Paintball Heroes,6,01,28,False,False,False,False,False,False,2001
146297,70.0,70.0,0.0,01 02,5,multiplayer,146297,Urban Mercenary,6,01,146297,False,False,False,False,False,False,2000
144823,70.0,70.0,0.0,01 02 03,5 32,retro procedural generation futuristic 6dof,desecrators be degree of freedom shooter where...,Desecrators,6,01,1 18,False,False,False,False,True,False,2021
142991,70.0,70.0,0.0,01 02 03,5 32,classic dynamic shooter science underground ce...,classic dynamic shooter which take place in sc...,Object N,6,01,1,True,True,False,False,True,False,2020
127584,70.0,70.0,0.0,01 02 03,32,casual shoot em up,if you enjoy absolute chaos blast enemy to pie...,Fight High,6,02 03,1 21,False,False,False,False,False,False,2020
12393,70.0,70.0,0.0,01 02,15,chess,12393,Arcade Chess,6,01,12393,False,False,False,False,False,False,2004
201359,70.0,70.0,0.0,01 02 03,32,ghost phasmophobia,this is ghost be horror game in which you embo...,This is a Ghost,6,01,1 19 21,False,False,False,False,False,False,2022
163904,70.0,70.0,0.0,01 02 03,5 32,hunt friend map weapon kill monster,hunt the chupacabras alone or with your friend...,Chupacabras: Night Hunt,6,01,1 19,False,False,False,False,False,False,2021
