## Importing libraries

In [1]:
import pandas as pd

## Reading csv file

In [2]:
metacritic = pd.read_csv("./dataset/metacritic_Toppc_games.csv")

In [3]:
metacritic.head()

Unnamed: 0,Name,Release_Date,Rating,Description,Score
0,Disco Elysium: The Final Cut,"Mar 30, 2021",M,Disco Elysium - The Final Cut is the definitiv...,97 Metascore
1,Half-Life 2,"Nov 16, 2004",M,[Metacritic's 2004 PC Game of the Year] By ta...,96 Metascore
2,Grand Theft Auto V,"Apr 13, 2015",M,"Los Santos is a vast, sun-soaked metropolis fu...",96 Metascore
3,Baldur's Gate 3,"Aug 3, 2023",M,"An ancient evil has returned to Baldur's Gate,...",96 Metascore
4,The Orange Box,"Oct 10, 2007",M,Games included in The Orange Box compilation: ...,96 Metascore


## Function to set all the column values to lowercase

In [4]:
def lowercaseColumns(df):
    for i in df.columns:
        if df[i].dtype == "object":
            df[i] = df[i].str.lower()
    return df

In [5]:
metacritic.columns = [i.lower() for i in metacritic.columns]
metacritic = lowercaseColumns(metacritic)

In [6]:
metacritic.shape

(6294, 5)

## Droping unnecesary features

In [7]:
metacritic = metacritic.drop(columns=["release_date", "rating", "score"])

metacritic

## Checking null values and droping them

In [8]:
metacritic.isnull().sum()

name           0
description    6
dtype: int64

In [9]:
metacritic = metacritic.dropna(subset=["description"])

In [10]:
metacritic.isnull().sum()

name           0
description    0
dtype: int64

## Preprocessing text using NLTK, applying stopword removal, lemmatization

In [11]:
import re
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

In [12]:
lemma = WordNetLemmatizer()

In [13]:
corpus = []
for description in metacritic["description"]:
    description = re.sub("[^a-zA-Z]", " ", description)
    description = description.split()
    description = [lemma.lemmatize(i) for i in description if i not in stopwords.words("english")]
    description = " ".join(description)
    corpus.append(description)

In [14]:
corpus

['disco elysium final cut definitive edition smash hit rpg pursue political dream new quest meet question city local explore whole extra area full voice acting controller support expanded language option also included get even award winning open world detective unique skill system disposal whole city block carve path across interrogate unforgettable character crack murder take bribe become hero absolute disaster human',
 'metacritic pc game year taking suspense challenge visceral charge original adding startling new realism responsiveness half life open door world player presence affect everything around physical environment behavior even emotion friend enemy player pick crowbar research scientist gordon freeman find alien infested earth picked bone resource depleted populace dwindling freeman thrust unenviable role rescuing world wrong unleashed back black mesa lot people people care counting vivendi universal',
 'los santos vast sun soaked metropolis full self help guru starlet impor

## Creating token vector

In [15]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer()
x = cv.fit_transform(corpus).toarray()

In [16]:
x

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [17]:
x.shape

(6288, 23389)

## sample text for recommentation

In [18]:
sample = "game about mountains and castle"
# sample = "first person shooter with powerups and aliens"

## reapplying NLTK preprocessing steps

In [19]:
sample = re.sub("[^a-zA-Z]", " ", sample)
sample = sample.split()
sample = [lemma.lemmatize(i) for i in sample if i not in stopwords.words("english")]
sample = " ".join(sample)
sample = [sample]

sample

['game mountain castle']

In [20]:
y = cv.transform(sample).toarray()
y

array([[0, 0, 0, ..., 0, 0, 0]])

## Recommending games based on cosine similarity


In [21]:
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
cosine_similarity(x, y)

array([[0.        ],
       [0.06666667],
       [0.        ],
       ...,
       [0.13423121],
       [0.05292561],
       [0.        ]])

In [23]:
recommendation = metacritic.copy()
recommendation["recommendation"] = cosine_similarity(x, y)

## Displaying top 5 recommendations

In [24]:
recommendation.sort_values(by=["recommendation"], ascending=False).head()

Unnamed: 0,name,description,recommendation
3909,kingdoms and castles,kingdoms and castles is a game about growing a...,0.447214
3483,the eternal castle remastered,the eternal castle [remastered] is a remake of...,0.428845
4564,game dev tycoon,game dev tycoon is a business simulation game ...,0.392837
3010,dark fall 3: lost souls,the third game in the dark fall adventure game...,0.3849
5595,outlive (2001),a 2d battlefield game set during a 21st centur...,0.3849
