In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#Importing Libraries
import numpy as np
import pandas as pd
import difflib #for close text match
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.stem.porter import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
#Reading Data
game = pd.read_csv("/content/drive/MyDrive/ML/GameRecom/games.csv")
gen = pd.read_csv("/content/drive/MyDrive/ML/GameRecom/genre.csv")

In [4]:
print(game.shape)
game.head()

(21236, 13)


Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,price_original,discount,steam_deck
0,10090,Call of Duty: World at War,2008-11-18,True,False,False,Very Positive,92,37039,19.99,19.99,0.0,True
1,13500,Prince of Persia: Warrior Within™,2008-11-21,True,False,False,Very Positive,84,2199,9.99,9.99,0.0,True
2,22364,BRINK: Agents of Change,2011-08-03,True,False,False,Positive,85,21,2.99,2.99,0.0,True
3,113020,Monaco: What's Yours Is Mine,2013-04-24,True,True,True,Very Positive,92,3722,14.99,14.99,0.0,True
4,226560,Escape Dead Island,2014-11-18,True,False,False,Mixed,61,873,14.99,14.99,0.0,True


In [5]:
print(gen.shape)
gen.head()

(21236, 3)


Unnamed: 0,app_id,description,tags
0,10090,"Call of Duty is back, redefining war like you'...",Zombies\nWorld War II\nFPS\nMultiplayer\nActio...
1,13500,Enter the dark underworld of Prince of Persia ...,Action\nAdventure\nParkour\nThird Person\nGrea...
2,22364,,Action
3,113020,Monaco: What's Yours Is Mine is a single playe...,Co-op\nStealth\nIndie\nHeist\nLocal Co-Op\nStr...
4,226560,Escape Dead Island is a Survival-Mystery adven...,Zombies\nAdventure\nSurvival\nAction\nThird Pe...


In [6]:
#Merging both datasets as one 
df = game.merge(gen,on="app_id")
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21236 entries, 0 to 21235
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   app_id          21236 non-null  int64  
 1   title           21236 non-null  object 
 2   date_release    21236 non-null  object 
 3   win             21236 non-null  bool   
 4   mac             21236 non-null  bool   
 5   linux           21236 non-null  bool   
 6   rating          21236 non-null  object 
 7   positive_ratio  21236 non-null  int64  
 8   user_reviews    21236 non-null  int64  
 9   price_final     21236 non-null  float64
 10  price_original  21236 non-null  float64
 11  discount        21236 non-null  float64
 12  steam_deck      21236 non-null  bool   
 13  description     15907 non-null  object 
 14  tags            21136 non-null  object 
dtypes: bool(4), float64(3), int64(3), object(5)
memory usage: 2.0+ MB


In [7]:
df = df[["app_id",'title','win','mac','linux','description','tags']]
df.head()

Unnamed: 0,app_id,title,win,mac,linux,description,tags
0,10090,Call of Duty: World at War,True,False,False,"Call of Duty is back, redefining war like you'...",Zombies\nWorld War II\nFPS\nMultiplayer\nActio...
1,13500,Prince of Persia: Warrior Within™,True,False,False,Enter the dark underworld of Prince of Persia ...,Action\nAdventure\nParkour\nThird Person\nGrea...
2,22364,BRINK: Agents of Change,True,False,False,,Action
3,113020,Monaco: What's Yours Is Mine,True,True,True,Monaco: What's Yours Is Mine is a single playe...,Co-op\nStealth\nIndie\nHeist\nLocal Co-Op\nStr...
4,226560,Escape Dead Island,True,False,False,Escape Dead Island is a Survival-Mystery adven...,Zombies\nAdventure\nSurvival\nAction\nThird Pe...


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21236 entries, 0 to 21235
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   app_id       21236 non-null  int64 
 1   title        21236 non-null  object
 2   win          21236 non-null  bool  
 3   mac          21236 non-null  bool  
 4   linux        21236 non-null  bool  
 5   description  15907 non-null  object
 6   tags         21136 non-null  object
dtypes: bool(3), int64(1), object(3)
memory usage: 891.7+ KB


In [9]:
df.dropna(inplace=True)

In [10]:
df.isna().sum()
df = df.reset_index() #Reset index as after drop there will be gap between dataframe

In [11]:
df.isnull().sum()

index          0
app_id         0
title          0
win            0
mac            0
linux          0
description    0
tags           0
dtype: int64

In [12]:
df.duplicated().sum()

0

In [13]:
df['description']

0        Call of Duty is back, redefining war like you'...
1        Enter the dark underworld of Prince of Persia ...
2        Monaco: What's Yours Is Mine is a single playe...
3        Escape Dead Island is a Survival-Mystery adven...
4        Dungeon of the Endless is a Rogue-Like Dungeon...
                               ...                        
15901    Dark Past is based on psychological horror by ...
15902    Remember the times at the movies? Maybe you've...
15903    Roguelite bullet-hell meets sexy anime in Beau...
15904    Create and customize your own campground in th...
15905    經典商業模擬遊戲《創業王》讓你一圓當老闆的夢想！選地建廠，佈置廠房、僱用員工，打造夢幻生產線...
Name: description, Length: 15906, dtype: object

In [14]:
df['desc'] = df['description'].apply(lambda x: x.split())
df['genre'] = df['tags'].apply(lambda x: x.split("\n"))

In [15]:
df = df[['title','win','mac','linux','desc','genre']]
df.head()

Unnamed: 0,title,win,mac,linux,desc,genre
0,Call of Duty: World at War,True,False,False,"[Call, of, Duty, is, back,, redefining, war, l...","[Zombies, World War II, FPS, Multiplayer, Acti..."
1,Prince of Persia: Warrior Within™,True,False,False,"[Enter, the, dark, underworld, of, Prince, of,...","[Action, Adventure, Parkour, Third Person, Gre..."
2,Monaco: What's Yours Is Mine,True,True,True,"[Monaco:, What's, Yours, Is, Mine, is, a, sing...","[Co-op, Stealth, Indie, Heist, Local Co-Op, St..."
3,Escape Dead Island,True,False,False,"[Escape, Dead, Island, is, a, Survival-Mystery...","[Zombies, Adventure, Survival, Action, Third P..."
4,Dungeon of the ENDLESS™,True,True,False,"[Dungeon, of, the, Endless, is, a, Rogue-Like,...","[Roguelike, Strategy, Tower Defense, Pixel Gra..."


In [16]:
# Replacing Space betweem words
df["desc"] = df['desc'].apply(lambda x:[i.replace(" ","") for i in x])
df["genre"] = df['genre'].apply(lambda x:[i.replace(" ","") for i in x])
df.head()

Unnamed: 0,title,win,mac,linux,desc,genre
0,Call of Duty: World at War,True,False,False,"[Call, of, Duty, is, back,, redefining, war, l...","[Zombies, WorldWarII, FPS, Multiplayer, Action..."
1,Prince of Persia: Warrior Within™,True,False,False,"[Enter, the, dark, underworld, of, Prince, of,...","[Action, Adventure, Parkour, ThirdPerson, Grea..."
2,Monaco: What's Yours Is Mine,True,True,True,"[Monaco:, What's, Yours, Is, Mine, is, a, sing...","[Co-op, Stealth, Indie, Heist, LocalCo-Op, Str..."
3,Escape Dead Island,True,False,False,"[Escape, Dead, Island, is, a, Survival-Mystery...","[Zombies, Adventure, Survival, Action, ThirdPe..."
4,Dungeon of the ENDLESS™,True,True,False,"[Dungeon, of, the, Endless, is, a, Rogue-Like,...","[Roguelike, Strategy, TowerDefense, PixelGraph..."


In [17]:
#Making a column of name "detail" which have both description and tags text
df['detail'] = df['desc'] + df['genre']
df['detail']

0        [Call, of, Duty, is, back,, redefining, war, l...
1        [Enter, the, dark, underworld, of, Prince, of,...
2        [Monaco:, What's, Yours, Is, Mine, is, a, sing...
3        [Escape, Dead, Island, is, a, Survival-Mystery...
4        [Dungeon, of, the, Endless, is, a, Rogue-Like,...
                               ...                        
15901    [Dark, Past, is, based, on, psychological, hor...
15902    [Remember, the, times, at, the, movies?, Maybe...
15903    [Roguelite, bullet-hell, meets, sexy, anime, i...
15904    [Create, and, customize, your, own, campground...
15905    [經典商業模擬遊戲《創業王》讓你一圓當老闆的夢想！選地建廠，佈置廠房、僱用員工，打造夢幻生產...
Name: detail, Length: 15906, dtype: object

In [18]:
#Taking the combined data "detail" and removed the acess data
cldata = df[['title','detail']]
cldata['detail'] = cldata['detail'].apply(lambda x:" ".join(x))

#Lower Casing the "detail" as its a standard for nlp
cldata['detail'] = cldata['detail'].apply(lambda x:x.lower())

cldata['detail'][0]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cldata['detail'] = cldata['detail'].apply(lambda x:" ".join(x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cldata['detail'] = cldata['detail'].apply(lambda x:x.lower())


"call of duty is back, redefining war like you've never experienced before. building on the call of duty 4®: modern warfare engine, call of duty: world at war immerses players into the most gritty and chaotic wwii combat ever experienced. zombies worldwarii fps multiplayer action shooter co-op singleplayer moddable first-person war onlineco-op gore historical classic survival greatsoundtrack tanks horror adventure"

In [39]:
#Extracting feature 
cv = TfidfVectorizer(max_features=300,stop_words="english") #Creating object of TfidfVectorizer class

In [40]:
vector = cv.fit_transform(cldata['detail']).toarray()
#To check feature name extracted by TfidfVectorizer
#cv.get_feature_names()
vector

array([[0.        , 0.        , 0.        , ..., 0.12653822, 0.        ,
        0.21362903],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.11771871, ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.15889771, ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [43]:
#Creating object of PorterStemmer
ps = PorterStemmer()  

In [42]:
#Making fuction for steming the text 
def stem(txt):
  k = []
  for i in txt.split():
    k.append(ps.stem(i))
  return " ".join(k)

In [44]:
#Applying stem to "detail"
cldata['detail'] = cldata['detail'].apply(stem)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cldata['detail'] = cldata['detail'].apply(stem)


In [45]:
cldata['detail'][0]

"call of duti is back, redefin war like you'v never experienc before. build on the call of duti 4®: modern warfar engine, call of duty: world at war immer player into the most gritti and chaotic wwii combat ever experienced. zombi worldwarii fp multiplay action shooter co-op singleplay moddabl first-person war onlineco-op gore histor classic surviv greatsoundtrack tank horror adventur"

In [46]:
similar = cosine_similarity(vector)

In [51]:
def recommend(val):
  game_ind = df[df['title'] == val].index[0]
  distance = similar[game_ind]
  game_list = sorted(list(enumerate(distance)),reverse=True, key= lambda x: x[1])[:5]
  for i in game_list:
    print("Recommended Game:",df.iloc[i[0]].title)
    print("Description:"," ".join(df.iloc[i[0]].desc))
    print("Available for:",end=" ")
    l = [df.iloc[i[0]].win,df.iloc[i[0]].mac,df.iloc[i[0]].linux]
    count = 0
    for j in l:
      if j==True and count==0:
        print("Windows",end="")
        count +=1
      elif j==True and count==1:
        print(", Mac",end="")
        count +=1
      elif j==True and count==2:
        print(", Linux")
    print("\n")

In [50]:
#result analysis
def result(val):
  game_ind = df[df['title'] == val].index[0]
  distance = similar[game_ind]
  res = sorted(list(enumerate(distance)),reverse=True, key= lambda x: x[1])[:5]
  print("Result Analysis",*res,sep="\n")

In [57]:
user_input = input("Give the name of your favourite game: ")
find_close_match = difflib.get_close_matches(user_input,df['title'])
recommend(find_close_match[0])
result(find_close_match[0])

Give the name of your favourite game: resident evil
Recommended Game: Resident Evil
Description: The game that defined the survival-horror genre is back! Check out the remastered HD version of Resident Evil.
Available for: Windows

Recommended Game: resident evil 4 (2005)
Description: (Release: 2014) Special agent Leon S. Kennedy is sent on a mission to rescue the U.S. President’s daughter who has been kidnapped.
Available for: Windows

Recommended Game: Them and Us
Description: Them and Us is a classic, survival horror game with a focus on atmosphere and player immersion. Discover the truth of Alicia’s past as she descends further into her strange, unending nightmare.
Available for: Windows

Recommended Game: Resident Evil 2
Description: A deadly virus engulfs the residents of Raccoon City in September of 1998, plunging the city into chaos as flesh eating zombies roam the streets for survivors. An unparalleled adrenaline rush, gripping storyline, and unimaginable horrors await you. Wi