In [74]:
import numpy 
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [75]:
games = pd.read_csv('games.csv')

In [76]:
games.head()

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,DLC count,About the game,Supported languages,...,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,Galactic Bowling is an exaggerated and stylize...,['English'],...,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",...,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",...,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.0,0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",...,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


In [77]:
games.shape

(85103, 39)

In [78]:
games.describe

<bound method NDFrame.describe of          AppID                     Name  Release date Estimated owners  \
0        20200         Galactic Bowling  Oct 21, 2008        0 - 20000   
1       655370             Train Bandit  Oct 12, 2017        0 - 20000   
2      1732930             Jolt Project  Nov 17, 2021        0 - 20000   
3      1355720                 Henosis™  Jul 23, 2020        0 - 20000   
4      1139950    Two Weeks in Painland   Feb 3, 2020        0 - 20000   
...        ...                      ...           ...              ...   
85098  2669080  Mannerheim's Saloon Car   Jan 2, 2024            0 - 0   
85099  2736910                 Beer Run   Jan 3, 2024            0 - 0   
85100  2743220     My Friend The Spider   Jan 4, 2024            0 - 0   
85101  2293130        Path of Survivors   Jan 8, 2024            0 - 0   
85102  2738840          The Night Heist   Jan 5, 2024            0 - 0   

       Peak CCU  Required age  Price  DLC count  \
0             0           

In [79]:
games.duplicated().sum()

0

In [80]:
games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 85103 entries, 0 to 85102
Data columns (total 39 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   AppID                       85103 non-null  int64  
 1   Name                        85097 non-null  object 
 2   Release date                85103 non-null  object 
 3   Estimated owners            85103 non-null  object 
 4   Peak CCU                    85103 non-null  int64  
 5   Required age                85103 non-null  int64  
 6   Price                       85103 non-null  float64
 7   DLC count                   85103 non-null  int64  
 8   About the game              81536 non-null  object 
 9   Supported languages         85103 non-null  object 
 10  Full audio languages        85103 non-null  object 
 11  Reviews                     9743 non-null   object 
 12  Header image                85103 non-null  object 
 13  Website                     394

In [81]:
cat_col = [col for col in games.columns if games[col].dtype == 'object']
cat_col

games[cat_col].nunique()

Name                    84367
Release date             4469
Estimated owners           14
About the game          81100
Supported languages     11306
Full audio languages     2240
Reviews                  9646
Header image            84994
Website                 32199
Support url             27297
Support email           42081
Metacritic url           3814
Notes                   10570
Developers              49870
Publishers              43366
Categories               5648
Genres                   2471
Tags                    57101
Screenshots             82988
Movies                  78579
dtype: int64

In [82]:
num_col = [col for col in games.columns if games[col].dtype != 'object']
num_col

games[num_col].nunique()

AppID                         85103
Peak CCU                       1445
Required age                     19
Price                           584
DLC count                        95
Windows                           2
Mac                               2
Linux                             2
Metacritic score                 73
User score                       33
Positive                       4532
Negative                       2303
Score rank                        4
Achievements                    431
Recommendations                4035
Average playtime forever       2209
Average playtime two weeks      781
Median playtime forever        1896
Median playtime two weeks       784
dtype: int64

In [83]:
df = games.drop(['Estimated owners', 'Full audio languages' ,'Metacritic url', 'Notes', 'Peak CCU', 'DLC count', 'Metacritic score', 'User score', 'Score rank', 'Achievements', 'Recommendations', 'Average playtime forever', 'Average playtime two weeks', 'Median playtime forever', 'Median playtime two weeks'], axis=1)
df.head()

Unnamed: 0,AppID,Name,Release date,Required age,Price,About the game,Supported languages,Reviews,Header image,Website,...,Linux,Positive,Negative,Developers,Publishers,Categories,Genres,Tags,Screenshots,Movies
0,20200,Galactic Bowling,"Oct 21, 2008",0,19.99,Galactic Bowling is an exaggerated and stylize...,['English'],,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.galacticbowling.net,...,False,6,11,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
1,655370,Train Bandit,"Oct 12, 2017",0,0.99,THE LAW!! Looks to be a showdown atop a train....,"['English', 'French', 'Italian', 'German', 'Sp...",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://trainbandit.com,...,False,53,5,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
2,1732930,Jolt Project,"Nov 17, 2021",0,4.99,Jolt Project: The army now has a new robotics ...,"['English', 'Portuguese - Brazil']",,https://cdn.akamai.steamstatic.com/steam/apps/...,,...,False,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",,https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
3,1355720,Henosis™,"Jul 23, 2020",0,5.99,HENOSIS™ is a mysterious 2D Platform Puzzler w...,"['English', 'French', 'Italian', 'German', 'Sp...",,https://cdn.akamai.steamstatic.com/steam/apps/...,https://henosisgame.com/,...,True,3,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...
4,1139950,Two Weeks in Painland,"Feb 3, 2020",0,0.0,ABOUT THE GAME Play as a hacker who has arrang...,"['English', 'Spanish - Spain']",,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.unusual-games.com/home/,...,False,50,8,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",https://cdn.akamai.steamstatic.com/steam/apps/...,http://cdn.akamai.steamstatic.com/steam/apps/2...


In [84]:
df.shape

(85103, 24)

In [85]:
input_tags = "Indie Action".split()



tfidf_vectorizer = TfidfVectorizer()

tfidf_matrix = tfidf_vectorizer.fit_transform(df['Tags'].values.astype('U'))

tag_vector = tfidf_vectorizer.transform([" ".join(input_tags)])
cosine_similarities = cosine_similarity(tfidf_matrix, tag_vector)

top_n = 10
similar_games_indices = cosine_similarities.flatten().argsort()[-top_n:][::-1]
similiar_games = df.iloc[similar_games_indices]

print("Input tags", input_tags)
print("Similar games")
print(similiar_games[['Name', 'Tags', 'About the game']])

Input tags ['Indie', 'Action']
Similar games
                                 Name          Tags  \
55701                     Dear Leader  Action,Indie   
19821            Break The Food Chain  Action,Indie   
6084                         Pixelord  Action,Indie   
1083                   S.T.R.E.T.C.H.  Action,Indie   
1081                    Brutal Runner  Action,Indie   
40648                   Action Legion  Indie,Action   
9822                           Smithy  Action,Indie   
22082                      Cubiques 2  Action,Indie   
10794  Red Barton and The Sky Pirates  Action,Indie   
19203                    Cotropitorii  Action,Indie   

                                          About the game  
55701  Your glorious regime is under attack! Will you...  
19821  Break The Food Chain is an arcade game about c...  
6084   The simplest click runner for a long distance....  
1083   S.T.R.E.T.C.H. - fascinating top down flight s...  
1081   Do not you like to run and sweat? Then this in.