## A small recommendation system for Steam games

This system is built from data on roughly 27,000 games from the Steam game store. The dataset is taken from Kaggle: https://www.kaggle.com/nikdavis/steam-store-games

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# Load the Steam store dataset; 'steam.csv' is resolved relative to the working directory
data = pd.read_csv('steam.csv')

In [3]:
# Dimensions of the freshly loaded frame as (rows, columns)
data.shape

(27075, 18)

In [4]:
# Preview the first rows to see which columns are available
data.head()

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0,3416,398,187,34,5000000-10000000,3.99
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,1273,267,258,184,5000000-10000000,3.99
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0,5250,288,624,415,5000000-10000000,3.99


In [5]:
# Removing columns which are not used for recommendation.
# `columns=` is passed by keyword: the positional axis argument
# (`data.drop([...], 1)`) was deprecated in pandas 1.3 and raises a
# TypeError from pandas 2.0 onwards.
data = data.drop(columns=['release_date',
                          'platforms',
                          'required_age',
                          'achievements',
                          'average_playtime',
                          'median_playtime',
                          'owners',
                          'price',
                          'categories',
                          'publisher'])

# Removing non-english games (rows where the `english` flag is 0)
data = data.drop(data[data['english'] == 0].index).reset_index(drop=True)

# Creating the rating diff column (positive minus negative ratings) and
# keeping only games with an overall positive rating: rows whose
# difference is <= 0 are removed.
data['rating'] = data['positive_ratings'] - data['negative_ratings']
data = data.drop(data[data['rating'] <= 0].index).reset_index(drop=True)

In [6]:
# Dimensions after dropping unused columns and filtering rows
data.shape

(21501, 9)

In [7]:
# Separating tags with a space
def globalization(strok: str) -> str:
    """Return `strok` with every ';' separator replaced by a single space."""
    pieces = strok.split(';')
    return ' '.join(pieces)

# Removing duplicate tags
def del_rep(strok: str) -> str:
    """Drop repeated whitespace-separated words from `strok`.

    The first occurrence of each word is kept and the original word order is
    preserved; comparison is case-sensitive. The surviving words are joined
    with single spaces.
    """
    # dict keys are unique and remember insertion order, so this keeps the
    # first occurrence of every word in its original position.
    unique_words = dict.fromkeys(strok.split())
    return ' '.join(unique_words)

# Converting to lower case
def to_low(strok: str) -> str:
    """Return a lower-cased copy of `strok`."""
    lowered = strok.lower()
    return lowered

In [8]:
# Turn the ';'-delimited genre and tag strings into space-delimited ones
data['steamspy_tags'] = data['steamspy_tags'].apply(globalization)
data['genres'] = data['genres'].apply(globalization)

# Merge genres with tags into one string per game, then drop repeated words
# (del_rep keeps the first occurrence of each word)
merged_tags = data['genres'] + ' ' + data['steamspy_tags']
data['genres&tags'] = merged_tags.apply(del_rep)

# Build the lowercase text used for similarity: name, then developer,
# then the merged genres/tags, separated by single spaces
cols = ['developer', 'genres&tags']
data['alls'] = data['name'].apply(to_low)
for i in cols:
    data['alls'] = data['alls'] + ' ' + data[i].apply(to_low)

In [9]:
# Inspect the combined lowercase text column (name + developer + genres/tags)
data['alls']

0              counter-strike valve action fps multiplayer
1        team fortress classic valve action fps multipl...
2        day of defeat valve action fps world war ii mu...
3          deathmatch classic valve action fps multiplayer
4        half-life: opposing force gearbox software act...
                               ...                        
21496    old edge ii ltrust adventure casual indie simu...
21497    room of pandora shen jiawei adventure casual i...
21498      cyber gun semyon maximov action adventure indie
21499    new yankee 7: deer hunters yustas game studio ...
21500    rune lord adept studios gd adventure casual indie
Name: alls, Length: 21501, dtype: object

In [10]:
# Bag-of-words representation of each game's text: one row per game, one
# column per token, English stop words discarded
count_vec = CountVectorizer(stop_words='english')
count_matrix = count_vec.fit_transform(data['alls'])

# Pairwise cosine similarity between all games. With a single argument the
# matrix is compared against itself, so row i / column j is the similarity
# between game i and game j.
# NOTE(review): the result is a dense square matrix with one row and column per
# game (21501 games here), which is presumably several GB — confirm the dtype
# and that the kernel has enough memory before re-running.
cosine_sim = cosine_similarity(count_matrix)

In [11]:
# The main function that makes recommendations
def get_rec(nam: str, n: int = 10):
    """Return the names of the games most similar to the game called `nam`.

    Similarity is read from the module-level `cosine_sim` matrix, whose rows
    follow the row order of the module-level `data` frame.

    Parameters
    ----------
    nam : str
        Exact (case-sensitive) value of the `name` column of the query game.
        If several rows share the name, the first one is used.
    n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Game names from `data['name']`, ordered from most to least similar.
        Ties keep the row order of `data`. The query game itself is never
        included.

    Raises
    ------
    IndexError
        If no row of `data` has `name == nam`.
    """
    # Work with row positions rather than index labels: `cosine_sim` is
    # addressed positionally, so this stays correct even if `data` does not
    # carry a default 0..N-1 index.
    positions = np.flatnonzero((data['name'] == nam).to_numpy())
    if positions.size == 0:
        raise IndexError(f"Game {nam!r} not found in data['name']")
    ind = positions[0]

    # Stable descending sort of the similarity scores for the query row
    order = np.argsort(-np.asarray(cosine_sim[ind]), kind='stable')

    # Exclude the query game explicitly instead of assuming it is ranked
    # first: a game with identical tokens and an earlier row ties at the top
    # score and would otherwise push the query game into its own results.
    order = order[order != ind][:n]
    return data['name'].iloc[order]

In [12]:
# Example query: print the recommendations for one game
game = "Counter-Strike"
# Use `game` here; the previous name `naz` is not defined anywhere in the
# notebook and raises NameError on a fresh kernel (Restart & Run All).
print('Recommendation for', game, '\n')
print('Recommendations using game name, developer, genre and tags \n')
print(get_rec(game),'\n')

Recommendation for Counter-Strike 

Recommendations using game name, developer, genre and tags 

10              Counter-Strike: Source
7       Counter-Strike: Condition Zero
5                             Ricochet
3                   Deathmatch Classic
25    Counter-Strike: Global Offensive
1                Team Fortress Classic
13             Half-Life 2: Deathmatch
15        Half-Life Deathmatch: Source
19                     Team Fortress 2
2                        Day of Defeat
Name: name, dtype: object 

