In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
# Load the product catalogue, replace the raw column names with readable
# ones, then prepend a sequential Game_id (0 .. len(df) - 1) as column 0
df = (
    pd.read_csv("products.csv")
    .rename(columns={
        'a_name': 'Name',
        'b_date': 'Date',
        'c_os': 'OS',
        'd_origin_price': 'Price',
        'e_discount': 'Discount',
        'f_n_reviews': 'No. of Review',
        'g_percent_positive': 'Positive %',
    })
)
df.insert(0, 'Game_id', range(len(df)))

Unnamed: 0,Game_id,Name,Date,OS,Price,Discount,No. of Review,Positive %
0,0,Counter-Strike: Global Offensive,"21 Aug, 2012","win,mac,linux",FreetoPlay,0,163633,88%
1,1,Dota 2,"21 Aug, 2012","win,mac,linux",FreetoPlay,0,660335,83%
2,2,MONSTER HUNTER RISE,"21 Aug, 2012",win,1.410.000₫,0,694,86%
3,3,God of War,"21 Aug, 2012",win,1.139.000₫,0,711,96%
4,4,Apex Legends™,"21 Aug, 2012",win,FreetoPlay,0,377051,86%
...,...,...,...,...,...,...,...,...
4995,4995,Eternal Radiance,"21 Aug, 2012",win,188.000₫,0,364,84%
4996,4996,Orbi Universo,"21 Aug, 2012",win,165.000₫,0,117,80%
4997,4997,TAISHO x ALICE epilogue,"21 Aug, 2012",win,120.000₫,0,166,97%
4998,4998,APE OUT,"21 Aug, 2012","win,mac",165.000₫,0,842,94%


In [3]:
# Columns whose values are combined into the text used for recommendation
columns = [
    'Name',
    'No. of Review',
    'Positive %',
]

In [4]:
# True if any cell in the feature columns is missing, False otherwise
df[columns].isna().to_numpy().any()
# The recorded output is False, i.e. no missing values

False

In [5]:
# Get the important features and combine them into one string per row
def get_important_features(data):
    """Build one space-separated feature string for every row of ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Name', 'No. of Review' and 'Positive %'.

    Returns
    -------
    list of str
        ``"<Name> <No. of Review> <Positive %>"`` for each row, in row order.
        An empty frame yields an empty list.

    Raises
    ------
    KeyError
        If one of the three columns is missing from ``data``.
    """
    # zip() walks the columns by position, so the result does not depend on
    # the index labels (the frame may have been filtered or re-indexed).
    rows = zip(data['Name'], data['No. of Review'], data['Positive %'])
    # str() allows numeric columns (e.g. an integer review count) to be joined
    # with text; a missing value contributes an empty string instead of
    # raising a TypeError.
    return [
        ' '.join('' if pd.isna(value) else str(value) for value in row)
        for row in rows
    ]

In [18]:
def recommend_game(name, top_n=None):
    """Print the other games in ``df`` ranked by similarity to ``name``.

    Similarity is the cosine similarity between token-count vectors built
    from the strings returned by ``get_important_features``. Games are
    printed one per line, most similar first; ties keep their row order.

    Parameters
    ----------
    name : str
        Exact value of the 'Name' column identifying the query game. If
        several rows share the name, the first one is used.
    top_n : int, optional
        Non-negative maximum number of games to print. ``None`` (the
        default) prints every other game in ``df``.

    Raises
    ------
    IndexError
        If no row of ``df`` has ``Name == name``.

    Notes
    -----
    Reads the module-level ``df`` and (re)writes its 'important_features'
    column on every call. Returns ``None``; the result is only printed.
    """
    df['important_features'] = get_important_features(df)

    # Convert the text to a matrix of token counts
    cm = CountVectorizer().fit_transform(df['important_features'])

    # Locate the query game by row position so that it lines up with the rows
    # of ``cm`` even if 'Game_id' or the index no longer equals the position.
    matches = np.flatnonzero((df['Name'] == name).to_numpy())
    if matches.size == 0:
        # IndexError is what the previous ``.values[0]`` lookup raised for an
        # unknown name; keep the type, but say what actually went wrong.
        raise IndexError(f"Game {name!r} not found in df['Name']")
    target = matches[0]

    # Cosine similarity of the query row against every row. Only this one
    # row is needed, so the full n x n matrix is never built. The slice keeps
    # the query as a 2-D (1 x n_tokens) matrix.
    scores = cosine_similarity(cm[target:target + 1], cm).ravel()

    # Highest similarity first; the stable sort keeps row order among ties
    order = np.argsort(-scores, kind='stable')
    # Drop the query game itself by position. It is not guaranteed to sort
    # first when another row has an identical feature string.
    order = order[order != target]
    if top_n is not None:
        order = order[:top_n]

    # Recommended games
    for game in df['Name'].iloc[order]:
        print(game)

In [19]:
# Print the games ranked by similarity to "Dota 2"
recommend_game(name="Dota 2")

Craftopia
Wasteland 3
Foxhole
Chernobylite
Moonlighter
Othercide
Ruinarch
SURV1V3
KartKraft™
AGAINST
Silence
Succumate
RIDE 3
仙剑奇侠传三
Scrutinized
vrkshop
Zompiercer
Blackwake
Arboria
Infliction
Kona
Warhammer: Vermintide 2
ENDLESS™ Space 2
The Isle
RimWorld - Royalty
Cooking Simulator
Legendary Tales
Fishing Planet
The Crew™ 2
Zero Hour
Serious Sam 4
Noble Fates
Fable Anniversary
SAMURAI WARRIORS 5
HARD BULLET
Portal Knights
Hello Neighbor
Expeditions: Viking
Spiral Knights
Barn Finders
Crystal Clash
Eldest Souls
Executive Assault 2
60 Seconds!
Dealer's Life 2
Poker Championship
Blue Fire
Zaccaria Pinball
Rising World
Surgeon Simulator
Raw Data
Genesis Noir
Creativerse - Pro
Titan Souls
Stellar Tactics
Narita Boy
Cyber Shadow
Learning Factory
Arma 3 Karts
Aeon's End
Sacred Gold
Arx Fatalis
Train Sim World® 2
War of Rights
Orcs Must Die! 3
HOT WHEELS UNLEASHED™
PGA TOUR 2K21
Far Cry® Primal
Rebel Inc: Escalation
RISK: Global Domination
Bully: Scholarship Edition
Seek Girl:Fog Ⅰ
The Evil 