In [35]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

In [36]:
# Load the preprocessed Steam games table from the working directory.
# 'App ID' stays a regular column here (the frame keeps its default integer
# index), so downstream code must look games up by the column, not the index.
df_steam_games = pd.read_csv(filepath_or_buffer='Prepped_games_out.csv')

In [37]:
# Eyeball a random handful of rows (unseeded, so the rows differ on every run).
df_steam_games.sample(n=20)

Unnamed: 0,App ID,Name,Short Description,Positive Reviews,Negative Reviews,Total Reviews,prepped_description_lemm
20600,1704980,Jigsaw Puzzle - Jack and the Beanstalk,Once upon a time... If you like Jigsaw Puzzles...,10,0,10,upon time like jigsaw puzzle nice story game f...
26261,2012630,A Pact With Me - Boys Love (BL) Visual Novel,"Three humans summon Belial, a mischievous demo...",10,0,10,three human summon belial mischievous demon fo...
17071,1549420,メンヘラフレシア　フラワリングアビス,フリーゲーム原作「メンヘラフレシア」リメイク作品 メンヘラな少女たちを攻略する恋愛アドベンチ...,25,1,26,メンヘラな少女たちを攻略する恋愛アドベンチャーゲーム
37517,502320,Boulder Dash - 30th Anniversary,Boulder Dash® - 30th Anniversary™ features cla...,79,19,98,boulder feature classic gameplay exciting leve...
24177,1885880,Might of the Stones,The world is in danger. An evil entity is tryi...,3,0,3,world danger evil entity try destroy world job...
51242,872040,Hovership Havoc,Hovership Havoc is a sci-fi twin stick shooter...,21,5,26,hovership havoc twin stick shooter person bos ...
38401,525460,Sullen: Light is Your Friend,FIVE YEARS AGO IN THIS FOREST.. A journalist w...,12,34,46,five year ago forest journalist want go course...
34844,424010,Crudelis,"Crudelis is a 60 minutes RPG game, during whic...",20,4,24,crudelis minute rpg game every decision meanin...
27596,2126700,Double Trouble,"Meet Olivia and Elodie, two sisters on an adve...",4,0,4,meet olivia elodie two sister adventure infini...
5674,1162430,Salad Fields,A DREAMYQUEER experience. It's an adventure-pu...,24,0,24,dreamyqueer experience game surreal place weir...


In [38]:
# Lookup table: Steam App ID -> game title, used when printing recommendations.
game_names = {
    app_id: title
    for app_id, title in zip(df_steam_games['App ID'], df_steam_games['Name'])
}

# Keep only the App ID and the preprocessed description, under shorter names.
df_game_desc = df_steam_games[["App ID", "prepped_description_lemm"]].rename(
    columns={"App ID": "ID", "prepped_description_lemm": "Description"}
)

# Index by ID and discard games whose description is missing, so every
# remaining row can be vectorised.
game_desc_df = df_game_desc.set_index('ID').dropna()
game_desc_df.sample(10)

Unnamed: 0_level_0,Description
ID,Unnamed: 1_level_1
1076290,immerse dark virtual thriller police detective...
1082000,help nathan beat platformer part tournament gr...
1422150,pp puncher rpg mostly guy go quest save waifus...
1267170,explore underground solve puzzle get skill fig...
525920,become manager professional cycling team compe...
723420,vr game traditional chinese mythology game act...
292230,game character hub art program focus game asse...
820310,adventure saga create best tradition fantasy t...
390720,anarcute rampage riot simulator combine adorab...
1147140,biggest police chase town run get catch steal ...


In [39]:
# Build a TF-IDF vectorizer with scikit-learn's default settings.
tfidfvec = TfidfVectorizer()

# Learn the vocabulary and turn every description into a TF-IDF row vector.
tfidf_gamedesc = tfidfvec.fit_transform(game_desc_df['Description'])

# Pairwise cosine similarity of every game against every other game.
# With a single argument the matrix is compared against itself. The result is
# a dense (n_games x n_games) array, so memory grows quadratically.
cos_sim = cosine_similarity(tfidf_gamedesc)

# Row position in cos_sim -> App ID. Both follow the row order of game_desc_df,
# so this Series translates between the two.
indices = pd.Series(game_desc_df.index)

In [40]:
def get_related_games_by_desc(id, total_matches):
    '''
    Print and return the games whose descriptions are most similar to a game.

    Similarity is read from the precomputed ``cos_sim`` matrix; ``indices``
    maps a row position of that matrix to an App ID and ``game_names`` maps an
    App ID to its title.

    @param: id: App ID of the game to get recommendations on.
    @param: total_matches: total number of matches to return.
    @return: DataFrame with one row per recommended game, best match first,
             with columns 'Score' (cosine similarity) and 'App ID'.
    @raises: IndexError if ``id`` has no row in the similarity matrix
             (unknown App ID, or its description was dropped as missing).
    '''
    # Locate the row of the similarity matrix that belongs to this game.
    matches = indices[indices == id]
    if matches.empty:
        raise IndexError(f"App ID {id!r} is not present in the similarity index")
    position = matches.index[0]

    # Remove the queried game explicitly instead of assuming it sorts first:
    # games with identical descriptions tie with it, and a game whose
    # description produced an all-zero vector scores 0 even against itself.
    scores = pd.Series(cos_sim[position]).drop(position)
    top_scores = scores.nlargest(max(total_matches, 0))

    # The positions index into cos_sim, so they must be translated through
    # `indices` (same row order), not through df_steam_games' own index.
    recommended_games = pd.DataFrame({
        'Score': top_scores.to_numpy(),
        'App ID': indices.iloc[top_scores.index].to_numpy(),
    })

    # Report against the `id` argument, not whatever global happens to be set.
    print(f"Games related to: {game_names[id]}\n")
    for app_id, score in zip(recommended_games['App ID'], recommended_games['Score']):
        print(f"{game_names[app_id]}: {score:.2f}")

    return recommended_games

In [49]:
def get_recommendations(id, total_matches=10):
    """Return the App IDs of the games most similar to the given game.

    Similarity is read from the precomputed ``cos_sim`` matrix; ``indices``
    maps a row position of that matrix to an App ID.

    Args:
        id: App ID of the game to get recommendations for.
        total_matches: maximum number of recommendations to return
            (defaults to 10, the previously hard-coded amount).

    Returns:
        List of App IDs ordered from most to least similar. The queried game
        itself is never included.

    Raises:
        IndexError: if ``id`` has no row in the similarity matrix (unknown
            App ID, or its description was dropped as missing).
    """
    # Locate the row of the similarity matrix that belongs to this game.
    matches = indices[indices == id]
    if matches.empty:
        raise IndexError(f"App ID {id!r} is not present in the similarity index")
    position = matches.index[0]

    # Remove the queried game explicitly instead of assuming it sorts first:
    # games with identical descriptions tie with it, and a game whose
    # description produced an all-zero vector scores 0 even against itself.
    scores = pd.Series(cos_sim[position]).drop(position)
    top_positions = scores.nlargest(max(total_matches, 0)).index

    # Translate matrix positions back to App IDs in one vectorised lookup.
    return indices.iloc[top_positions].tolist()

In [50]:
# Demo: recommendations for App ID 221380.
game_id = 221380

recommended_games = get_recommendations(game_id)

# Print the queried title, then each recommended title on its own line.
print(f"Games related to: {game_names[game_id]}")
for app_id in recommended_games:
    print(game_names[app_id])

Games related to: Age of Empires II (2013)
Age of Mythology: Extended Edition
Rise of Nations: Extended Edition
Spades
Spectre
Escape Legacy: Ancient Scrolls
DRAKERZ-Confrontation
Project Beril / 贝丽尔养成计划
Age of Empires II: Definitive Edition
Age of Wonders II: The Wizard's Throne
Akin


In [51]:
# Demo: recommendations for App ID 1287450.
game_id = 1287450

recommended_games = get_recommendations(game_id)

# Print the queried title, then each recommended title on its own line.
print(f"Games related to: {game_names[game_id]}")
for app_id in recommended_games:
    print(game_names[app_id])

Games related to: Ultimate Chess
The Rooster's Chess
Chessmates
The Chess Variants Club
ChessCraft
Chesskoban - Chess Puzzles
Ragnarok Chess
Chessia
Wild Wild Chess
Samurai Chess
ChessLocke
