## Steam Game Review Analysis

In [1]:
from pyspark.sql import SparkSession, functions, types, Row
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle
import parquet

In [2]:
# Start (or reuse) the Spark session for this notebook and keep a handle
# on its SparkContext.
spark = (
    SparkSession.builder
    .appName('Steam Games Analysis')
    .getOrCreate()
)
sc = spark.sparkContext

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/11/30 19:33:56 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/11/30 19:33:57 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


## Steam Game Content-Based Recommender

In [4]:
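# One-time setup (run in the kernel's environment if the imports or
# pd.read_parquet(..., engine='pyarrow') fail):
# pip install pyarrow
# pip install parquet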

In [5]:
# Load the cleaned Steam games data into pandas, reading it with the pyarrow engine.
# NOTE(review): 'cleaned_steam_games' is resolved relative to the working directory — confirm.
df_recommend = pd.read_parquet(
    'cleaned_steam_games',
    engine='pyarrow',
)

In [6]:
# Keep only the columns the recommender uses; missing values become '' so the
# later string concatenation never meets NaN/None.
df_rec = df_recommend[['name', 'desc_snippet', 'popular_tags', 'developers', 'game_description']].fillna('')
# Collapse each developers entry into one space-separated string. Entries that
# are already strings (such as the '' written by fillna above) are kept as-is,
# because joining a string would insert a space between every character.
df_rec["developers"] = df_rec["developers"].apply(
    lambda devs: devs if isinstance(devs, str) else " ".join(map(str, devs))
)
# regex=False: the comma is a literal separator, not a pattern, and this keeps
# the behavior independent of the pandas version's default for `regex`.
df_rec['popular_tags'] = df_rec['popular_tags'].str.replace(',', ' ', regex=False)


In [7]:
# Preview the first rows of the prepared recommender frame.
df_rec.head()

Unnamed: 0,name,desc_snippet,popular_tags,developers,game_description
0,World War 3,World War 3 is a multiplayer military FPS set ...,Early Access FPS Military War Multiplayer Earl...,The Farm 51,About This Game World War 3 is a multiplayer ...
1,Knight Online,The Great Battle between Karus and El Morad ha...,Free to Play MMORPG Massively Multiplayer Acti...,Noah System Mgame,About This Game Knight Online is a PvP-centri...
2,Super Mega Baseball 2,The critically acclaimed Super Mega Baseball s...,Sports Simulation Baseball Indie Local Multipl...,Metalhead Software Inc.,About This Game The critically acclaimed Supe...
3,Plague Inc: Evolved,Plague Inc: Evolved is a unique mix of high st...,Strategy Simulation Indie Singleplayer Multipl...,Ndemic Creations,About This Game Plague Inc: Evolved is a uni...
4,DEFCON,"Inspired by the 1983 cult classic film, Wargam...",Strategy Indie Wargame Multiplayer Cold War Re...,Introversion Software,About This Game Inspired by the 1983 cult cla...


In [8]:
def combined_features(row):
    """Build the single text blob for one game row.

    Joins, in this order, the row's 'desc_snippet', 'game_description',
    'popular_tags' and 'developers' values with single spaces.
    """
    text_columns = ('desc_snippet', 'game_description', 'popular_tags', 'developers')
    return " ".join(row[column] for column in text_columns)

# axis=1 applies combined_features to each row, storing one combined text string per game.
df_rec['combined_features'] = df_rec.apply(combined_features,axis = 1)

In [9]:
# Transform the raw combined text into a numerical representation of the
# importance of each word (TF-IDF).

# stop_words='english' removes common English stop words before weighting;
# fit_transform learns the vocabulary from the combined text and returns the
# TF-IDF matrix with one row per entry of df_rec['combined_features'].
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_rec['combined_features'])

In [None]:
# Cosine Similarity Matrix
# Pairwise similarity between texts based on their TF-IDF feature vectors:
# called with a single argument, every row of tfidf_matrix is compared with
# every other row.
# NOTE(review): the result has one row and one column per game, so its size
# grows quadratically with the number of games — confirm it fits in memory.
cosine_sim = cosine_similarity(tfidf_matrix)

In [None]:
# Persist the similarity matrix so it can be reloaded without recomputing it.
# The context manager closes (and therefore flushes) the file even if pickling
# raises; the bare open(...) used before left the handle open.
with open('cosine_sim.pkl', 'wb') as cosine_sim_file:
    pickle.dump(cosine_sim, cosine_sim_file)

In [None]:
def get_index_from(title):
    """Return the df_rec index label of the first row whose name equals ``title``.

    Parameters
    ----------
    title : str
        Exact game name to look up (compared with ``==``).

    Returns
    -------
    The index label (from ``df_rec.index``) of the first matching row.

    Raises
    ------
    IndexError
        If no row of df_rec has that name.
    """
    # Column access by key rather than attribute, so the lookup cannot be
    # confused with a DataFrame attribute called `name`.
    matches = df_rec[df_rec['name'] == title].index.values
    if len(matches) == 0:
        # Same exception type as indexing the empty result, but with a message
        # that says which title was not found.
        raise IndexError(f"no game named {title!r} in df_rec")
    return matches[0]

In [None]:
def get_title_from_index(index):
    """Return the 'name' of the first df_rec row whose index label equals ``index``.

    Raises IndexError when no row carries that label.
    """
    label_matches = df_rec.index == index
    matching_names = df_rec.loc[label_matches, "name"]
    return matching_names.values[0]

In [None]:
def steam_game_recommender(title, cosine_sim, dataframe, top_n=5):
    """Print the names of the games most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact name of the game to find recommendations for.
    cosine_sim : 2-D array-like
        Square similarity matrix; row i / column i must correspond to the
        i-th row (by position) of ``dataframe``.
    dataframe : pandas.DataFrame
        Frame with a 'name' column, in the row order used to build ``cosine_sim``.
    top_n : int, default 5
        Maximum number of recommendations to print.

    Returns
    -------
    None
        The recommended names are printed, one per line, most similar first.

    Raises
    ------
    IndexError
        If no row of ``dataframe`` is named ``title``.
    """
    names = dataframe['name']

    # Work with row positions, not index labels: cosine_sim is addressed by
    # position, so this stays correct even if the frame's index is not 0..n-1.
    positions = (names == title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"no game named {title!r} in dataframe")
    game_position = int(positions[0])

    # Drop the queried game explicitly instead of assuming it sorts first;
    # a duplicate or tied entry could otherwise push it into the output.
    scored = [
        (position, score)
        for position, score in enumerate(cosine_sim[game_position])
        if position != game_position
    ]
    # Stable sort, highest similarity first (ties keep their original order).
    scored.sort(key=lambda item: item[1], reverse=True)

    for position, _score in scored[:max(top_n, 0)]:
        print(names.iloc[position])