## Sentiment Analysis using vader

In [183]:
# Importing libraries 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['figure.figsize'] = [8, 8]
sns.set_theme(style="whitegrid")
sns.color_palette("rocket", as_cmap=True)
sns.set_palette("pastel")
#hide all warnings
import warnings
warnings.filterwarnings('ignore')
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
sid_obj = SentimentIntensityAnalyzer()

# Defining vader function 

def vader_sentiment(x):
    """Define sentiment analysis function"""
    sentiment_dict = sid_obj.polarity_scores(x)
    if sentiment_dict['compound'] > 0.05:
        return pd.Series(['positive', sentiment_dict['compound']])
    elif sentiment_dict['compound'] <= -0.05:
        return pd.Series(['negative', sentiment_dict['compound']])
    else:
        return pd.Series(['neutral',sentiment_dict['compound']])

In [184]:
# read csv
reviews_sen = pd.read_csv("reviews_sentiment.csv") # to get lemmatized reviews
lexicon = pd.read_csv("topic-grouping-v2.csv") # to get lexicon
reviews = pd.read_csv("reviews.csv") 
game_list = reviews[["game_id", "game_name", 'score']] 
game_list = pd.DataFrame(game_list.groupby(['game_id', 'game_name'])['score'].mean()).reset_index() # to get game names and average scores

In [185]:
words = lexicon.word # define list of words 
groups = lexicon.Group # define list of groups 

In [186]:
# Running sentiment analysis on key word +- 2 

results_all = []
for row in reviews_sen.lemmatized:
    row = row.replace("’","")
    results_row = []
    for word in row.split():
        if word in list(words):
            position = row.split().index(word)
            position_add = row.split().index(word)+2
            position_rem = row.split().index(word)-2
            
            if len(row.split()) <= position_add:
                temp_str = " ".join(row.split()[position_rem:position]) + " " + row.split()[position]
                results_row.append([word,temp_str,vader_sentiment(temp_str)[1]])    
            
            elif position_rem <0:
                temp_str = row.split()[position] + " " + " ".join(row.split()[position+1:position_add+1])
                results_row.append([word,temp_str,vader_sentiment(temp_str)[1]])

            else:
                temp_str = " ".join(row.split()[position_rem:position]) + " " + row.split()[position] + " " + " ".join(row.split()[position+1:position_add+1])
                results_row.append([word, temp_str, vader_sentiment(temp_str)[1]])
    
    results_all.append(results_row)

In [187]:
# Linking words with groups grom lexicon

results_groups_all = []
for review in results_all:
    results_groups= []
    for word in review:
        indx = list(words).index(word[0])
        group = list(groups)[indx]
        results_groups.append([group, word[-1]])
    results_groups_all.append(results_groups)

In [188]:
results_groups_all[2]

[['game design', 0.5423],
 ['game design', 0.2263],
 ['game design', 0.2263],
 ['game design', 0.0]]

In [189]:
# Formatting dataset
final_list= []
for review in results_groups_all:
    if len(review) == 0:
        final_list.append(0)
    else:
        final_list.append(np.array(pd.DataFrame(review).groupby(0)[1].mean().reset_index()))

# pd.DataFrame(results_groups_all[2]).groupby(0)[1].mean()

In [190]:
# Getting average sentiment score per game per attribute

final_df= pd.DataFrame(columns = groups.unique(), index = np.arange(len(sen_results)))
for indx in np.arange(len(final_list)):
    if type(final_list[indx]) == int:
        continue
    else:
        for length in np.arange(len(final_list[indx])):
            final_df.loc[indx,final_list[indx][length][0]]= final_list[indx][length][1]
            
game_sen = final_df.merge(game_list, left_index = True, right_index=True)
game_sen = game_sen.groupby(['game_id', 'game_name', 'score']).mean().reset_index()

In [191]:
game_sen.head(4)

Unnamed: 0,game_id,game_name,score,tone,strategy based gameplay,game design,difficulty,skill based gameplay,enjoyment,luck based gameplay,world building,ignore,visuals,technical performance,innovative,playthrough time,value,narrative,multiplayer,sound track
0,0,Disco Elysium: The Final Cut,97.272727,,,,,,,,-0.25,0.2023,,,,,,0.0,,
1,1,Final Fantasy XIV: Endwalker,91.962963,,,,,,,,,0.0,,,,,,0.0,,
2,2,Forza Horizon 5,91.1,,,0.248725,,,,,,,,,,,,,,
3,3,Chicory: A Colorful Tale,89.782609,,,,,,,,,0.247533,,-0.4404,,,,,,


In [192]:
# Saving to csv 
game_sen.to_csv('Sentiment_Analysis_Per_Game_Per_Attribute.csv')