In [141]:
# Importing libraries 

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams['figure.figsize'] = [3, 4]
sns.set_theme(style="whitegrid")
sns.color_palette("rocket", as_cmap=True)
sns.set_palette("pastel")
#hide all warnings
import warnings
warnings.filterwarnings('ignore')
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.wordnet import WordNetLemmatizer

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
sid_obj = SentimentIntensityAnalyzer()



# Defining vader function 

def vader_sentiment(x):
    """Define sentiment analysis function"""
    sentiment_dict = sid_obj.polarity_scores(x)
    if sentiment_dict['compound'] > 0.05:
        return pd.Series(['positive', sentiment_dict['compound']])
    elif sentiment_dict['compound'] <= -0.05:
        return pd.Series(['negative', sentiment_dict['compound']])
    else:
        return pd.Series(['neutral',sentiment_dict['compound']])

In [79]:
data_dir = "../data/"
plots_dir = "../plots/"

In [80]:
reviews = pd.read_csv(data_dir+"gta-reviews.csv")

In [81]:
reviews

Unnamed: 0,title,score,critic,summary,score_critic,year
0,Grand Theft Auto: Liberty City Stories,88,Gamer 2.0,"On one hand, it is watered-down, it is not as ...",83.0,24-Oct-05
1,Grand Theft Auto: San Andreas,93,Gamer's Hell,"An absolute blast to play, if you can forgive ...",78.0,07-Jun-05
2,Grand Theft Auto: Chinatown Wars,93,IGN AU,For Nintendo-only gamers out there – and these...,93.0,17-Mar-09
3,Grand Theft Auto: San Andreas,95,,Bigger and richer in every way - a near bottom...,88.0,26-Oct-04
4,Grand Theft Auto: Vice City,95,Thunderbolt,"It has the right blend of intense gameplay, an...",90.0,27-Oct-02
...,...,...,...,...,...,...
823,Grand Theft Auto IV: The Ballad of Gay Tony,89,,Slightly less satisfying than last time around...,90.0,29-Oct-09
824,Grand Theft Auto: Vice City 10th Anniversary E...,80,,If you aren't grinning by mission two then the...,82.0,06-Dec-12
825,Grand Theft Auto: Liberty City Stories,78,,"Spectacularly good value, despite some of the ...",90.0,06-Jun-06
826,Grand Theft Auto IV: The Lost and Damned,88,Vandal,A dark and sordid revisit to Liberty City full...,90.0,13-Apr-10


In [82]:
reviews['date'] =  pd.to_datetime(reviews['year']).dt.year


In [83]:
reviews['date'].describe()

count     828.000000
mean     2006.602657
std         2.534472
min      2002.000000
25%      2005.000000
50%      2006.000000
75%      2009.000000
max      2012.000000
Name: date, dtype: float64

We have reviews of Grand Theft Auto series launched between 2002-2012

In [84]:
#processing review data and then calculating the sentiment score 
lexicon = pd.read_csv(data_dir + "topic-grouping-v2.csv") # to get lexicon
words = lexicon.word # define list of words 
groups = lexicon.Group # define list of groups 

In [85]:
#tokenization 
stop_words=set(stopwords.words("english"))

#tokenised list of reviews that do not contain stop words
def tokenized_summary(x):
    filtered_sent=[]
    for w in word_tokenize(x):
        if w not in stop_words:
            filtered_sent.append(str(w))
    return filtered_sent

reviews["tokenised"] = reviews['summary'].apply(lambda x: tokenized_summary(str(x)))

In [86]:
#Lemmatization 
import string
lem = WordNetLemmatizer()
def lemmatized_list(x):
    lemmatized=[]
    for w in x:
        lemmatized.append(lem.lemmatize(w))
    final_string =  " ".join(lemmatized)
    return final_string.translate(str.maketrans('', '', string.punctuation))


reviews["lemmatized"] = reviews['tokenised'].apply(lambda x: lemmatized_list(x))

In [87]:
# Running sentiment analysis on key word +- 2 

results_all = []
for row in reviews.lemmatized:
    row = row.replace("’","")
    row = row.replace("-","")
    results_row = []
    for word in row.split():
        if word in list(words):
            position = row.split().index(word)
            position_add = row.split().index(word)+2
            position_rem = row.split().index(word)-2
            
            if len(row.split()) <= position_add:
                temp_str = " ".join(row.split()[position_rem:position]) + " " + row.split()[position]
                results_row.append([word,temp_str,vader_sentiment(temp_str)[1]])    
            
            elif position_rem <0:
                temp_str = row.split()[position] + " " + " ".join(row.split()[position+1:position_add+1])
                results_row.append([word,temp_str,vader_sentiment(temp_str)[1]])

            else:
                temp_str = " ".join(row.split()[position_rem:position]) + " " + row.split()[position] + " " + " ".join(row.split()[position+1:position_add+1])
                results_row.append([word, temp_str, vader_sentiment(temp_str)[1]])
    
    results_all.append(results_row)

In [88]:
# Linking words with groups grom lexicon

results_groups_all = []
for review in results_all:
    results_groups= []
    for word in review:
        indx = list(words).index(word[0])
        group = list(groups)[indx]
        results_groups.append([group, word[-1]])
    results_groups_all.append(results_groups)

In [89]:
# Formatting dataset
final_list= []
for review in results_groups_all:
    if len(review) == 0:
        final_list.append(0)
    else:
        final_list.append(np.array(pd.DataFrame(review).groupby(0)[1].mean().reset_index()))

# pd.DataFrame(results_groups_all[2]).groupby(0)[1].mean()

In [101]:
# Getting average sentiment score per game per attribute

final_df= pd.DataFrame(columns = groups.unique(), index = np.arange(reviews.shape[0]))
for indx in np.arange(len(final_list)):
    if type(final_list[indx]) == int:
        continue
    else:
        for length in np.arange(len(final_list[indx])):
            final_df.loc[indx,final_list[indx][length][0]]= final_list[indx][length][1]
            
game_sen = reviews.merge(final_df, left_index = True, right_index=True)


In [102]:
game_sen.fillna(0, inplace=True)

In [194]:
game_sen = game_sen.groupby(['date']).mean().reset_index().round(3)

In [195]:
game_sen['date']=game_sen.date.astype(int)

In [196]:
game_sen.sort_values(by='date', inplace=True)

In [197]:
game_sen.describe()

Unnamed: 0,date,score_critic,tone,strategy based gameplay,game design,difficulty,skill based gameplay,enjoyment,luck based gameplay,world building,ignore,visuals,technical performance,innovative,playthrough time,value,narrative,multiplayer,sound track
count,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
mean,2006.333333,86.902833,0.022833,-0.0005,0.062667,0.003833,0.003333,0.0885,0.001667,0.011333,0.093833,0.014167,0.0015,0.019167,0.021333,0.028667,0.025,0.003833,0.0045
std,3.614784,6.719752,0.01199,0.000837,0.034349,0.007441,0.003559,0.028176,0.001862,0.015069,0.024236,0.012734,0.010035,0.010028,0.016293,0.029173,0.016864,0.008448,0.007232
min,2002.0,79.632,0.009,-0.002,0.026,-0.005,-0.002,0.062,0.0,0.004,0.057,0.0,-0.01,0.004,0.005,0.002,0.009,-0.004,-0.001
25%,2004.25,81.079,0.01675,-0.00075,0.0305,-0.0015,0.001,0.0635,0.0,0.00425,0.083,0.007,-0.0065,0.015,0.0115,0.01025,0.01575,-0.0015,0.0
50%,2005.5,86.5835,0.0195,0.0,0.0665,0.0035,0.0045,0.0845,0.0015,0.0055,0.095,0.0135,0.0005,0.021,0.018,0.0225,0.022,0.0,0.0015
75%,2008.25,92.6625,0.0275,0.0,0.092,0.00775,0.00575,0.1085,0.003,0.00675,0.1055,0.0155,0.00975,0.02175,0.0245,0.0325,0.02525,0.00975,0.006
max,2012.0,94.685,0.043,0.0,0.098,0.015,0.007,0.127,0.004,0.042,0.128,0.037,0.014,0.034,0.051,0.083,0.057,0.016,0.018


In [198]:
game_sen = game_sen[game_sen['date']!=2010]

In [227]:
import plotly.express as px
import plotly.graph_objects as go
for column in game_sen.columns:
    
    if column not in ['title', 'date', 'ignore', 'score_critic']:
            fig = go.Figure()
            fig.add_trace(
                go.Scatter( x=game_sen['date'], y=game_sen['score_critic'], name="Critic Score"))

            fig.add_trace(
                go.Bar(
                    x=game_sen['date'],
                   y=game_sen[column]*1000, name=column))
            
            fig.update_layout(
                title = column.upper(),
                hovermode='x unified',
                autosize=False,
                width=800,
                height=250,
                margin=dict(
                    l=50,
                    r=50,
                    b=10,
                    t=40,
                    pad=4
                ))

            fig.show()
            #fig = px.bar(game_sen, x='date', y=column,hover_name="title",width=800, height=300)
            #fig.add_line(game_sen, x='date', y='score_critic')

            #fig.show()

In [None]:
wide_df = px.data.medals_wide()


In [226]:
reversed_df = game_sen.T
reversed_df.columns=game_sen.date.values
reversed_df = reversed_df.drop('date', axis=0)
reversed_df

Unnamed: 0,2002,2004,2005,2006,2009,2012
score_critic,94.685,93.427,82.798,80.506,90.369,79.632
tone,0.016,0.009,0.019,0.03,0.02,0.043
strategy based gameplay,0.0,-0.002,0.0,0.0,-0.001,0.0
game design,0.044,0.089,0.026,0.026,0.098,0.093
difficulty,0.015,0.003,0.004,-0.003,0.009,-0.005
skill based gameplay,0.006,0.004,0.005,-0.002,0.007,0.0
enjoyment,0.062,0.068,0.062,0.127,0.101,0.111
luck based gameplay,0.003,0.0,0.0,0.003,0.004,0.0
world building,0.004,0.004,0.005,0.007,0.006,0.042
ignore,0.057,0.108,0.092,0.08,0.098,0.128


In [244]:
import plotly.express as px
px.box(game_sen,y=list(game_sen.columns.drop(['date', 'score_critic'])), animation_frame="date",
              range_y=[-.01,0.1])