In [98]:
import pandas as pd
import numpy as np
import gensim
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
import wordcloud
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go

from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from pprint import pprint

In [68]:
# Read the cleaned review text from CSV and show the resulting frame.
csv_path = '.../clean_text.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,voted_up,clean_token,clean_str
0,True,"['fall', 'guy', 'chill', 'fun', 'game', 'reall...",fall guy chill fun game really need server imp...
1,True,"['give', 'little', 'hug', 'player', 'stare', '...",give little hug player stare back second jump ...
2,True,"['reccomend', 'much', 'buy']",reccomend much buy
3,True,"['hug', 'people']",hug people
4,True,"['little', 'boy', 'like', 'see', 'wipeout', 't...",little boy like see wipeout tv always dream ta...
...,...,...,...
13089,True,"['well', 'make', 'game', 'get', 'enough', 'man...",well make game get enough many way differentia...
13090,True,"['fun', 'competitive', 'especially', 'friend',...",fun competitive especially friend recommend bu...
13091,True,"['game', 'extremely', 'fun', 'nice', 'break', ...",game extremely fun nice break usual drop loot ...
13092,True,"['fun', 'play', 'keep', 'work', 'game']",fun play keep work game


In [69]:
# Keep only the reviews whose 'voted_up' flag is True.
# .copy() makes df_yes an independent frame: later cells assign columns on it, and
# doing that on a boolean-mask slice of df can trigger SettingWithCopyWarning.
df_yes = df[df['voted_up'] == True].copy()

In [70]:
# Renumber the filtered rows 0..n-1, discarding the row labels inherited from df.
# Rebinding (rather than inplace=True) avoids mutating a filtered slice in place.
df_yes = df_yes.reset_index(drop=True)

In [71]:
# Display the voted_up == True subset after its index has been reset.
df_yes

Unnamed: 0,voted_up,clean_token,clean_str
0,True,"['fall', 'guy', 'chill', 'fun', 'game', 'reall...",fall guy chill fun game really need server imp...
1,True,"['give', 'little', 'hug', 'player', 'stare', '...",give little hug player stare back second jump ...
2,True,"['reccomend', 'much', 'buy']",reccomend much buy
3,True,"['hug', 'people']",hug people
4,True,"['little', 'boy', 'like', 'see', 'wipeout', 't...",little boy like see wipeout tv always dream ta...
...,...,...,...
9763,True,"['well', 'make', 'game', 'get', 'enough', 'man...",well make game get enough many way differentia...
9764,True,"['fun', 'competitive', 'especially', 'friend',...",fun competitive especially friend recommend bu...
9765,True,"['game', 'extremely', 'fun', 'nice', 'break', ...",game extremely fun nice break usual drop loot ...
9766,True,"['fun', 'play', 'keep', 'work', 'game']",fun play keep work game


In [72]:
# Make sure every 'clean_str' entry is a Python string before vectorizing.
# Missing values are replaced with '' first; a bare astype(str) would turn them
# into the literal text 'nan'. assign() returns a new frame, so no column is
# written onto a slice of df.
df_yes = df_yes.assign(clean_str=df_yes['clean_str'].fillna('').astype(str))

In [75]:
# Build a bag-of-bigrams document-term matrix from the cleaned review strings.
vectorizer = CountVectorizer(
    analyzer='word',
    min_df=3,                         # drop bigrams found in fewer than 3 reviews
    stop_words=['game', 'fun'],       # domain words excluded from the vocabulary
    ngram_range=(2, 2),               # bigrams only
    lowercase=True,
    token_pattern='[a-zA-Z0-9]{3,}',  # alphanumeric tokens of 3+ characters
    max_features=10000,               # cap on vocabulary size
)

data_vectorized = vectorizer.fit_transform(df_yes['clean_str'])

# Fit an 8-topic LDA model on the bigram counts.
# (The original cell had a stray `s` after learning_method, which is a SyntaxError.)
lda_model = LatentDirichletAllocation(
    n_components=8,
    learning_method='online',
    random_state=0,                   # fixed seed for reproducible topics
    n_jobs=-1,                        # use all available cores
)
lda_output = lda_model.fit_transform(data_vectorized)

In [76]:
def show_topics(vectorizer=vectorizer, lda_model=lda_model, n_words=10):
    """Return the highest-weighted vocabulary terms for each LDA topic.

    Parameters
    ----------
    vectorizer : fitted vectorizer exposing its vocabulary via
        ``get_feature_names_out()`` (or the older ``get_feature_names()``).
        The default is the module-level ``vectorizer`` as it existed when this
        function was defined.
    lda_model : fitted model exposing ``components_`` (one weight row per topic).
        The default is the module-level ``lda_model`` at definition time.
    n_words : int
        Number of top terms to return per topic.

    Returns
    -------
    list of numpy.ndarray
        One array per topic, holding its terms ordered by decreasing weight.
    """
    # get_feature_names() was removed in scikit-learn 1.2; prefer its replacement
    # and fall back only when running on an older release.
    if hasattr(vectorizer, 'get_feature_names_out'):
        keywords = np.asarray(vectorizer.get_feature_names_out())
    else:
        keywords = np.array(vectorizer.get_feature_names())

    topic_keywords = []
    for topic_weights in lda_model.components_:
        # Negate so argsort's ascending order yields the largest weights first.
        top_keyword_locs = (-topic_weights).argsort()[:n_words]
        topic_keywords.append(keywords.take(top_keyword_locs))
    return topic_keywords

topic_keywords = show_topics(vectorizer=vectorizer, lda_model=lda_model, n_words=10)

# Tabulate the keywords: one row per topic, one column per keyword rank.
df_topic_keywords = pd.DataFrame(topic_keywords)
n_topic_rows, n_keyword_cols = df_topic_keywords.shape
df_topic_keywords.columns = [f'Word {rank}' for rank in range(n_keyword_cols)]
df_topic_keywords.index = [f'Topic {topic}' for topic in range(n_topic_rows)]
df_topic_keywords

Unnamed: 0,Word 0,Word 1,Word 2,Word 3,Word 4,Word 5,Word 6,Word 7,Word 8,Word 9
Topic 0,mario party,battle royale,ever play,friend play,tail tag,even well,fall ball,well friend,feel like,grab tail
Topic 1,long time,pretty good,feel like,play long,server problem,really like,new content,wait see,worth every,need fix
Topic 2,fall guy,battle royale,jelly bean,server issue,finish line,negative review,launch day,final round,royale genre,get well
Topic 3,server issue,highly recommend,play hour,look forward,lot potential,day server,first day,takeshis castle,one thing,stop play
Topic 4,good good,battle royale,good big,good ever,good battle,get knock,would fall,easy learn,worth price,could use
Topic 5,play friend,fall guy,great play,play alone,good time,server issue,even lose,new map,play even,time play
Topic 6,takeshis castle,would recommend,play friend,good play,especially friend,highly recommend,team mode,many people,battle pass,definitely worth
Topic 7,fall guy,guy fall,run run,really good,battle royal,get eliminate,one good,yellow team,obstacle course,people play


In [112]:
# Hand-written theme label for each topic row of df_topic_keywords, in row order.
Topics_theme = [
    'fun to play with friends',
    'good game but need new content and server fix',
    'server issue on launch day',
    'a potentially good game but has server issue',
    'easy to play',
    'can be played with friends or alone',
    'recommend the game',
    'enjoying the team mode but get eliminated',
]
df_topic_keywords['topic_theme'] = Topics_theme

In [113]:
# Use the theme labels as the row index (replacing the existing row labels).
# Rebinding instead of inplace=True keeps the step a plain expression with no
# in-place mutation of the frame.
df_topic_keywords = df_topic_keywords.set_index('topic_theme')

In [114]:
# Transposed view: one column per row label of df_topic_keywords, one row per 'Word i'.
df_topic_keywords.T

topic_theme,fun to play with friends,good game but need new content and server fix,server issue on launch day,a potentially good game but has server issue,easy to play,can be played with friends or alone,recommend the game,enjoying the team mode but get eliminated
Word 0,mario party,long time,fall guy,server issue,good good,play friend,takeshis castle,fall guy
Word 1,battle royale,pretty good,battle royale,highly recommend,battle royale,fall guy,would recommend,guy fall
Word 2,ever play,feel like,jelly bean,play hour,good big,great play,play friend,run run
Word 3,friend play,play long,server issue,look forward,good ever,play alone,good play,really good
Word 4,tail tag,server problem,finish line,lot potential,good battle,good time,especially friend,battle royal
Word 5,even well,really like,negative review,day server,get knock,server issue,highly recommend,get eliminate
Word 6,fall ball,new content,launch day,first day,would fall,even lose,team mode,one good
Word 7,well friend,wait see,final round,takeshis castle,easy learn,new map,many people,yellow team
Word 8,feel like,worth every,royale genre,one thing,worth price,play even,battle pass,obstacle course
Word 9,grab tail,need fix,get well,stop play,could use,time play,definitely worth,people play


In [115]:
# Per-document topic weights from the fitted LDA model (one row per document).
lda_output = lda_model.transform(data_vectorized)

# Column labels for the topic weights: the row labels of df_topic_keywords.
topicnames = df_topic_keywords.T.columns

# Row labels "Doc0", "Doc1", ... - one per row of df_yes.
docnames = [f"Doc{i}" for i in range(len(df_yes))]

# Weights are rounded to 2 decimals for display only.
df_document_topic = pd.DataFrame(np.round(lda_output, 2), columns=topicnames, index=docnames)

# Pick the dominant topic from the UNROUNDED weights. Taking argmax over the
# rounded table lets rounding ties (e.g. 0.37 vs 0.37) fall to the first column
# even when a later topic has the larger true weight.
dominant_topic = np.argmax(lda_output, axis=1)
df_document_topic['dominant_topic'] = dominant_topic

In [116]:
# Attach each review's topic weights and dominant topic by row position
# (relies on df_yes carrying a 0..n-1 index).
# reset_index(drop=True) acts on a temporary frame, so df_document_topic itself is
# not mutated and re-running this cell does not accumulate extra index columns.
df_sent_topic = pd.merge(
    df_yes,
    df_document_topic.reset_index(drop=True),
    left_index=True,
    right_index=True,
)

In [117]:
# Display the reviews joined with their per-topic weights and dominant topic index.
df_sent_topic

Unnamed: 0,voted_up,clean_token,clean_str,fun to play with friends,good game but need new content and server fix,server issue on launch day,a potentially good game but has server issue,easy to play,can be played with friends or alone,recommend the game,enjoying the team mode but get eliminated,dominant_topic
0,True,"['fall', 'guy', 'chill', 'fun', 'game', 'reall...",fall guy chill fun game really need server imp...,0.01,0.96,0.01,0.01,0.01,0.01,0.01,0.01,1
1,True,"['give', 'little', 'hug', 'player', 'stare', '...",give little hug player stare back second jump ...,0.03,0.53,0.03,0.03,0.28,0.03,0.03,0.03,1
2,True,"['reccomend', 'much', 'buy']",reccomend much buy,0.06,0.06,0.06,0.06,0.06,0.06,0.56,0.06,6
3,True,"['hug', 'people']",hug people,0.56,0.06,0.06,0.06,0.06,0.06,0.06,0.06,0
4,True,"['little', 'boy', 'like', 'see', 'wipeout', 't...",little boy like see wipeout tv always dream ta...,0.90,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9763,True,"['well', 'make', 'game', 'get', 'enough', 'man...",well make game get enough many way differentia...,0.31,0.21,0.01,0.01,0.01,0.20,0.13,0.11,0
9764,True,"['fun', 'competitive', 'especially', 'friend',...",fun competitive especially friend recommend bu...,0.03,0.03,0.03,0.03,0.03,0.03,0.78,0.03,6
9765,True,"['game', 'extremely', 'fun', 'nice', 'break', ...",game extremely fun nice break usual drop loot ...,0.37,0.04,0.04,0.04,0.04,0.37,0.04,0.04,0
9766,True,"['fun', 'play', 'keep', 'work', 'game']",fun play keep work game,0.04,0.04,0.37,0.04,0.04,0.04,0.04,0.38,7


In [118]:
# Keep just the review text and its dominant topic index.
# .copy() gives an independent frame; a column is added to it later, and doing
# that on a column-subset slice can trigger SettingWithCopyWarning.
df_topic_theme = df_sent_topic[['clean_str', 'dominant_topic']].copy()

In [119]:
# Preview the first 10 reviews with their dominant topic index.
df_topic_theme.head(10)

Unnamed: 0,clean_str,dominant_topic
0,fall guy chill fun game really need server imp...,1
1,give little hug player stare back second jump ...,1
2,reccomend much buy,6
3,hug people,0
4,little boy like see wipeout tv always dream ta...,0
5,nice br fun would fall,4
6,funnychaotic madness player bounce collect pus...,1
7,fun short burst tedious long stretch moment mi...,0
8,see game potential good design game good multi...,6
9,great game fun play need get rid cheater asap ...,5


In [120]:
# Map each topic position (0-based) to its theme label.
# Derived from Topics_theme rather than retyped, so the labels used for the
# keyword table and for this mapping cannot drift apart.
topic_remap = dict(enumerate(Topics_theme))

In [121]:
# Translate each dominant topic index into its theme label via topic_remap.
# assign() returns a new frame instead of writing a column onto what may be a
# slice of df_sent_topic (which can trigger SettingWithCopyWarning).
df_topic_theme = df_topic_theme.assign(
    dominant_topic_theme=df_topic_theme['dominant_topic'].map(topic_remap)
)

In [122]:
# Display each review with its dominant topic index and mapped theme label.
df_topic_theme

Unnamed: 0,clean_str,dominant_topic,dominant_topic_theme
0,fall guy chill fun game really need server imp...,1,good game but need new content and server fix
1,give little hug player stare back second jump ...,1,good game but need new content and server fix
2,reccomend much buy,6,recommend the game
3,hug people,0,fun to play with friends
4,little boy like see wipeout tv always dream ta...,0,fun to play with friends
...,...,...,...
9763,well make game get enough many way differentia...,0,fun to play with friends
9764,fun competitive especially friend recommend bu...,6,recommend the game
9765,game extremely fun nice break usual drop loot ...,0,fun to play with friends
9766,fun play keep work game,7,enjoying the team mode but get eliminated


In [123]:
# Count how many reviews fall under each dominant theme label.
df_topic_theme['dominant_topic_theme'].value_counts()

fun to play with friends                         4232
server issue on launch day                       1106
can be played with friends or alone               872
recommend the game                                773
enjoying the team mode but get eliminated         747
good game but need new content and server fix     744
a potentially good game but has server issue      699
easy to play                                      595
Name: dominant_topic_theme, dtype: int64

In [124]:
# Tally reviews per dominant theme into a two-column frame: 'topic' and 'counts'.
df_result = (
    df_topic_theme['dominant_topic_theme']
    .value_counts()
    .rename_axis('topic')
    .reset_index(name='counts')
)

In [125]:
# Display the topic/counts table that feeds the bar chart.
df_result

Unnamed: 0,topic,counts
0,fun to play with friends,4232
1,server issue on launch day,1106
2,can be played with friends or alone,872
3,recommend the game,773
4,enjoying the team mode but get eliminated,747
5,good game but need new content and server fix,744
6,a potentially good game but has server issue,699
7,easy to play,595


In [126]:
# Bar chart of the number of reviews per dominant topic theme.
fig = go.Figure([go.Bar(x=df_result['topic'], y=df_result['counts'])])
# Axis titles are set alongside the chart title so the figure reads on its own.
fig.update_layout(
    title_text="Topics in Recommended Reviews",
    xaxis_title="Dominant topic theme",
    yaxis_title="Number of reviews",
)
fig.show()