# Importing Library

In [15]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation

In [2]:
import pyLDAvis
import pyLDAvis.sklearn
pyLDAvis.enable_notebook()

  from collections import Iterable
  from collections import Mapping


In [3]:
import sys
sys.path.insert(0, '../src')
from cleaner import clean_text
%load_ext autoreload
%autoreload 2

  regargs, varargs, varkwargs, defaults, formatvalue=lambda value: ""
  from collections import Sequence, defaultdict


# Define handy functions

In [4]:
def show_topics(model, feature_names, no_top_words):
    topic_dict = {}
    for idx, topic in enumerate(model.components_):
        topic_dict["Topic %d words" % (idx)]= ['{}'.format(feature_names[i])
                        for i in topic.argsort()[:-no_top_words - 1:-1]]
        topic_dict["Topic %d weights" % (idx)]= ['{:.1f}'.format(topic[i])
                        for i in topic.argsort()[:-no_top_words - 1:-1]]
    return pd.DataFrame(topic_dict)

In [5]:
def show_topics2(model, feature_names, n_top_words):
    for topic_idx, topic in enumerate(model.components_):
        message = "Topic #%d: " % topic_idx
        message += " ".join([feature_names[i]
                             for i in topic.argsort()[:-n_top_words - 1:-1]])
        print(message)
    print()

# Importing Data

In [57]:
df = pd.read_csv('../data/all_comments_with_sentiment.csv')

# Prelaunch Focus

## Cleaning the text

In [135]:
period_start = '2020-03-13' #inclusive
period_stop = '2020-03-20' #exclusive

df_pre_pos = df[(df['date'] < period_stop) & (df['date'] >= period_start) & (df['sentiment'] == 'pos')]
df_pre_neu = df[(df['date'] < period_stop) & (df['date'] >= period_start) & (df['sentiment'] == 'neu')]
df_pre_neg = df[(df['date'] < period_stop) & (df['date'] >= period_start) & (df['sentiment'] == 'neg')]
df_pre_pos.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)
df_pre_neu.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)
df_pre_neg.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)

In [136]:
df_pre_pos.loc[:,'cleaned'] = df_pre_pos['body'].apply(clean_text)
df_pre_pos.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [137]:
df_pre_neu.loc[:,'cleaned'] = df_pre_neu['body'].apply(clean_text)
df_pre_neu.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [138]:
df_pre_neg.loc[:,'cleaned'] = df_pre_neg['body'].apply(clean_text)
df_pre_neg.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [141]:
# check the dataframes
df_pre_pos

Unnamed: 0,datetime,author,body,date,sent_score,sentiment,cleaned
1,2020-03-13 14:00:21,soundwave145,"Doom,Final fantasy 7,Resident evil 3,Persona 5...",2020-03-13,0.0772,pos,doom final fantasy resident evil persona royal...
2,2020-03-13 14:00:22,frescapades,Dang! Just went to pick one up and bc there wa...,2020-03-13,0.2003,pos,dang went pick one bc barcode website anything...
4,2020-03-13 14:00:43,pearlescentsheep,SW-1421-4584-3913 — Probably going with Pearl...,2020-03-13,0.1007,pos,probably going pearl still deciding island nam...
5,2020-03-13 14:00:45,derekscardigan,The adding and removing insurance tip was key....,2020-03-13,0.4199,pos,adding removing insurance tip key thank
8,2020-03-13 14:00:58,AnonymousSplash,That sounds adorable! I think we have similar ...,2020-03-13,0.9095,pos,sound adorable think similar mindset trying de...
...,...,...,...,...,...,...,...
93720,2020-03-19 23:59:46,Smiles-Bite,It isn't great in the least. Our virus count i...,2020-03-19,0.3235,pos,great least virus count skyrocketing still pro...
93722,2020-03-19 23:59:47,Flux85,You do realize a major part of the game in thi...,2020-03-19,0.1154,pos,realize major part game version resides server...
93723,2020-03-19 23:59:54,Benson2500,Of course you would. I 100% believe you lmao,2020-03-19,0.5994,pos,course would believe lmao
93724,2020-03-19 23:59:54,BloomyC,Really cute! I would buy from you.,2020-03-19,0.5551,pos,really cute would buy


## Vectorization

In [192]:
tfidfvectorizer = TfidfVectorizer(
#     max_df = 0.99,
#     min_df = 0.01,
#     max_features = 
)

In [193]:
tf_vec = tfidfvectorizer.fit_transform(df_pre_neg['cleaned'])

In [194]:
tf_vec.shape

(10670, 10782)

In [177]:
countvectorizer = CountVectorizer(
#     max_df=0.95,
#     min_df=2,
#     max_features=n_features,
)

In [178]:
count_vec = countvectorizer.fit_transform(df_pre_neg['cleaned'])

## Fitting LDA model

In [170]:
number_of_topics = 10
random_seed = 99
ldamodel = LatentDirichletAllocation(
    n_components=number_of_topics,
    max_iter=50,
    learning_method='online',
    learning_offset=50.,
    random_state=random_seed)

In [179]:
ldamodel.fit(count_vec)

LatentDirichletAllocation(batch_size=128, doc_topic_prior=None,
                          evaluate_every=-1, learning_decay=0.7,
                          learning_method='online', learning_offset=50.0,
                          max_doc_update_iter=100, max_iter=50,
                          mean_change_tol=0.001, n_components=10, n_jobs=None,
                          perp_tol=0.1, random_state=99, topic_word_prior=None,
                          total_samples=1000000.0, verbose=0)

In [182]:
top_n_words = 10
ct_feature_names = countvectorizer.get_feature_names()
show_topics(ldamodel,ct_feature_names,top_n_words)

Unnamed: 0,Topic 0 words,Topic 0 weights,Topic 1 words,Topic 1 weights,Topic 2 words,Topic 2 weights,Topic 3 words,Topic 3 weights,Topic 4 words,Topic 4 weights,Topic 5 words,Topic 5 weights,Topic 6 words,Topic 6 weights,Topic 7 words,Topic 7 weights,Topic 8 words,Topic 8 weights,Topic 9 words,Topic 9 weights
0,get,684.6,oh,245.5,played,140.3,im,175.4,save,73.6,digital,411.4,game,1030.8,damn,170.6,animal,494.1,die,210.6
1,game,655.5,hate,189.4,never,105.8,love,129.6,as,57.7,copy,389.3,one,835.4,like,104.8,crossing,450.9,sorry,180.4
2,itch,492.1,pick,187.3,moved,103.3,fruit,95.5,meant,50.6,physical,251.4,like,647.4,look,56.1,post,214.8,man,123.4
3,order,462.2,worry,104.0,villager,97.9,bob,72.6,true,50.0,two,152.0,time,636.3,cant,51.4,shit,179.3,fuck,103.5
4,store,447.1,dont,99.8,name,82.2,sadly,70.5,axe,43.7,bad,136.6,new,581.0,eye,50.5,doom,120.7,op,55.7
5,got,392.2,suck,66.0,read,80.7,lazy,67.3,japanese,40.0,want,91.0,people,561.3,color,50.2,miss,95.1,ich,37.8
6,day,365.9,yet,62.1,code,76.2,always,66.8,favourite,34.6,scared,79.4,get,558.9,design,49.7,see,72.6,actual,37.8
7,release,362.1,essential,60.5,question,66.9,sure,66.2,dizzy,34.1,problem,76.5,would,493.7,idk,47.1,please,58.0,da,34.1
8,still,344.9,keep,60.1,cute,57.7,hard,65.1,rock,32.5,really,71.9,know,474.5,send,46.6,information,56.0,image,33.9
9,mine,316.0,non,54.9,house,57.0,mad,64.1,personality,31.6,gyroids,69.0,really,469.4,bitch,46.6,removed,48.7,cherry,33.6


In [197]:
p = pyLDAvis.sklearn.prepare(ldamodel, count_vec, countvectorizer)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  return pd.concat([default_term_info] + list(topic_dfs))


In [198]:
pyLDAvis.save_html(p, 'lda_pre_neg.html')

## NMF model

In [229]:
number_of_topics = 10
random_seed = 99
nmfmodel = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel.fit(tf_vec)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [230]:
top_n_words = 10
tf_feature_names = tfidfvectorizer.get_feature_names()
show_topics2(nmfmodel,tf_feature_names,top_n_words)

Topic #0: get time like people really bad know want think way
Topic #1: sorry im man hear mean suck omg right meant sold
Topic #2: mine got th still say delayed ordered amazon yet shipped
Topic #3: problem thank yeah enjoy bud lt see fun need make
Topic #4: oh damn shit god yeah hell suck well hot sad
Topic #5: game animal crossing played doom never play stop first release
Topic #6: new sad villager leaf town horizon moved favorite first away
Topic #7: one itch island ac name per hacked console hard know
Topic #8: digital cancel copy order physical buy pre go get cancelled
Topic #9: would die cry ride goldie doubt slider marshal hard pay



# Launch focus

In [58]:
period_start = '2020-03-20' #inclusive
period_stop = '2020-04-01' #exclusive

df_launch = df[(df['date'] < period_stop) & (df['date'] >= period_start)]
df_launch.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)

df_launch.loc[:,'cleaned'] = df_launch['body'].apply(clean_text)
df_launch.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


nan
nan


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [211]:
# df_launch.to_csv('../data/launch_cleaned_all_sentiment.csv')

# Fitting NMF Model

In [51]:
tfidfvectorizer_launch = TfidfVectorizer(
#     max_df = 0.99,
#     min_df = 0.01,
#     max_features = 
)

In [59]:
sentiment_focus = 'pos'
filtered_frame = df_launch[df_launch['sentiment'] == sentiment_focus]['cleaned']

tf_vec_launch = tfidfvectorizer_launch.fit_transform(filtered_frame)
tf_vec_launch.shape

(238242, 43316)

In [60]:
number_of_topics = 10
random_seed = 99
nmfmodel_launch = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_launch.fit(tf_vec_launch)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [61]:
top_n_words = 10
tf_feature_names_launch = tfidfvectorizer_launch.get_feature_names()
show_topics2(nmfmodel_launch,tf_feature_names_launch,top_n_words)

Topic #0: thank much awesome oh okay ok ah great amazing know
Topic #1: code dodo please looking fruit dm peach orange cherry apple
Topic #2: thanks ok much awesome oh okay cool know info ah
Topic #3: yes please omg move day believe ah one unfortunately oh
Topic #4: like island get one game time day villager know want
Topic #5: love would much visit omg come amazing see great design
Topic #6: lol oh yeah got ok mine omg thought know need
Topic #7: nice look oh really job would wow great work good
Topic #8: friend code request sent add play best send new itch
Topic #9: cute awesome super omg look really oh great idea job



In [62]:
sentiment_focus = 'neu'
filtered_frame = df_launch[df_launch['sentiment'] == sentiment_focus]['cleaned']

tf_vec_launch = tfidfvectorizer_launch.fit_transform(filtered_frame)
tf_vec_launch.shape

(163143, 30733)

In [63]:
number_of_topics = 10
random_seed = 99
nmfmodel_launch = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_launch.fit(tf_vec_launch)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [64]:
top_n_words = 10
tf_feature_names_launch = tfidfvectorizer_launch.get_feature_names()
show_topics2(nmfmodel_launch,tf_feature_names_launch,top_n_words)

Topic #0: code dodo dm qr pm send looking open new hemisphere
Topic #1: island come fruit open visit nook hemisphere fish southern mile
Topic #2: added open gate name back pop ya bring hi add
Topic #3: cherry peach pear apple orange trade bring looking coconut fruit
Topic #4: get recipe nook mile able game villager house trying ladder
Topic #5: got today mine pear balloon recipe one first yesterday iron
Topic #6: way make know game go think find headed work bringing
Topic #7: day one remindme time think next first villager new game
Topic #8: sent request dm pm message chat amp link check downloadable
Topic #9: need still fruit iron know open anything omg shop space



In [65]:
sentiment_focus = 'neg'
filtered_frame = df_launch[df_launch['sentiment'] == sentiment_focus]['cleaned']

tf_vec_launch = tfidfvectorizer_launch.fit_transform(filtered_frame)
tf_vec_launch.shape

(73427, 23125)

In [66]:
number_of_topics = 10
random_seed = 99
nmfmodel_launch = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_launch.fit(tf_vec_launch)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [67]:
top_n_words = 20
tf_feature_names_launch = tfidfvectorizer_launch.get_feature_names()
show_topics2(nmfmodel_launch,tf_feature_names_launch,top_n_words)

Topic #0: time game like people think day thing get really know travel way would play sure make animal even back wrong
Topic #1: sorry loss know sure im new closed mean hear internet yet guy gate meant full people post ah connection question
Topic #2: problem lol know fun else exact anyone yeah see fix haha thanks work let thank hope well glad fixed sound
Topic #3: island tarantula mystery nook mile fruit ticket native bamboo flower visit spawn full go rock fish found find itch different
Topic #4: tree get axe fruit rock shake shovel hit stone flimsy cut use wood iron grow break shaking chop need eat
Topic #5: bad want feel luck lol really like go make bot need good thanks look got turnip thing man omg right
Topic #6: oh damn shit god fuck know thanks thank hell suck man yeah well really holy thought idea wait full crap
Topic #7: code dodo drop need peach cherry orange apple fruit pear dm ill come looking interference pay open bell pm bring
Topic #8: one got day caught first fish per c

# Bunny day focus

In [68]:
period_start = '2020-04-01' #inclusive
period_stop = '2020-04-13' #exclusive

df_bunny = df[(df['date'] < period_stop) & (df['date'] >= period_start)]
df_bunny.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)

df_bunny.loc[:,'cleaned'] = df_bunny['body'].apply(clean_text)
df_bunny.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


nan


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [235]:
# df_bunny.to_csv('../data/bunny_cleaned_all_sentiment.csv')

# NMF - Bunny day

In [7]:
# df_bunny = pd.read_csv('../data/bunny_cleaned_all_sentiment.csv')
# df_bunny.dropna(axis=0, inplace=True)

In [69]:
tfidfvectorizer_bunny = TfidfVectorizer(
#     max_df = 0.99,
#     min_df = 0.01,
#     max_features = 
)

### bunny day - positive

In [70]:
sentiment_focus = 'pos'
filtered_frame = df_bunny[df_bunny['sentiment'] == sentiment_focus]['cleaned']

tf_vec_bunny = tfidfvectorizer_bunny.fit_transform(filtered_frame)
tf_vec_bunny.shape

(242444, 39987)

In [71]:
number_of_topics = 10
random_seed = 99
nmfmodel_bunny = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_bunny.fit(tf_vec_bunny)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [72]:
top_n_words = 10
tf_feature_names_bunny = tfidfvectorizer_bunny.get_feature_names()
show_topics2(nmfmodel_bunny,tf_feature_names_bunny,top_n_words)

Topic #0: thank much oh okay ok awesome ah lt god omg
Topic #1: like one get island day villager time game want make
Topic #2: thanks ok much oh okay awesome info cool ah know
Topic #3: yes please omg move oh message ah need plot one
Topic #4: love would much omg come visit amazing idea absolutely see
Topic #5: code dm please dodo looking fruit friend island open shop
Topic #6: good awesome know look oh luck great idea amazing price
Topic #7: lol oh yeah got thought ok right need mine okay
Topic #8: cute omg super look really idea great little dress aww
Topic #9: nice look really oh would job wow great cool work



### bunny day - neutral

In [73]:
sentiment_focus = 'neu'
filtered_frame = df_bunny[df_bunny['sentiment'] == sentiment_focus]['cleaned']

tf_vec_bunny = tfidfvectorizer_bunny.fit_transform(filtered_frame)
tf_vec_bunny.shape

(151479, 27319)

In [74]:
number_of_topics = 10
random_seed = 99
nmfmodel_bunny = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_bunny.fit(tf_vec_bunny)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [75]:
top_n_words = 10
tf_feature_names_bunny = tfidfvectorizer_bunny.get_feature_names()
show_topics2(nmfmodel_bunny,tf_feature_names_bunny,top_n_words)

Topic #0: cherry recipe blossom diy looking trade give peach anyone pear
Topic #1: dm sent looking check turnip ed dodo price selling anyone
Topic #2: code dodo send qr creator bring give open shop new
Topic #3: get recipe villager star way trying rid wait able know
Topic #4: island come nook villager sell visit fruit go open looking
Topic #5: one make villager first think new give move could per
Topic #6: need still fruit many orange craft flower open know pear
Topic #7: day bunny time egg think next first every th remindme
Topic #8: pm sent message request chat amp link time ed downloadable
Topic #9: got today balloon mine think yesterday first star red orange



### Bunny day - negative

In [76]:
sentiment_focus = 'neg'
filtered_frame = df_bunny[df_bunny['sentiment'] == sentiment_focus]['cleaned']

tf_vec_bunny = tfidfvectorizer_bunny.fit_transform(filtered_frame)
tf_vec_bunny.shape

(79619, 23940)

In [77]:
number_of_topics = 10
random_seed = 99
nmfmodel_bunny = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_bunny.fit(tf_vec_bunny)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [78]:
top_n_words = 20
tf_feature_names_bunny = tfidfvectorizer_bunny.get_feature_names()
show_topics2(nmfmodel_bunny,tf_feature_names_bunny,top_n_words)

Topic #0: time one get game like people know think sure would thing really day way back even make could go wrong
Topic #1: recipe cherry blossom balloon day bunny got diy get missing event drop gotten still sakura petal item tree anyone one
Topic #2: problem yeah got code thanks thank fix solution see opposite else okay dm sure lol modern online solved dodo anyone
Topic #3: sorry loss code im know new already ah someone hear closed idea got mean question late dodo post open meant
Topic #4: villager move plot campsite day first empty leave invite house random someone one amiibo talk camper new moved want moving
Topic #5: island tarantula fruit tree bug spawn mystery nook bamboo flower rock mile sell water code come drop flick visit native
Topic #6: bad want feel really luck lol badly make dont go right leave anyone bot ugh ah people know thing jealous
Topic #7: egg fish water hate bait fishing damn bunny event catch getting wood annoying sky spawn day balloon bug rock get
Topic #8: oh d

# Post Bunny-day

In [81]:
period_start = '2020-04-13' #inclusive
period_stop = '2020-04-23' #exclusive

df_postbunny = df[(df['date'] < period_stop) & (df['date'] >= period_start)]
df_postbunny.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)

df_postbunny.loc[:,'cleaned'] = df_postbunny['body'].apply(clean_text)
df_postbunny.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [82]:
tfidfvectorizer_postbunny = TfidfVectorizer(
#     max_df = 0.99,
#     min_df = 0.01,
#     max_features = 
)

### Post-bunny - positive

In [84]:
sentiment_focus = 'pos'
filtered_frame = df_postbunny[df_postbunny['sentiment'] == sentiment_focus]['cleaned']

tf_vec_postbunny = tfidfvectorizer_postbunny.fit_transform(filtered_frame)
tf_vec_postbunny.shape

(203880, 36267)

In [85]:
number_of_topics = 10
random_seed = 99
nmfmodel_postbunny = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_postbunny.fit(tf_vec_postbunny)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [86]:
top_n_words = 10
tf_feature_names_postbunny = tfidfvectorizer_postbunny.get_feature_names()
show_topics2(nmfmodel_postbunny,tf_feature_names_postbunny,top_n_words)

Topic #0: thank much awesome oh okay ok know omg lt great
Topic #1: island one get villager want day time game know make
Topic #2: thanks ok much oh okay awesome know info cool help
Topic #3: love would come much visit omg idea see absolutely wow
Topic #4: yes please omg ah move oh pls say random course
Topic #5: dm code please dodo looking come shop tip anyone visit
Topic #6: nice really look oh job work wow haha good cool
Topic #7: cute super omg idea look really oh aww great wow
Topic #8: lol oh ok yeah thought got omg know mine made
Topic #9: like would look amazing awesome great really visit come feel



### Post-bunny - neutral

In [87]:
sentiment_focus = 'neu'
filtered_frame = df_postbunny[df_postbunny['sentiment'] == sentiment_focus]['cleaned']

tf_vec_postbunny = tfidfvectorizer_postbunny.fit_transform(filtered_frame)
tf_vec_postbunny.shape

(119272, 25588)

In [88]:
number_of_topics = 10
random_seed = 99
nmfmodel_postbunny = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_postbunny.fit(tf_vec_postbunny)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [89]:
top_n_words = 10
tf_feature_names_postbunny = tfidfvectorizer_postbunny.get_feature_names()
show_topics2(nmfmodel_postbunny,tf_feature_names_postbunny,top_n_words)

Topic #0: dm ed dodo sent check hi offer sending send looking
Topic #1: island visit sell mystery flick fruit open celeste fish tarantula
Topic #2: code dodo qr pm send posted bring creator give new
Topic #3: come visit hi may could mine buy wanna hey back
Topic #4: get recipe rid trying star diy way flower could tree
Topic #5: sent chat pm message request gt msg send working private
Topic #6: one make craft give per recipe catalog first could blue
Topic #7: looking shop rose anyone red nook seed white yellow cosmos
Topic #8: need still fruit many cherry omg make apple tree pear
Topic #9: day villager time got think know move game house go



### Post-bunny - Negative

In [90]:
sentiment_focus = 'neg'
filtered_frame = df_postbunny[df_postbunny['sentiment'] == sentiment_focus]['cleaned']

tf_vec_postbunny = tfidfvectorizer_postbunny.fit_transform(filtered_frame)
tf_vec_postbunny.shape

(62366, 21969)

In [91]:
number_of_topics = 10
random_seed = 99
nmfmodel_postbunny = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_postbunny.fit(tf_vec_postbunny)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [93]:
top_n_words = 15
tf_feature_names_postbunny = tfidfvectorizer_postbunny.get_feature_names()
show_topics2(nmfmodel_postbunny,tf_feature_names_postbunny,top_n_words)

Topic #0: get time game like people think know would thing really day make way even hate
Topic #1: sorry loss know new im hear already late got still code meant someone happened ah
Topic #2: cutting board dresser recipe ironwood diy need make kitchenette craft got iron still table looking
Topic #3: problem thank yeah sure see code thanks well glad else opposite dodo fix think much
Topic #4: villager move plot empty leave house campsite day random ask invite someone talk amiibo moving
Topic #5: island unfortunately tarantula sell flick mystery spawn bug visit fruit flower go tree rock come
Topic #6: bad want feel make lol really luck badly ah look omg give felt need thank
Topic #7: one day got first bunny two per every another recipe next sure missing find take
Topic #8: oh damn shit know god fuck suck thanks really well man hell yeah thank thought
Topic #9: pay dm code bell dodo looking anyone drop nook need ill shop tip come fruit



# Spring update

In [102]:
period_start = '2020-04-23' #inclusive
period_stop = '2020-05-01' #exclusive

df_spring = df[(df['date'] < period_stop) & (df['date'] >= period_start)]
df_spring.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)

df_spring.loc[:,'cleaned'] = df_spring['body'].apply(clean_text)
df_spring.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view

In [103]:
tfidfvectorizer_spring = TfidfVectorizer(
#     max_df = 0.99,
#     min_df = 0.01,
#     max_features = 
)

### Spring - positive

In [109]:
sentiment_focus = 'pos'
filtered_frame = df_spring[df_spring['sentiment'] == sentiment_focus]['cleaned']

tf_vec_spring = tfidfvectorizer_spring.fit_transform(filtered_frame)
tf_vec_spring.shape

(169581, 33193)

In [110]:
number_of_topics = 10
random_seed = 99
nmfmodel_spring = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_spring.fit(tf_vec_spring)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [111]:
top_n_words = 10
tf_feature_names_spring = tfidfvectorizer_spring.get_feature_names()
show_topics2(nmfmodel_spring,tf_feature_names_spring,top_n_words)

Topic #0: thank much okay oh awesome ok ah lt great know
Topic #1: island one villager want get day time game know make
Topic #2: thanks ok much oh awesome know okay cool sharing info
Topic #3: love would come visit much omg idea see wow id
Topic #4: yes please omg oh say random pls move ah day
Topic #5: dm please code dodo come interested looking tip anyone visit
Topic #6: nice look really oh job work wow good idea cool
Topic #7: cute super omg really idea look great villager oh think
Topic #8: like look would great amazing awesome come really cool visit
Topic #9: lol oh yeah omg know thought got mine good okay



### Spring - Neutral

In [114]:
sentiment_focus = 'neu'
filtered_frame = df_spring[df_spring['sentiment'] == sentiment_focus]['cleaned']

tf_vec_spring = tfidfvectorizer_spring.fit_transform(filtered_frame)
tf_vec_spring.shape

(98116, 22454)

In [115]:
number_of_topics = 10
random_seed = 99
nmfmodel_spring = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_spring.fit(tf_vec_spring)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [116]:
top_n_words = 10
tf_feature_names_spring = tfidfvectorizer_spring.get_feature_names()
show_topics2(nmfmodel_spring,tf_feature_names_spring,top_n_words)

Topic #0: dm ed dodo offer sent nmt check selling let sending
Topic #1: island visit sell mystery redd flick open someone fruit go
Topic #2: come visit may could hi mine wanna hey let sell
Topic #3: code dodo send pm creator message qr open design give
Topic #4: sent chat message pm request msg send hi working via
Topic #5: one make got give craft could real per buy find
Topic #6: get rid trying recipe star could diy way nook hedge
Topic #7: need still many fruit flower open orange cherry pear tree
Topic #8: day villager time think move first game know wait random
Topic #9: anyone looking got nook buy diy box shop selling bell



### Spring - negative

In [117]:
sentiment_focus = 'neg'
filtered_frame = df_spring[df_spring['sentiment'] == sentiment_focus]['cleaned']

tf_vec_spring = tfidfvectorizer_spring.fit_transform(filtered_frame)
tf_vec_spring.shape

(51108, 20491)

In [118]:
number_of_topics = 10
random_seed = 99
nmfmodel_spring = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_spring.fit(tf_vec_spring)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [119]:
top_n_words = 10
tf_feature_names_spring = tfidfvectorizer_spring.get_feature_names()
show_topics2(nmfmodel_spring,tf_feature_names_spring,top_n_words)

Topic #0: time get game like people day think would know really
Topic #1: sorry loss someone hear already happened im sure code know
Topic #2: fake real painting art redd statue buy sell today tell
Topic #3: problem yeah thank right code haha see issue solution thanks
Topic #4: villager plot move empty leave day house campsite need random
Topic #5: island unfortunately mystery visit come sell someone flick tarantula fruit
Topic #6: pay board cutting dm bell looking diy anyone need code
Topic #7: bad want feel luck really lol dont badly man make
Topic #8: oh damn shit fuck god know suck thanks thank man
Topic #9: one day real take leave per got first two buy



# Post May-day

In [121]:
period_start = '2020-05-01' #inclusive
period_stop = '2020-05-09' #exclusive

df_may = df[(df['date'] < period_stop) & (df['date'] >= period_start)]
df_may.drop(columns=['Unnamed: 0','id','subreddit'],inplace=True)

df_may.loc[:,'cleaned'] = df_may['body'].apply(clean_text)
df_may.dropna(axis=0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view

In [122]:
tfidfvectorizer_may = TfidfVectorizer(
#     max_df = 0.99,
#     min_df = 0.01,
#     max_features = 
)

### Post May-day - Positive

In [123]:
sentiment_focus = 'pos'
filtered_frame = df_may[df_may['sentiment'] == sentiment_focus]['cleaned']

tf_vec_may = tfidfvectorizer_may.fit_transform(filtered_frame)
tf_vec_may.shape

(149829, 31137)

In [124]:
number_of_topics = 10
random_seed = 99
nmfmodel_may = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_may.fit(tf_vec_may)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [125]:
top_n_words = 10
tf_feature_names_may = tfidfvectorizer_may.get_feature_names()
show_topics2(nmfmodel_may,tf_feature_names_may,top_n_words)

Topic #0: thank much oh ok okay know lt awesome ah helpful
Topic #1: like island one villager get want game time day would
Topic #2: thanks much ok oh know okay awesome ah cool info
Topic #3: love would come much visit omg see idea absolutely hi
Topic #4: yes please omg oh ah move haha say day random
Topic #5: dm please code dodo interested looking anyone come flower tip
Topic #6: cute super omg idea really villager look aww think little
Topic #7: nice really job look work oh would super wow haha
Topic #8: lol oh ok yeah know thought got omg made mine
Topic #9: amazing look awesome great good wow job cool idea oh



### Post may-day - neutral

In [126]:
sentiment_focus = 'neu'
filtered_frame = df_may[df_may['sentiment'] == sentiment_focus]['cleaned']

tf_vec_may = tfidfvectorizer_may.fit_transform(filtered_frame)
tf_vec_may.shape

(82711, 20469)

In [127]:
number_of_topics = 10
random_seed = 99
nmfmodel_may = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_may.fit(tf_vec_may)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [128]:
top_n_words = 10
tf_feature_names_may = tfidfvectorizer_may.get_feature_names()
show_topics2(nmfmodel_may,tf_feature_names_may,top_n_words)

Topic #0: dm dodo offer selling nmt sent working ed hi nmts
Topic #1: island visit sell redd open mystery star fruit flick celeste
Topic #2: come visit may could water hi mine wanna hey shop
Topic #3: one make craft got per day catalog white find blue
Topic #4: code dodo send pm creator open design message qr shop
Topic #5: sent chat message pm request sending msg send check via
Topic #6: get rid trying recipe star diy item fruit many could
Topic #7: looking anyone buy diy tip bell nmt box flower selling
Topic #8: need still many fruit flower open water bell orange know
Topic #9: villager time day got know think move first game go



### Post may-day - negative

In [129]:
sentiment_focus = 'neg'
filtered_frame = df_may[df_may['sentiment'] == sentiment_focus]['cleaned']

tf_vec_may = tfidfvectorizer_may.fit_transform(filtered_frame)
tf_vec_may.shape

(42790, 19238)

In [130]:
number_of_topics = 10
random_seed = 99
nmfmodel_may = NMF(
    n_components=number_of_topics,
    max_iter=2000,
    random_state=random_seed)
nmfmodel_may.fit(tf_vec_may)

NMF(alpha=0.0, beta_loss='frobenius', init=None, l1_ratio=0.0, max_iter=2000,
    n_components=10, random_state=99, shuffle=False, solver='cd', tol=0.0001,
    verbose=0)

In [131]:
top_n_words = 10
tf_feature_names_may = tfidfvectorizer_may.get_feature_names()
show_topics2(nmfmodel_may,tf_feature_names_may,top_n_words)

Topic #0: get time like game people know think really would make
Topic #1: sorry already im someone know got happened code hear loss
Topic #2: pay dm bell looking diy anyone dodo code flower need
Topic #3: problem know thanks tree yeah let solution thank friend reply
Topic #4: fake redd painting real art statue buy selling looking buying
Topic #5: villager plot empty move leave day campsite random house box
Topic #6: island unfortunately come mystery visit sell star spawn flick tree
Topic #7: bad want feel dont luck lol make badly anyone really
Topic #8: one day leave got take first per two yet rock
Topic #9: oh damn shit fuck god suck thanks thank know hell



# Overall Topic!

In [135]:
all_cleaned = df['body'].apply(clean_text)

nan
nan
nan
nan
nan


In [140]:
all_cleaned.dropna(inplace=True)

In [136]:
countvectorizer = CountVectorizer(
#     max_df=0.95,
#     min_df=2,
#     max_features=n_features,
)

In [141]:
count_vec = countvectorizer.fit_transform(all_cleaned)

In [142]:
number_of_topics = 10
random_seed = 99
ldamodel = LatentDirichletAllocation(
    n_components=number_of_topics,
    max_iter=50,
    learning_method='online',
    learning_offset=50.,
    random_state=random_seed)

In [None]:
ldamodel.fit(count_vec)

In [None]:
top_n_words = 10
ct_feature_names = countvectorizer.get_feature_names()
show_topics(ldamodel,ct_feature_names,top_n_words)

In [None]:
page_all = pyLDAvis.sklearn.prepare(ldamodel, count_vec, countvectorizer)

In [None]:
pyLDAvis.save_html(page, 'lda_all.html')