In [6]:
import sklearn.feature_extraction.text as text
from sklearn import decomposition
import numpy as np
import pandas as pd

In [7]:
# Read the reviews CSV into a DataFrame; the path is relative to the
# notebook's working directory.
reviews_df = pd.read_csv(
    filepath_or_buffer="data/appstore_all_reviews_clean_tomo.csv",
)

In [8]:
# Corpus for vectorization: every non-null entry of the
# 'text_tokenized_lemmatized' column, as a plain Python list.
lemmatized_text = reviews_df['text_tokenized_lemmatized']
reviews = lemmatized_text.dropna().tolist()

In [9]:
# Build the TF-IDF document-term matrix. The vectorizer performs
# tokenization, English stop-word removal, TF-IDF weighting and
# normalization. `reviews` must be a Python list with one review
# string per element.
#
# The corpus is passed to fit_transform() only: the constructor's first
# parameter is `input` (not the documents), and recent scikit-learn
# releases reject positional constructor arguments altogether.
cv = text.TfidfVectorizer(stop_words='english')
doc_term_matrix = cv.fit_transform(reviews)

# The tokens (one per column of doc_term_matrix) can be extracted as a list.
# get_feature_names() was removed in scikit-learn 1.2, so prefer its
# replacement and fall back only on older installations.
if hasattr(cv, 'get_feature_names_out'):
    vocab = cv.get_feature_names_out().tolist()
else:
    vocab = cv.get_feature_names()

# Next we perform the NMF with 20 topics
num_topics = 20

# doctopic is the W matrix (documents x topics).
# random_state is pinned because the NNDSVD initialisation relies on a
# randomized SVD; without a seed, reruns may not reproduce the same topics.
decomp = decomposition.NMF(n_components=num_topics,
                           init='nndsvd',
                           random_state=0)
doctopic = decomp.fit_transform(doc_term_matrix)

# Now, we loop through each row of decomp.components_ (the topics x terms
# factor, i.e. one row per topic) and collect the top 25 words from each
# topic, ordered from highest to lowest weight.
n_top_words = 25
topic_words = []
for topic in decomp.components_:
    # argsort is ascending, so reverse it before taking the leading slice.
    idx = np.argsort(topic)[::-1][0:n_top_words]
    topic_words.append([vocab[i] for i in idx])

In [20]:
# Lay the per-topic word lists out side by side: after transposing, each
# column is one topic and each row is a word rank within that topic.
topic_words_df = pd.DataFrame(topic_words).transpose()
topic_words_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,app,easy,password,work,time,great,love,appointment,log,update,card,iphone,information,good,login,like,need,message,info,crash
1,useless,use,paste,doesn,waste,job,fingertip,doctor,let,latest,insurance,data,access,far,just,really,right,error,medical,open
2,pay,navigate,manager,doe,try,app,app,make,account,working,wallet,health,claim,really,website,feature,fingertip,try,contact,try
3,website,helpful,use,doesnt,don,health,right,result,website,new,claim,io,medical,way,screen,new,updated,later,plan,iphone
4,just,convenient,field,fine,use,friendly,able,prescription,won,fix,view,version,fingertip,health,touch,look,fixed,send,enter,constantly
5,useful,fast,secure,just,say,way,having,email,say,crashing,apple,record,plan,pretty,allow,nice,know,tried,doctor,fix
6,company,really,user,touch,tried,thanks,accessible,refill,phone,updated,add,support,provides,app,able,better,look,saying,emergency,io
7,customer,quick,pasting,don,complete,user,feature,test,computer,version,access,medical,user,track,mobile,user,improvement,getting,family,doe
8,provider,super,remember,used,half,look,navigate,lab,able,won,feature,ipad,family,interface,fine,thing,provides,reply,doesn,wont
9,point,understand,username,bad,day,problem,best,able,try,recent,coverage,apple,able,job,issue,having,load,fix,humana,won
