# Load the Data

In [1]:
import json
import pickle

import gensim
from gensim.models import CoherenceModel
from tqdm import tqdm_notebook

tqdm_notebook().pandas()

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))






In [2]:
with open("processed_data/token.json") as f:
    tokens = json.load(f)

# Prepare Corpus and Dictionary

In [3]:
dictionary = gensim.corpora.Dictionary(tokens)
dictionary.save("processed_data/dictionary")

len(dictionary)

In [3]:
# dictionary = gensim.corpora.Dictionary.load('processed_data/dictionary')
dictionary.filter_extremes(no_below=20, no_above=0.5, keep_n=None)

In [5]:
corpus = [0] * len(tokens)

for i in tqdm_notebook(range(len(tokens))):
    corpus[i] = dictionary.doc2bow(tokens[i])

with open("processed_data/corpus.json", "w") as f:
    json.dump(corpus, f)
    
with open("processed_data/corpus.pkl", "wb") as f:
    pickle.dump(corpus, f)

In [6]:
# with open("processed_data/corpus.json") as f:
#     corpus = json.load(f)

# with open("processed_data/corpus.pkl", "rb") as f:
#     corpus = pickle.load(f)

# Conduct Grid Search on LDA Models

In [8]:
for passes in [1, 4]:
    for num_topics in [5, 10, 15, 20, 25, 30]:
        print("Num topics:", num_topics, ", passes:", passes)
        lda_model = gensim.models.ldamulticore.LdaMulticore(corpus=corpus,
                                                            id2word=dictionary,
                                                            num_topics=num_topics, 
                                                            random_state=1,
                                                            chunksize=2000,
                                                            passes=passes,
                                                            workers=7,
                                                            per_word_topics=True)

        coherence = CoherenceModel(model=lda_model, texts=tokens, dictionary=dictionary, coherence='c_v').get_coherence()
        print("    Coherence:", coherence)
        lda_model.save("models/lda/"+str(passes)+"-passes_" + str(num_topics) + "-topics")

Num topics: 30
    Coherence: 0.5299694323860216


| No. of topics | 1 pass | 4 passes |
| ---- | ----- | ------|
|  5 topics | 0.40667 | 0.40749 |
| 10 topics | 0.43186 | 0.45273 |
| 15 topics | 0.49410 | 0.50778 |
| 20 topics | 0.51291 | 0.53482 |
| 25 topics | 0.51477 | 0.53167 |
| 30 topics | 0.52444 | 0.52997 |



# Display Topics for Top Few Models

In [14]:
from tabulate import tabulate
from IPython.display import display, HTML, Markdown
import re

In [19]:
for num_passes, num_topics in [(4, 20), (4, 25), (4, 30), (1, 30), (1, 25), (1, 20)]:
    model = gensim.models.ldamulticore.LdaMulticore.load('models/lda/' + str(num_passes) + '-passes_' + str(num_topics) + '-topics')
    results = []
    for _, text in model.print_topics():
        results.append([re.findall('[a-z]+', x)[0] for x in text.split(' + ')])
    display(Markdown("# " + str(num_passes) + " passes, " + str(num_topics) + " topics"))
    display(HTML(tabulate(results, tablefmt="html", headers=["Term " + str(x) for x in range(1, 11)])))

# 4 passes, 20 topics

Term 1,Term 2,Term 3,Term 4,Term 5,Term 6,Term 7,Term 8,Term 9,Term 10
egas,time,kid,play,fun,like,go,people,get,lot
coffee,ice,cream,tea,chocolate,like,try,drink,flavor,good
order,wait,food,come,time,minute,service,table,ask,take
pizza,good,crust,order,delivery,ew,airport,slice,izza,flight
great,food,place,service,good,love,friendly,amazing,staff,time
pour,bike,les,que,des,une,pas,est,mais,qui
taco,good,chip,mexican,salsa,food,bean,order,burrito,raman
car,work,service,company,come,need,fix,time,new,guy
store,shop,find,buy,price,location,item,look,like,need
room,stay,hotel,nice,clean,pool,check,class,night,bed


# 4 passes, 25 topics

Term 1,Term 2,Term 3,Term 4,Term 5,Term 6,Term 7,Term 8,Term 9,Term 10
burger,fry,sandwich,cheese,good,order,like,get,try,meat
hair,great,time,thank,look,amazing,good,go,experience,want
sushi,roll,breakfast,egg,fish,order,good,come,like,brunch
dish,steak,restaurant,good,meal,dinner,menu,order,salad,rib
store,buy,shop,find,price,item,look,selection,need,like
pizza,good,order,italian,salad,sauce,cheese,pasta,crust,like
egas,as,car,drive,trip,airport,ride,time,flight,strip
bar,drink,beer,good,place,great,night,nice,music,bartender
cream,ice,chocolate,cake,good,try,flavor,dessert,sweet,like
chicken,sauce,order,good,wing,hicken,fry,salad,try,bowl


# 4 passes, 30 topics

Term 1,Term 2,Term 3,Term 4,Term 5,Term 6,Term 7,Term 8,Term 9,Term 10
pizza,car,crust,good,drive,oil,vehicle,wash,get,izza
customer,service,say,tell,ask,manager,bad,go,rude,want
place,food,good,like,review,star,bad,think,price,eat
salad,sauce,good,cheese,meat,bread,chicken,order,potato,rib
seat,like,music,movie,see,love,ticket,feel,show,wall
sushi,roll,fish,good,fresh,order,crab,shrimp,eat,come
bar,drink,beer,place,good,great,night,nice,bartender,like
location,option,parking,lot,area,find,free,vegan,healthy,fresh
card,look,dress,help,go,walk,like,ask,want,bag
egas,strip,buffet,as,casino,good,trip,price,stay,trip


# 1 passes, 30 topics

Term 1,Term 2,Term 3,Term 4,Term 5,Term 6,Term 7,Term 8,Term 9,Term 10
pour,les,que,des,une,est,pas,mais,qui,tr
tell,say,customer,service,ask,go,manager,time,call,pay
work,service,recommend,need,time,great,thank,professional,job,good
nail,massage,time,get,salon,job,pedicure,go,place,come
burger,fry,good,cheese,order,like,onion,sandwich,try,get
breakfast,sandwich,egg,good,brunch,order,toast,come,bacon,place
bowl,noodle,wing,good,sauce,order,pork,raman,like,try
salad,good,sauce,meat,cheese,bread,rib,potato,chicken,like
restaurant,dinner,menu,wine,good,meal,steak,dessert,dish,food
line,people,get,wait,time,like,go,long,guy,club


# 1 passes, 25 topics

Term 1,Term 2,Term 3,Term 4,Term 5,Term 6,Term 7,Term 8,Term 9,Term 10
room,hotel,stay,nice,pool,clean,check,egas,night,casino
store,find,price,shop,buy,item,selection,like,good,look
sushi,roll,order,egg,fish,like,good,come,eat,taste
chicken,good,food,order,dish,rice,soup,place,noodle,come
work,service,need,great,company,time,recommend,come,job,fix
good,salad,dish,steak,meal,order,sauce,restaurant,rib,dinner
dog,movie,pet,bike,animal,vet,theater,cat,bring,popcorn
cream,ice,chocolate,cake,good,try,like,flavor,sweet,dessert
order,wait,come,food,time,ask,minute,table,service,take
tell,say,call,ask,customer,go,time,pay,service,day


# 1 passes, 20 topics

Term 1,Term 2,Term 3,Term 4,Term 5,Term 6,Term 7,Term 8,Term 9,Term 10
egas,time,like,play,fun,kid,people,go,lot,get
coffee,ice,tea,cream,like,place,drink,try,good,chocolate
order,wait,come,time,food,minute,ask,service,table,take
pizza,car,good,drive,service,crust,vehicle,get,time,wash
great,food,place,good,service,love,friendly,amazing,staff,time
pour,les,que,des,une,pas,est,mais,qui,plus
taco,good,order,sauce,like,wing,chicken,try,chip,bowl
work,company,fix,come,need,day,service,call,time,new
store,price,find,shop,buy,item,look,like,location,selection
room,hotel,stay,nice,clean,pool,staff,check,class,bed
