### Data preparation and Exploration

In [22]:
import pandas as pd 
import numpy as np
import json
import re
from tqdm import tqdm,tqdm_gui
from tqdm._tqdm_notebook import tqdm_notebook

In [23]:
PATH = './data/'

In [24]:
with open(PATH+'review.json') as f:
    review_data = pd.DataFrame(json.loads(line) for line in f)

In [25]:
with open(PATH+'business.json') as f:
    business_data = pd.DataFrame(json.loads(line) for line in f)

In [26]:
rest_biz = business_data[business_data['categories'].apply(str).str.contains('Restaurants')]

In [27]:
rest_biz = rest_biz.drop(columns=['stars'])

In [28]:
rest_biz.sort_values(by='review_count',ascending=False,inplace=True)

In [29]:
rest_biz.groupby(by=['state'])['review_count'].sum().sort_values(ascending=False)

state
NV     949953
AZ     837216
ON     414411
NC     180487
OH     154726
PA     143283
QC      98978
WI      69050
BW      24934
EDH     23747
IL      22186
SC       5981
MLN      1101
HLD       589
C         168
ELN       117
FIF       110
NYK       101
WLN        87
NY         73
NI         58
WA         40
01         24
PKN        24
ST         24
ESX        11
BY         10
KHL         7
RCC         7
XGL         6
3           5
HH          4
CA          4
WHT         4
ABE         3
ZET         3
Name: review_count, dtype: int64

In [30]:
preprocessed_data = rest_biz.merge(review_data,how='inner')
preprocessed_data = preprocessed_data[preprocessed_data['state'].isin(['NV'])]

In [31]:
preprocessed_data.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,...,review_count,state,cool,date,funny,review_id,stars,text,useful,user_id
0,3655 Las Vegas Blvd S,"{'Alcohol': 'full_bar', 'HasTV': False, 'Noise...",4JNXUYY8wbaaDmk3BPzlWw,"[French, Steakhouses, Breakfast & Brunch, Rest...",Las Vegas,"{'Monday': '7:00-23:00', 'Tuesday': '7:00-23:0...",1,36.112827,-115.172581,Mon Ami Gabi,...,6979,NV,0,2011-02-22,0,WE9eUYf5EV8AxJjl8QZRtA,5,"Very chic. Although, the menu items doesnt SCR...",0,K6FpHYwcJYznoXXu8ySZHw
1,3655 Las Vegas Blvd S,"{'Alcohol': 'full_bar', 'HasTV': False, 'Noise...",4JNXUYY8wbaaDmk3BPzlWw,"[French, Steakhouses, Breakfast & Brunch, Rest...",Las Vegas,"{'Monday': '7:00-23:00', 'Tuesday': '7:00-23:0...",1,36.112827,-115.172581,Mon Ami Gabi,...,6979,NV,0,2015-04-15,0,7Fl41hKa0wjO3TlvHKD3lw,4,Cute french bistro with great service. I came ...,1,3SGQKsO1J-jcRIp3WNxCeA
2,3655 Las Vegas Blvd S,"{'Alcohol': 'full_bar', 'HasTV': False, 'Noise...",4JNXUYY8wbaaDmk3BPzlWw,"[French, Steakhouses, Breakfast & Brunch, Rest...",Las Vegas,"{'Monday': '7:00-23:00', 'Tuesday': '7:00-23:0...",1,36.112827,-115.172581,Mon Ami Gabi,...,6979,NV,0,2013-11-11,0,Jm4iOfsltS3T59puoV6r8Q,5,"The food is very good, the prices fair and the...",0,LPT8XlpXlHGAp0Ri4Hu4Rw
3,3655 Las Vegas Blvd S,"{'Alcohol': 'full_bar', 'HasTV': False, 'Noise...",4JNXUYY8wbaaDmk3BPzlWw,"[French, Steakhouses, Breakfast & Brunch, Rest...",Las Vegas,"{'Monday': '7:00-23:00', 'Tuesday': '7:00-23:0...",1,36.112827,-115.172581,Mon Ami Gabi,...,6979,NV,3,2015-12-28,2,m6lwvXPCdpKHVp05Bjkhlw,5,One of my favorite go-to brunch spots on the L...,8,3NnPbhmv_vEfPTBp2pnn9Q
4,3655 Las Vegas Blvd S,"{'Alcohol': 'full_bar', 'HasTV': False, 'Noise...",4JNXUYY8wbaaDmk3BPzlWw,"[French, Steakhouses, Breakfast & Brunch, Rest...",Las Vegas,"{'Monday': '7:00-23:00', 'Tuesday': '7:00-23:0...",1,36.112827,-115.172581,Mon Ami Gabi,...,6979,NV,0,2015-09-16,0,nkb40j7kBe2qAci1xpyd3g,5,Simply amazing steak and frites. Got the blue ...,0,xl4rsQqpibUNhR8Jqxp4OQ


In [32]:
date_sorted = preprocessed_data.sort_values(by='date',ascending=False)

In [33]:
data_20150101 = date_sorted[date_sorted['date']>'2017-01-01']

In [34]:
data_20150101 = data_20150101.reset_index().drop('index',axis=1)

In [35]:
data_20150101.groupby(by=['state'])['state'].count()

state
NV    126225
Name: state, dtype: int64

In [36]:
data_20150101.shape

(126225, 22)

In [39]:
preprocessed_dataviz = data_20150101.loc[:,['business_id','user_id','text']]

In [40]:
# The data for this analysis include restaurant reviews across two states Nevada and Arizona for the past 2.5 years

In [41]:
preprocessed_dataviz.to_feather('./data/preprocessed_data')

### Tokenization 

In [7]:
import pandas as pd
import spacy
from gensim.models import Phrases
from gensim.models.word2vec import LineSentence
from gensim import models,corpora
from tqdm import tqdm,tqdm_gui
from tqdm._tqdm_notebook import tqdm_notebook

In [3]:
preprocessed_data = pd.read_feather('preprocessed_data')
preprocessed_data = preprocessed_data.reset_index().drop('index',axis=1)

In [5]:
nlp = spacy.load('en')

1. Convert the reviews into sentence streams
2. Construct a bigram model
3. Using a bigram model, construct 

Here, we want to create a stream of sentences. And use that to create our bigram model.

In [8]:
doc = nlp.pipe(preprocessed_data['text'],n_threads=-1)
text_out = []
removal = []
for review in tqdm(doc):
    review_sent = []
    for sent in review.sents:
        sentence = []
        for token in sent:        
            if token.is_alpha and token.is_stop == False and token.pos_ not in removal:
                lemma = token.lemma_
                sentence.append(lemma)
        review_sent.append(sentence)
    text_out.append(review_sent)

126225it [24:55, 84.42it/s]


In [38]:
total_text = []
for text in text_out:
    total_text +=text
common_terms = ['by' ,'in', 'of' ,'on' ,'or', 'to','the']
bigram_model = Phrases(total_text,scoring='npmi',common_terms=common_terms,threshold=0.5,min_count=100)

INFO : collecting all words and their counts
INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
INFO : PROGRESS: at sentence #10000, processed 57199 words and 38175 word types
INFO : PROGRESS: at sentence #20000, processed 115696 words and 68327 word types
INFO : PROGRESS: at sentence #30000, processed 173238 words and 94673 word types
INFO : PROGRESS: at sentence #40000, processed 231888 words and 119857 word types
INFO : PROGRESS: at sentence #50000, processed 291563 words and 143556 word types
INFO : PROGRESS: at sentence #60000, processed 350225 words and 165111 word types
INFO : PROGRESS: at sentence #70000, processed 409282 words and 186121 word types
INFO : PROGRESS: at sentence #80000, processed 467365 words and 205914 word types
INFO : PROGRESS: at sentence #90000, processed 526147 words and 225305 word types
INFO : PROGRESS: at sentence #100000, processed 584434 words and 244128 word types
INFO : PROGRESS: at sentence #110000, processed 642385 words and 26214

INFO : PROGRESS: at sentence #980000, processed 5746383 words and 1298087 word types
INFO : PROGRESS: at sentence #990000, processed 5804679 words and 1307425 word types
INFO : PROGRESS: at sentence #1000000, processed 5865678 words and 1317433 word types
INFO : PROGRESS: at sentence #1010000, processed 5926033 words and 1327325 word types
INFO : PROGRESS: at sentence #1020000, processed 5984415 words and 1336298 word types
INFO : PROGRESS: at sentence #1030000, processed 6043695 words and 1345358 word types
INFO : PROGRESS: at sentence #1040000, processed 6102974 words and 1354640 word types
INFO : collected 1361489 word types from a corpus of 6146670 words (unigram + bigrams) and 1047276 sentences
INFO : using 1361489 counts as vocab in Phrases<0 vocab, min_count=100, threshold=0.5, max_vocab_size=40000000>


1. Words at review level

In [39]:
processed_review =[]
for text in text_out:
    review =[]
    for t in text:
        review +=t
    processed_review.append(review)
processed_bigram_reviews = [ bigram_model[review] for review in tqdm(processed_review)]



  0%|          | 539/126225 [00:00<00:23, 5367.84it/s][A
  1%|          | 1092/126225 [00:00<00:22, 5444.56it/s][A
  1%|▏         | 1625/126225 [00:00<00:23, 5406.53it/s][A
  2%|▏         | 2124/126225 [00:00<00:23, 5299.37it/s][A
  2%|▏         | 2664/126225 [00:00<00:23, 5318.71it/s][A
  3%|▎         | 3227/126225 [00:00<00:22, 5370.53it/s][A
  3%|▎         | 3772/126225 [00:00<00:22, 5380.53it/s][A
  3%|▎         | 4276/126225 [00:00<00:22, 5333.14it/s][A
  4%|▍         | 4789/126225 [00:00<00:22, 5308.70it/s][A
  4%|▍         | 5309/126225 [00:01<00:22, 5296.71it/s][A
  5%|▍         | 5816/126225 [00:01<00:22, 5255.62it/s][A
  5%|▌         | 6317/126225 [00:01<00:22, 5213.55it/s][A
  5%|▌         | 6846/126225 [00:01<00:22, 5217.86it/s][A
  6%|▌         | 7364/126225 [00:01<00:22, 5216.31it/s][A
  6%|▌         | 7873/126225 [00:01<00:22, 5199.15it/s][A
  7%|▋         | 8378/126225 [00:01<00:22, 5183.42it/s][A
  7%|▋         | 8934/126225 [00:01<00:22, 5206.48it/s]

 54%|█████▍    | 68075/126225 [00:13<00:11, 5033.41it/s][A
 54%|█████▍    | 68564/126225 [00:13<00:11, 5032.24it/s][A
 55%|█████▍    | 69073/126225 [00:13<00:11, 5032.64it/s][A
 55%|█████▌    | 69568/126225 [00:13<00:11, 5031.97it/s][A
 56%|█████▌    | 70101/126225 [00:13<00:11, 5034.08it/s][A
 56%|█████▌    | 70619/126225 [00:14<00:11, 5035.11it/s][A
 56%|█████▋    | 71129/126225 [00:14<00:10, 5035.22it/s][A
 57%|█████▋    | 71638/126225 [00:14<00:10, 5033.32it/s][A
 57%|█████▋    | 72137/126225 [00:14<00:10, 5033.03it/s][A
 58%|█████▊    | 72647/126225 [00:14<00:10, 5033.47it/s][A
 58%|█████▊    | 73149/126225 [00:14<00:10, 5033.01it/s][A
 58%|█████▊    | 73691/126225 [00:14<00:10, 5035.67it/s][A
 59%|█████▉    | 74230/126225 [00:14<00:10, 5038.24it/s][A
 59%|█████▉    | 74752/126225 [00:14<00:10, 5034.59it/s][A
 60%|█████▉    | 75254/126225 [00:14<00:10, 5033.82it/s][A
 60%|██████    | 75759/126225 [00:15<00:10, 5033.83it/s][A
 60%|██████    | 76261/126225 [00:15<00:

In [40]:
processed_bigram_reviews[100]

['ichiza',
 'good',
 'the',
 'thing',
 '-PRON-',
 'like',
 'wagyu',
 'yakitori',
 '-PRON-',
 'order',
 'yakitori',
 'chicken',
 'thigh',
 'wagyu',
 'tongue',
 'ok',
 'wagyu_beef',
 'good',
 'pork_belly',
 'meh',
 'duck',
 'meh',
 'grill',
 'octopus',
 'ok',
 '-PRON-',
 'will',
 'come']

In [41]:
# filtering for stop words
from spacy.lang.en.stop_words import STOP_WORDS 
STOP_WORDS

{'a',
 'about',
 'above',
 'across',
 'after',
 'afterwards',
 'again',
 'against',
 'all',
 'almost',
 'alone',
 'along',
 'already',
 'also',
 'although',
 'always',
 'am',
 'among',
 'amongst',
 'amount',
 'an',
 'and',
 'another',
 'any',
 'anyhow',
 'anyone',
 'anything',
 'anyway',
 'anywhere',
 'are',
 'around',
 'as',
 'at',
 'back',
 'be',
 'became',
 'because',
 'become',
 'becomes',
 'becoming',
 'been',
 'before',
 'beforehand',
 'behind',
 'being',
 'below',
 'beside',
 'besides',
 'between',
 'beyond',
 'both',
 'bottom',
 'but',
 'by',
 'ca',
 'call',
 'can',
 'cannot',
 'could',
 'did',
 'do',
 'does',
 'doing',
 'done',
 'down',
 'due',
 'during',
 'each',
 'eight',
 'either',
 'eleven',
 'else',
 'elsewhere',
 'empty',
 'enough',
 'even',
 'ever',
 'every',
 'everyone',
 'everything',
 'everywhere',
 'except',
 'few',
 'fifteen',
 'fifty',
 'first',
 'five',
 'for',
 'former',
 'formerly',
 'forty',
 'four',
 'from',
 'front',
 'full',
 'further',
 'get',
 'give',
 'g

In [51]:
final_bigrams = []
stopwordlist = list(STOP_WORDS)
for r in tqdm(processed_bigram_reviews):
    review = []
    for word in r:
        if word not in stopwordlist and word != '-PRON-':
            review.append(word)
    final_bigrams.append(review)
            
        


  0%|          | 0/126225 [00:00<?, ?it/s][A
  0%|          | 417/126225 [00:00<00:30, 4118.68it/s][A
  1%|          | 976/126225 [00:00<00:25, 4838.65it/s][A
  1%|          | 1541/126225 [00:00<00:24, 5105.13it/s][A
  2%|▏         | 2072/126225 [00:00<00:24, 5156.09it/s][A
  2%|▏         | 2646/126225 [00:00<00:23, 5270.23it/s][A
  3%|▎         | 3253/126225 [00:00<00:22, 5402.76it/s][A
  3%|▎         | 3835/126225 [00:00<00:22, 5462.48it/s][A
  3%|▎         | 4363/126225 [00:00<00:22, 5438.83it/s][A
  4%|▍         | 4908/126225 [00:00<00:22, 5439.72it/s][A
  4%|▍         | 5455/126225 [00:01<00:22, 5442.07it/s][A
  5%|▍         | 5991/126225 [00:01<00:22, 5419.83it/s][A
  5%|▌         | 6523/126225 [00:01<00:22, 5385.45it/s][A
  6%|▌         | 7118/126225 [00:01<00:21, 5424.74it/s][A
  6%|▌         | 7661/126225 [00:01<00:21, 5415.14it/s][A
  7%|▋         | 8210/126225 [00:01<00:21, 5419.62it/s][A
  7%|▋         | 8799/126225 [00:01<00:21, 5448.34it/s][A
  7%|▋     

 61%|██████    | 76442/126225 [00:14<00:09, 5423.74it/s][A
 61%|██████    | 76984/126225 [00:14<00:09, 5421.89it/s][A
 61%|██████▏   | 77564/126225 [00:14<00:08, 5424.42it/s][A
 62%|██████▏   | 78172/126225 [00:14<00:08, 5428.44it/s][A
 62%|██████▏   | 78781/126225 [00:14<00:08, 5432.94it/s][A
 63%|██████▎   | 79358/126225 [00:14<00:08, 5433.41it/s][A
 63%|██████▎   | 79927/126225 [00:14<00:08, 5430.04it/s][A
 64%|██████▍   | 80515/126225 [00:14<00:08, 5432.98it/s][A
 64%|██████▍   | 81075/126225 [00:14<00:08, 5430.74it/s][A
 65%|██████▍   | 81621/126225 [00:15<00:08, 5429.43it/s][A
 65%|██████▌   | 82161/126225 [00:15<00:08, 5427.63it/s][A
 66%|██████▌   | 82734/126225 [00:15<00:08, 5429.46it/s][A
 66%|██████▌   | 83282/126225 [00:15<00:07, 5429.81it/s][A
 66%|██████▋   | 83828/126225 [00:15<00:07, 5428.99it/s][A
 67%|██████▋   | 84380/126225 [00:15<00:07, 5429.46it/s][A
 67%|██████▋   | 84959/126225 [00:15<00:07, 5431.69it/s][A
 68%|██████▊   | 85514/126225 [00:15<00:

In [53]:
final_bigrams[100]

['ichiza',
 'good',
 'thing',
 'like',
 'wagyu',
 'yakitori',
 'order',
 'yakitori',
 'chicken',
 'thigh',
 'wagyu',
 'tongue',
 'ok',
 'wagyu_beef',
 'good',
 'pork_belly',
 'meh',
 'duck',
 'meh',
 'grill',
 'octopus',
 'ok',
 'come']

In [54]:
dictionary = corpora.Dictionary(final_bigrams)
doc_term_matrix = [dictionary.doc2bow(doc) for doc in tqdm(final_bigrams)]

INFO : adding document #0 to Dictionary(0 unique tokens: [])
INFO : adding document #10000 to Dictionary(18314 unique tokens: ['amazing', 'bartender', 'busy', 'butt', 'customer']...)
INFO : adding document #20000 to Dictionary(26863 unique tokens: ['amazing', 'bartender', 'busy', 'butt', 'customer']...)
INFO : adding document #30000 to Dictionary(34093 unique tokens: ['amazing', 'bartender', 'busy', 'butt', 'customer']...)
INFO : adding document #40000 to Dictionary(40726 unique tokens: ['amazing', 'bartender', 'busy', 'butt', 'customer']...)
INFO : adding document #50000 to Dictionary(47229 unique tokens: ['amazing', 'bartender', 'busy', 'butt', 'customer']...)
INFO : adding document #60000 to Dictionary(52963 unique tokens: ['amazing', 'bartender', 'busy', 'butt', 'customer']...)
INFO : adding document #70000 to Dictionary(58508 unique tokens: ['amazing', 'bartender', 'busy', 'butt', 'customer']...)
INFO : adding document #80000 to Dictionary(63699 unique tokens: ['amazing', 'bartend

In [55]:
import logging
logging.basicConfig(format='%(levelname)s : %(message)s', level=logging.DEBUG)

In [56]:
##### Step-3 : Define multicore lda model and enjoy!!!
num_topics =50
Lda = models.LdaMulticore
lda= Lda(doc_term_matrix, num_topics=num_topics,id2word = dictionary, 
         passes=20,chunksize=4000,random_state=42)

INFO : using symmetric alpha at 0.02
INFO : using symmetric eta at 0.02
INFO : using serial LDA version on this node
INFO : running online LDA training, 50 topics, 20 passes over the supplied corpus of 126225 documents, updating every 140000 documents, evaluating every ~126225 documents, iterating 50x with a convergence threshold of 0.001000
INFO : training LDA model using 35 processes
DEBUG : worker process entering E-step loop
DEBUG : getting a new job
DEBUG : getting a new job
DEBUG : worker process entering E-step loop
DEBUG : worker process entering E-step loop
DEBUG : getting a new job
DEBUG : getting a new job
DEBUG : worker process entering E-step loop
DEBUG : worker process entering E-step loop
DEBUG : getting a new job
DEBUG : worker process entering E-step loop
DEBUG : getting a new job
DEBUG : worker process entering E-step loop
DEBUG : worker process entering E-step loop
DEBUG : getting a new job
DEBUG : getting a new job
DEBUG : worker process entering E-step loop
DEBUG :

DEBUG : getting a new job
DEBUG : result put
DEBUG : processing chunk #17 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #18 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 2146/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : processing chunk #19 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #20 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 2133/4000 documents converged within 50 iterations
DEBUG : getting a new job
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 2087/4000 documents converged within 50 iterations
DEBUG : processing chunk #21 of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : performing inference on a chunk of 4000 documents
DEBUG

INFO : PROGRESS: pass 1, dispatched chunk #10 = documents up to #44000/126225, outstanding queue size 11
INFO : PROGRESS: pass 1, dispatched chunk #11 = documents up to #48000/126225, outstanding queue size 12
INFO : PROGRESS: pass 1, dispatched chunk #12 = documents up to #52000/126225, outstanding queue size 13
INFO : PROGRESS: pass 1, dispatched chunk #13 = documents up to #56000/126225, outstanding queue size 14
DEBUG : processing chunk #0 of 4000 documents
INFO : PROGRESS: pass 1, dispatched chunk #14 = documents up to #60000/126225, outstanding queue size 15
DEBUG : performing inference on a chunk of 4000 documents
INFO : PROGRESS: pass 1, dispatched chunk #15 = documents up to #64000/126225, outstanding queue size 16
INFO : PROGRESS: pass 1, dispatched chunk #16 = documents up to #68000/126225, outstanding queue size 17
INFO : PROGRESS: pass 1, dispatched chunk #17 = documents up to #72000/126225, outstanding queue size 18
INFO : PROGRESS: pass 1, dispatched chunk #18 = document

DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 2593/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 2590/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 2558/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 2612/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 2567/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 2617/4000 documents converged within 50 iterations
DEBUG : 2584/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : r

DEBUG : getting a new job
DEBUG : processed chunk, queuing the result
DEBUG : processing chunk #12 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : result put
DEBUG : 2673/4000 documents converged within 50 iterations
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #13 of 4000 documents
DEBUG : processing chunk #14 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : getting a new job
DEBUG : processing chunk #15 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 2667/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #16 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : getting a new job
DEBUG : 2615/4000 documents converged within 50 iterations
DEBUG

INFO : PROGRESS: pass 3, dispatched chunk #6 = documents up to #28000/126225, outstanding queue size 7
INFO : PROGRESS: pass 3, dispatched chunk #7 = documents up to #32000/126225, outstanding queue size 8
DEBUG : processing chunk #0 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
INFO : PROGRESS: pass 3, dispatched chunk #8 = documents up to #36000/126225, outstanding queue size 9
INFO : PROGRESS: pass 3, dispatched chunk #9 = documents up to #40000/126225, outstanding queue size 10
INFO : PROGRESS: pass 3, dispatched chunk #10 = documents up to #44000/126225, outstanding queue size 11
INFO : PROGRESS: pass 3, dispatched chunk #11 = documents up to #48000/126225, outstanding queue size 12
INFO : PROGRESS: pass 3, dispatched chunk #12 = documents up to #52000/126225, outstanding queue size 13
INFO : PROGRESS: pass 3, dispatched chunk #13 = documents up to #56000/126225, outstanding queue size 14
DEBUG : processing chunk #1 of 4000 documents
DEBUG : performin

DEBUG : result put
DEBUG : getting a new job
DEBUG : 2841/4000 documents converged within 50 iterations
DEBUG : processing chunk #30 of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #31 of 2225 documents
DEBUG : performing inference on a chunk of 2225 documents
DEBUG : 2837/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 2873/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 2840/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : 2810/4000 documents converged within 50 iterations
DEBUG : getting a new job
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG

DEBUG : result put
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 2954/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 2932/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #10 of 4000 documents
DEBUG : processing chunk #11 of 4000 documents
DEBUG : getting a new job
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 2973/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #12 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : result put
DEBUG : 2921/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : processing chunk #13 of 400

INFO : PROGRESS: pass 5, dispatched chunk #2 = documents up to #12000/126225, outstanding queue size 3
INFO : PROGRESS: pass 5, dispatched chunk #3 = documents up to #16000/126225, outstanding queue size 4
INFO : PROGRESS: pass 5, dispatched chunk #4 = documents up to #20000/126225, outstanding queue size 5
INFO : PROGRESS: pass 5, dispatched chunk #5 = documents up to #24000/126225, outstanding queue size 6
INFO : PROGRESS: pass 5, dispatched chunk #6 = documents up to #28000/126225, outstanding queue size 7
INFO : PROGRESS: pass 5, dispatched chunk #7 = documents up to #32000/126225, outstanding queue size 8
INFO : PROGRESS: pass 5, dispatched chunk #8 = documents up to #36000/126225, outstanding queue size 9
INFO : PROGRESS: pass 5, dispatched chunk #9 = documents up to #40000/126225, outstanding queue size 10
DEBUG : processing chunk #0 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
INFO : PROGRESS: pass 5, dispatched chunk #10 = documents up to #44000/

DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3061/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #29 of 4000 documents
DEBUG : processing chunk #30 of 4000 documents
DEBUG : getting a new job
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3116/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : 3063/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : processing chunk #31 of 2225 documents
DEBUG : performing inference on a chunk of 2225 documents
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3059/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3075/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing t

DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3200/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #8 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #9 of 4000 documents
DEBUG : result put
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3131/4000 documents converged within 50 iterations
DEBUG : 3114/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3151/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #10 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #11 of 4000 

INFO : topic diff=0.221716, rho=0.161047
DEBUG : bound: at document #0
INFO : -7.676 per-word bound, 204.5 perplexity estimate based on a held-out corpus of 2225 documents with 88742 words
INFO : PROGRESS: pass 7, dispatched chunk #0 = documents up to #4000/126225, outstanding queue size 1
INFO : PROGRESS: pass 7, dispatched chunk #1 = documents up to #8000/126225, outstanding queue size 2
INFO : PROGRESS: pass 7, dispatched chunk #2 = documents up to #12000/126225, outstanding queue size 3
INFO : PROGRESS: pass 7, dispatched chunk #3 = documents up to #16000/126225, outstanding queue size 4
INFO : PROGRESS: pass 7, dispatched chunk #4 = documents up to #20000/126225, outstanding queue size 5
INFO : PROGRESS: pass 7, dispatched chunk #5 = documents up to #24000/126225, outstanding queue size 6
INFO : PROGRESS: pass 7, dispatched chunk #6 = documents up to #28000/126225, outstanding queue size 7
INFO : PROGRESS: pass 7, dispatched chunk #7 = documents up to #32000/126225, outstanding qu

DEBUG : 3253/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #25 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #26 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3227/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #27 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #28 of 4000 documents
DEBUG : 3233/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3243/4000 documents converged within 50 iterations
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
D

DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #4 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #5 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #6 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #7 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3266/4000 documents converged within 50 iterations
DEBUG : 3358/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3256/4000 documents converged within 50 iterations
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #8 of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBU

INFO : topic #19 (0.020): 0.046*"burger" + 0.023*"place" + 0.023*"good" + 0.017*"fry" + 0.014*"come" + 0.013*"like" + 0.012*"order" + 0.009*"fish" + 0.009*"food" + 0.009*"try"
INFO : topic #8 (0.020): 0.016*"time" + 0.015*"food" + 0.015*"good" + 0.010*"service" + 0.010*"birthday" + 0.010*"place" + 0.009*"restaurant" + 0.009*"favorite" + 0.009*"come" + 0.009*"great"
INFO : topic diff=0.218097, rho=0.157026
DEBUG : bound: at document #0
INFO : -7.633 per-word bound, 198.5 perplexity estimate based on a held-out corpus of 2225 documents with 88742 words
INFO : PROGRESS: pass 9, dispatched chunk #0 = documents up to #4000/126225, outstanding queue size 1
INFO : PROGRESS: pass 9, dispatched chunk #1 = documents up to #8000/126225, outstanding queue size 2
INFO : PROGRESS: pass 9, dispatched chunk #2 = documents up to #12000/126225, outstanding queue size 3
INFO : PROGRESS: pass 9, dispatched chunk #3 = documents up to #16000/126225, outstanding queue size 4
INFO : PROGRESS: pass 9, dispatch

DEBUG : processing chunk #25 of 4000 documents
DEBUG : processing chunk #26 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3316/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : processing chunk #27 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #28 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #29 of 4000 documents
DEBUG : 3336/4000 documents converged within 50 iterations
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #30 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #31 of 2225 documents
DEBUG : processed chunk, queuing the result
DEBUG : performing inference on a chunk o

INFO : PROGRESS: pass 10, dispatched chunk #29 = documents up to #120000/126225, outstanding queue size 30
INFO : PROGRESS: pass 10, dispatched chunk #30 = documents up to #124000/126225, outstanding queue size 31
INFO : PROGRESS: pass 10, dispatched chunk #31 = documents up to #126225/126225, outstanding queue size 32
DEBUG : processing chunk #3 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #4 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #5 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #6 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #7 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3447/4000 documents converged within 50 iterations
DEBUG : processing chunk #8 of 4000 documents
DEBUG : performing inference on a chunk of 4000 docume

INFO : topic #45 (0.020): 0.036*"food" + 0.022*"service" + 0.020*"amazing" + 0.019*"time" + 0.017*"good" + 0.017*"place" + 0.016*"come" + 0.012*"great" + 0.009*"wait" + 0.008*"restaurant"
INFO : topic #35 (0.020): 0.048*"order" + 0.018*"cheese" + 0.017*"chicken" + 0.017*"time" + 0.015*"sandwich" + 0.013*"food" + 0.012*"fry" + 0.012*"good" + 0.010*"come" + 0.009*"like"
INFO : topic #23 (0.020): 0.021*"food" + 0.018*"table" + 0.015*"order" + 0.014*"good" + 0.012*"drink" + 0.012*"ask" + 0.012*"come" + 0.012*"seat" + 0.011*"place" + 0.011*"great"
INFO : topic #8 (0.020): 0.016*"time" + 0.015*"birthday" + 0.014*"food" + 0.014*"good" + 0.010*"service" + 0.009*"place" + 0.009*"favorite" + 0.009*"restaurant" + 0.009*"come" + 0.008*"great"
INFO : topic diff=0.211772, rho=0.153292
DEBUG : bound: at document #0
INFO : -7.599 per-word bound, 193.9 perplexity estimate based on a held-out corpus of 2225 documents with 88742 words
INFO : PROGRESS: pass 11, dispatched chunk #0 = documents up to #4000/

DEBUG : getting a new job
DEBUG : processing chunk #23 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3388/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #24 of 4000 documents
DEBUG : getting a new job
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3374/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : 3419/4000 documents converged within 50 iterations
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #25 of 4000 documents
DEBUG : 3447/4000 documents converged within 50 iterations
DEBUG : getting a new job
DEBUG : processing chunk #26 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG :

INFO : PROGRESS: pass 12, dispatched chunk #25 = documents up to #104000/126225, outstanding queue size 26
INFO : PROGRESS: pass 12, dispatched chunk #26 = documents up to #108000/126225, outstanding queue size 27
INFO : PROGRESS: pass 12, dispatched chunk #27 = documents up to #112000/126225, outstanding queue size 28DEBUG : processing chunk #3 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents

INFO : PROGRESS: pass 12, dispatched chunk #28 = documents up to #116000/126225, outstanding queue size 29
INFO : PROGRESS: pass 12, dispatched chunk #29 = documents up to #120000/126225, outstanding queue size 30
INFO : PROGRESS: pass 12, dispatched chunk #30 = documents up to #124000/126225, outstanding queue size 31
INFO : PROGRESS: pass 12, dispatched chunk #31 = documents up to #126225/126225, outstanding queue size 32
DEBUG : processing chunk #4 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : performing inference on a chunk of 

DEBUG : getting a new job
DEBUG : 3341/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3438/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : updating topics
INFO : topic #34 (0.020): 0.036*"vegan" + 0.017*"great" + 0.015*"place" + 0.015*"good" + 0.011*"food" + 0.011*"time" + 0.010*"service" + 0.008*"enjoy" + 0.008*"delicious" + 0.008*"meal"
INFO : topic #42 (0.020): 0.029*"order" + 0.023*"ask" + 0.023*"tell" + 0.019*"manager" + 0.013*"come" + 0.013*"food" + 0.011*"want" + 0.010*"bad" + 0.009*"like" + 0.009*"charge"
INFO : topic #23 (0.020): 0.021*"food" + 0.019*"table" + 0.014*"order" + 0.014*"good" + 0.014*"drink" + 0.014*"seat" + 0.012*"ask" + 0.012*"come" + 0.011*"place" + 0.010*"great"
INFO : topic #24 (0.020): 0.044*"good" + 0.032*"food" + 0.017*"place" + 0.014*"time" + 0.013*"come" + 0.010*"eat

DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #20 of 4000 documents
DEBUG : processing chunk #21 of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3445/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #22 of 4000 documents
DEBUG : getting a new job
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #23 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3460/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : 3466/4000 documents converged within 50 iterations
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #24 of 4000 documents


INFO : PROGRESS: pass 14, dispatched chunk #21 = documents up to #88000/126225, outstanding queue size 22
INFO : PROGRESS: pass 14, dispatched chunk #22 = documents up to #92000/126225, outstanding queue size 23
INFO : PROGRESS: pass 14, dispatched chunk #23 = documents up to #96000/126225, outstanding queue size 24
INFO : PROGRESS: pass 14, dispatched chunk #24 = documents up to #100000/126225, outstanding queue size 25DEBUG : processing chunk #2 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents

INFO : PROGRESS: pass 14, dispatched chunk #25 = documents up to #104000/126225, outstanding queue size 26
INFO : PROGRESS: pass 14, dispatched chunk #26 = documents up to #108000/126225, outstanding queue size 27
INFO : PROGRESS: pass 14, dispatched chunk #27 = documents up to #112000/126225, outstanding queue size 28
INFO : PROGRESS: pass 14, dispatched chunk #28 = documents up to #116000/126225, outstanding queue size 29
INFO : PROGRESS: pass 14, dispatched chunk 

DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3522/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 3450/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : 3488/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3422/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : 3435/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : result put
DEBUG : getting a new job
DEBUG : updating topics
INFO : topic #19 (0.020): 0.061*"burger" + 0.025*"good" + 0.023*"fry" + 0.021*"place" + 0.013*"like" + 0.013*"come" + 0.012*"order" + 0.010*"fish" 

DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #17 of 4000 documents
DEBUG : getting a new job
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3499/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #18 of 4000 documents
DEBUG : 3502/4000 documents converged within 50 iterations
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #19 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3506/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #20 of 4000 documents
DEBUG : processing chunk #21 of 4000 documents
DEBUG : result put
DEBUG : perf

DEBUG : processing chunk #1 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
INFO : PROGRESS: pass 16, dispatched chunk #18 = documents up to #76000/126225, outstanding queue size 19
INFO : PROGRESS: pass 16, dispatched chunk #19 = documents up to #80000/126225, outstanding queue size 20
INFO : PROGRESS: pass 16, dispatched chunk #20 = documents up to #84000/126225, outstanding queue size 21
INFO : PROGRESS: pass 16, dispatched chunk #21 = documents up to #88000/126225, outstanding queue size 22
INFO : PROGRESS: pass 16, dispatched chunk #22 = documents up to #92000/126225, outstanding queue size 23
INFO : PROGRESS: pass 16, dispatched chunk #23 = documents up to #96000/126225, outstanding queue size 24
INFO : PROGRESS: pass 16, dispatched chunk #24 = documents up to #100000/126225, outstanding queue size 25
INFO : PROGRESS: pass 16, dispatched chunk #25 = documents up to #104000/126225, outstanding queue size 26
DEBUG : processing chunk #2 of 4000 documents


DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3486/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3541/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 1943/2225 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 3519/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3523/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3487/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 3475/4000 docum

DEBUG : 3554/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #15 of 4000 documents
DEBUG : getting a new job
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3563/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #16 of 4000 documents
DEBUG : getting a new job
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3564/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #17 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3525/4000 documents converged within 50 iterations
DEBUG : processing chunk #18 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : gettin

INFO : PROGRESS: pass 18, dispatched chunk #13 = documents up to #56000/126225, outstanding queue size 14
DEBUG : processing chunk #1 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
INFO : PROGRESS: pass 18, dispatched chunk #14 = documents up to #60000/126225, outstanding queue size 15
INFO : PROGRESS: pass 18, dispatched chunk #15 = documents up to #64000/126225, outstanding queue size 16
INFO : PROGRESS: pass 18, dispatched chunk #16 = documents up to #68000/126225, outstanding queue size 17
INFO : PROGRESS: pass 18, dispatched chunk #17 = documents up to #72000/126225, outstanding queue size 18
INFO : PROGRESS: pass 18, dispatched chunk #18 = documents up to #76000/126225, outstanding queue size 19
INFO : PROGRESS: pass 18, dispatched chunk #19 = documents up to #80000/126225, outstanding queue size 20
INFO : PROGRESS: pass 18, dispatched chunk #20 = documents up to #84000/126225, outstanding queue size 21
INFO : PROGRESS: pass 18, dispatched chunk #21 =

DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #31 of 2225 documents
DEBUG : performing inference on a chunk of 2225 documents
DEBUG : processed chunk, queuing the result
DEBUG : 3570/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3540/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : 3561/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : result put
DEBUG : getting a new job
DEBUG : getting a new job
DEBUG : 3530/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3541/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : processed chunk, queuing the result
DEBUG : getting a new job
DEBUG : 3591/4000 documents converged within 50 iterations
DEBUG : result put
DEBUG : proces

DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3557/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : 3552/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : processing chunk #13 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : processing chunk #14 of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3572/4000 documents converged within 50 iterations
DEBUG : processed chunk, queuing the result
DEBUG : result put
DEBUG : getting a new job
DEBUG : processing chunk #15 of 4000 documents
DEBUG : processing chunk #16 of 4000 documents
DEBUG : processed chunk, queuing the result
DEBUG : performing inference on a chunk of 4000 documents
DEBUG : 3563/4000 documents converged within 50 iterations
D

In [57]:
lda.save('lda_final2')
dictionary.save('dictionary2')
corpora.MmCorpus.serialize('doc_term_matrix.mm2', doc_term_matrix)

INFO : saving LdaState object under lda_final2.state, separately None
DEBUG : {'kw': {}, 'mode': 'wb', 'uri': 'lda_final2.state'}
DEBUG : encoding_wrapper: {'errors': 'strict', 'encoding': None, 'mode': 'wb', 'fileobj': <_io.BufferedWriter name='lda_final2.state'>}
INFO : saved lda_final2.state
DEBUG : {'kw': {}, 'mode': 'wb', 'uri': 'lda_final2.id2word'}
DEBUG : encoding_wrapper: {'errors': 'strict', 'encoding': None, 'mode': 'wb', 'fileobj': <_io.BufferedWriter name='lda_final2.id2word'>}
INFO : saving LdaMulticore object under lda_final2, separately ['expElogbeta', 'sstats']
INFO : storing np array 'expElogbeta' to lda_final2.expElogbeta.npy
INFO : not storing attribute dispatcher
INFO : not storing attribute id2word
INFO : not storing attribute state
DEBUG : {'kw': {}, 'mode': 'wb', 'uri': 'lda_final2'}
DEBUG : encoding_wrapper: {'errors': 'strict', 'encoding': None, 'mode': 'wb', 'fileobj': <_io.BufferedWriter name='lda_final2'>}
INFO : saved lda_final2
INFO : saving Dictionary ob