In [25]:
from __future__ import print_function, division
import pandas as pd 
import numpy as np
# gensim
from gensim import corpora, models, similarities, matutils
# sklearn
from sklearn import datasets
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans
# logging for gensim (set to INFO)

from matplotlib import pyplot as plt
%matplotlib inline
plt.style.use('fivethirtyeight')


import logging
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

In [2]:
dtypes = {
    'id': np.int32,
    'year': np.int32,
    'month': np.int8,
    'id': 'object',
    'url': 'object',
    'word_count': np.float16,
    'article': 'object'
    ''
}
raw_df = pd.read_csv('./data/11_17_final.csv', names=['ind', 'year', 'month', 'id', 'url', 'word_count', 'article'], dtype=dtypes, index_col=False)

In [3]:
raw_df.drop_duplicates(subset='id', inplace=True)
raw_df = raw_df.dropna().reset_index()

In [4]:
subset_size = 20000
subset_df = raw_df.groupby('year').apply(lambda x: x.sample(subset_size)).reset_index(drop=True)

In [5]:
article_text = subset_df['article']

In [6]:
# Create a CountVectorizer for parsing/counting words
count_vectorizer = CountVectorizer(ngram_range=(1, 2),  
                                   stop_words='english', token_pattern="\\b[a-z][a-z]+\\b")
count_vectorizer.fit(article_text)

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
        dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
        lowercase=True, max_df=1.0, max_features=None, min_df=1,
        ngram_range=(1, 2), preprocessor=None, stop_words='english',
        strip_accents=None, token_pattern='\\b[a-z][a-z]+\\b',
        tokenizer=None, vocabulary=None)

In [7]:
# Create the term-document matrix
# Transpose it so the terms are the rows
counts = count_vectorizer.transform(article_text).transpose()

In [8]:
# Convert sparse matrix of counts to a gensim corpus
corpus = matutils.Sparse2Corpus(counts)

In [None]:
# We need to save a mapping (dict) of row id to word (token) for later use by gensim:

id2word = dict((v, k) for k, v in count_vectorizer.vocabulary_.items())

In [None]:
# Create lda model (equivalent to "fit" in sklearn)
NUM_TOPICS= 18
lda = models.LdaModel(corpus=corpus, num_topics=NUM_TOPICS, id2word=id2word, passes=10, eval_every=None)
# lda = models.LdaMulticore(corpus=corpus, num_topics=15, id2word=id2word, passes=10)

# IMPORTANT NOTE: There's an eval_every parameter that should control how often gensim runs a perplexity evaluation,
# which is a time expensive step. It seems to me that it isn't working properly, and the only way to really
# get a speed up is to set it to None.

2018-03-08 05:14:13,260 : INFO : using symmetric alpha at 0.05555555555555555
2018-03-08 05:14:13,261 : INFO : using symmetric eta at 0.05555555555555555
2018-03-08 05:14:16,344 : INFO : using serial LDA version on this node
2018-03-08 05:15:13,752 : INFO : running online (multi-pass) LDA training, 18 topics, 10 passes over the supplied corpus of 140000 documents, updating model once every 2000 documents, evaluating perplexity every 0 documents, iterating 50x with a convergence threshold of 0.001000
2018-03-08 05:15:13,952 : INFO : PROGRESS: pass 0, at document #2000/140000
2018-03-08 05:15:25,874 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 05:16:20,104 : INFO : topic #1 (0.056): 0.008*"said" + 0.004*"mr" + 0.002*"year" + 0.002*"like" + 0.001*"people" + 0.001*"new" + 0.001*"years" + 0.001*"ms" + 0.001*"time" + 0.001*"just"
2018-03-08 05:16:20,333 : INFO : topic #15 (0.056): 0.007*"said" + 0.006*"mr" + 0.002*"new" + 0.002*"year" + 0.002*"like

2018-03-08 05:22:16,600 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 05:23:11,122 : INFO : topic #11 (0.056): 0.006*"said" + 0.002*"people" + 0.002*"mr" + 0.002*"like" + 0.002*"new" + 0.001*"ms" + 0.001*"just" + 0.001*"years" + 0.001*"time" + 0.001*"did"
2018-03-08 05:23:11,445 : INFO : topic #0 (0.056): 0.010*"said" + 0.003*"people" + 0.003*"police" + 0.003*"government" + 0.002*"killed" + 0.002*"forces" + 0.002*"military" + 0.002*"security" + 0.002*"mr" + 0.002*"al"
2018-03-08 05:23:11,684 : INFO : topic #9 (0.056): 0.005*"said" + 0.004*"new" + 0.003*"mr" + 0.003*"like" + 0.002*"company" + 0.001*"google" + 0.001*"year" + 0.001*"york" + 0.001*"new york" + 0.001*"people"
2018-03-08 05:23:11,984 : INFO : topic #10 (0.056): 0.006*"said" + 0.006*"game" + 0.004*"season" + 0.003*"points" + 0.003*"team" + 0.003*"games" + 0.002*"second" + 0.002*"play" + 0.002*"win" + 0.002*"year"
2018-03-08 05:23:12,279 : INFO : topic #3 (0.056): 0.010*"said" + 0.006

2018-03-08 05:30:03,661 : INFO : topic #10 (0.056): 0.007*"game" + 0.007*"said" + 0.004*"season" + 0.004*"team" + 0.004*"points" + 0.003*"games" + 0.003*"play" + 0.003*"coach" + 0.003*"second" + 0.002*"players"
2018-03-08 05:30:03,975 : INFO : topic #15 (0.056): 0.014*"mr" + 0.011*"said" + 0.003*"news" + 0.002*"case" + 0.002*"police" + 0.002*"court" + 0.002*"new" + 0.002*"law" + 0.002*"company" + 0.002*"did"
2018-03-08 05:30:04,320 : INFO : topic #12 (0.056): 0.008*"mr" + 0.004*"new" + 0.004*"said" + 0.003*"music" + 0.003*"like" + 0.002*"film" + 0.002*"school" + 0.002*"theater" + 0.002*"ms" + 0.002*"york"
2018-03-08 05:30:05,122 : INFO : topic diff=0.376388, rho=0.277350
2018-03-08 05:30:05,390 : INFO : PROGRESS: pass 0, at document #28000/140000
2018-03-08 05:30:17,155 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 05:31:11,645 : INFO : topic #1 (0.056): 0.011*"said" + 0.007*"ms" + 0.004*"like" + 0.003*"people" + 0.003*"just" + 0.003*"mr" + 0.

2018-03-08 05:36:52,576 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.004*"company" + 0.003*"million" + 0.003*"market" + 0.003*"companies" + 0.003*"billion" + 0.002*"sales" + 0.002*"new"
2018-03-08 05:36:53,393 : INFO : topic diff=0.380798, rho=0.229416
2018-03-08 05:36:53,681 : INFO : PROGRESS: pass 0, at document #40000/140000
2018-03-08 05:37:05,247 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 05:37:59,973 : INFO : topic #12 (0.056): 0.008*"mr" + 0.004*"new" + 0.003*"film" + 0.003*"music" + 0.003*"said" + 0.003*"like" + 0.002*"art" + 0.002*"theater" + 0.002*"movie" + 0.002*"ms"
2018-03-08 05:38:00,211 : INFO : topic #9 (0.056): 0.004*"like" + 0.004*"said" + 0.004*"company" + 0.003*"new" + 0.002*"mr" + 0.002*"online" + 0.002*"web" + 0.002*"apple" + 0.002*"use" + 0.002*"facebook"
2018-03-08 05:38:00,432 : INFO : topic #14 (0.056): 0.006*"said" + 0.005*"european" + 0.004*"bank" + 0.004*"china" + 0.003*"governmen

2018-03-08 05:43:52,304 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 05:44:46,836 : INFO : topic #16 (0.056): 0.027*"mr" + 0.008*"said" + 0.006*"president" + 0.005*"obama" + 0.004*"political" + 0.004*"party" + 0.003*"campaign" + 0.003*"mr obama" + 0.002*"israel" + 0.002*"election"
2018-03-08 05:44:47,223 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"new" + 0.003*"music" + 0.003*"film" + 0.003*"like" + 0.002*"said" + 0.002*"art" + 0.002*"theater" + 0.002*"movie" + 0.002*"ms"
2018-03-08 05:44:47,633 : INFO : topic #7 (0.056): 0.006*"mr" + 0.004*"north" + 0.003*"south" + 0.003*"korea" + 0.002*"said" + 0.002*"korean" + 0.002*"north korea" + 0.001*"kim" + 0.001*"south korea" + 0.001*"party"
2018-03-08 05:44:47,885 : INFO : topic #1 (0.056): 0.012*"said" + 0.007*"ms" + 0.005*"like" + 0.004*"people" + 0.003*"just" + 0.003*"time" + 0.003*"don" + 0.003*"life" + 0.003*"years" + 0.003*"school"
2018-03-08 05:44:48,132 : INFO : topic #14 (0.056): 0.005*

2018-03-08 05:51:33,224 : INFO : topic #7 (0.056): 0.004*"mr" + 0.004*"north" + 0.004*"south" + 0.003*"korea" + 0.002*"korean" + 0.002*"said" + 0.001*"north korea" + 0.001*"south korea" + 0.001*"kim" + 0.001*"french"
2018-03-08 05:51:33,561 : INFO : topic #17 (0.056): 0.007*"said" + 0.006*"city" + 0.004*"new" + 0.003*"street" + 0.003*"building" + 0.003*"mr" + 0.002*"house" + 0.002*"park" + 0.002*"like" + 0.002*"york"
2018-03-08 05:51:33,910 : INFO : topic #9 (0.056): 0.005*"like" + 0.004*"company" + 0.004*"said" + 0.003*"new" + 0.002*"online" + 0.002*"use" + 0.002*"people" + 0.002*"google" + 0.002*"mr" + 0.002*"business"
2018-03-08 05:51:34,712 : INFO : topic diff=0.374240, rho=0.176777
2018-03-08 05:51:34,974 : INFO : PROGRESS: pass 0, at document #66000/140000
2018-03-08 05:51:45,662 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 05:52:40,236 : INFO : topic #15 (0.056): 0.018*"mr" + 0.013*"said" + 0.004*"case" + 0.004*"court" + 0.003*"police"

2018-03-08 05:58:20,419 : INFO : topic #11 (0.056): 0.003*"article" + 0.002*"said" + 0.002*"church" + 0.002*"gay" + 0.002*"nytimes" + 0.002*"times" + 0.001*"com" + 0.001*"nytimes com" + 0.001*"news" + 0.001*"people"
2018-03-08 05:58:21,223 : INFO : topic diff=0.367322, rho=0.162221
2018-03-08 05:58:21,571 : INFO : PROGRESS: pass 0, at document #78000/140000
2018-03-08 05:58:32,162 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 05:59:26,852 : INFO : topic #11 (0.056): 0.003*"article" + 0.002*"said" + 0.002*"church" + 0.002*"gay" + 0.002*"nytimes" + 0.002*"times" + 0.001*"com" + 0.001*"nytimes com" + 0.001*"news" + 0.001*"people"
2018-03-08 05:59:27,168 : INFO : topic #17 (0.056): 0.007*"said" + 0.006*"city" + 0.004*"new" + 0.003*"street" + 0.003*"building" + 0.003*"mr" + 0.002*"park" + 0.002*"house" + 0.002*"like" + 0.002*"york"
2018-03-08 05:59:27,514 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.005*"mr" + 0.004*"new" + 0.003*"la

2018-03-08 06:05:18,856 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 06:06:13,553 : INFO : topic #5 (0.056): 0.004*"team" + 0.004*"said" + 0.004*"world" + 0.002*"sports" + 0.002*"time" + 0.002*"players" + 0.002*"cup" + 0.002*"football" + 0.002*"year" + 0.002*"like"
2018-03-08 06:06:13,919 : INFO : topic #13 (0.056): 0.006*"new" + 0.003*"said" + 0.003*"mr" + 0.002*"million" + 0.002*"new york" + 0.002*"york" + 0.002*"group" + 0.002*"media" + 0.002*"director" + 0.002*"like"
2018-03-08 06:06:14,266 : INFO : topic #14 (0.056): 0.005*"bank" + 0.005*"china" + 0.005*"european" + 0.004*"said" + 0.004*"government" + 0.003*"chinese" + 0.003*"financial" + 0.003*"europe" + 0.003*"countries" + 0.002*"central"
2018-03-08 06:06:14,565 : INFO : topic #16 (0.056): 0.026*"mr" + 0.007*"said" + 0.006*"president" + 0.005*"party" + 0.004*"political" + 0.004*"obama" + 0.003*"campaign" + 0.003*"russia" + 0.002*"mr obama" + 0.002*"clinton"
2018-03-08 06:06:14,946 : IN

2018-03-08 06:12:59,771 : INFO : topic #6 (0.056): 0.005*"said" + 0.004*"game" + 0.004*"run" + 0.004*"season" + 0.003*"mets" + 0.003*"yankees" + 0.003*"hit" + 0.002*"baseball" + 0.002*"home" + 0.002*"runs"
2018-03-08 06:13:00,204 : INFO : topic #15 (0.056): 0.021*"mr" + 0.014*"said" + 0.004*"police" + 0.004*"case" + 0.004*"court" + 0.003*"department" + 0.002*"did" + 0.002*"judge" + 0.002*"investigation" + 0.002*"law"
2018-03-08 06:13:00,394 : INFO : topic #14 (0.056): 0.006*"bank" + 0.005*"china" + 0.005*"european" + 0.004*"said" + 0.004*"government" + 0.003*"financial" + 0.003*"chinese" + 0.003*"europe" + 0.002*"countries" + 0.002*"banks"
2018-03-08 06:13:01,198 : INFO : topic diff=0.350000, rho=0.140028
2018-03-08 06:13:01,455 : INFO : PROGRESS: pass 0, at document #104000/140000
2018-03-08 06:13:11,796 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 06:14:06,495 : INFO : topic #11 (0.056): 0.003*"article" + 0.003*"church" + 0.003*"que" + 0.00

2018-03-08 06:19:43,461 : INFO : topic #7 (0.056): 0.005*"la" + 0.004*"en" + 0.004*"el" + 0.003*"los" + 0.003*"south" + 0.002*"north" + 0.002*"korea" + 0.002*"food" + 0.002*"las" + 0.001*"korean"
2018-03-08 06:19:44,265 : INFO : topic diff=0.331084, rho=0.132453
2018-03-08 06:19:44,520 : INFO : PROGRESS: pass 0, at document #116000/140000
2018-03-08 06:19:54,799 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 06:20:49,534 : INFO : topic #16 (0.056): 0.029*"mr" + 0.012*"trump" + 0.008*"mr trump" + 0.006*"president" + 0.006*"said" + 0.006*"clinton" + 0.005*"party" + 0.004*"campaign" + 0.004*"political" + 0.004*"obama"
2018-03-08 06:20:49,876 : INFO : topic #0 (0.056): 0.011*"said" + 0.004*"government" + 0.004*"people" + 0.003*"police" + 0.003*"state" + 0.003*"military" + 0.003*"islamic" + 0.003*"killed" + 0.002*"officials" + 0.002*"united"
2018-03-08 06:20:50,251 : INFO : topic #1 (0.056): 0.013*"said" + 0.007*"ms" + 0.006*"like" + 0.005*"people" 

2018-03-08 06:26:39,976 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 06:27:34,545 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"said" + 0.003*"facebook" + 0.002*"use" + 0.002*"online" + 0.002*"people" + 0.002*"technology" + 0.002*"make"
2018-03-08 06:27:34,822 : INFO : topic #5 (0.056): 0.004*"team" + 0.004*"world" + 0.003*"said" + 0.002*"sports" + 0.002*"time" + 0.002*"year" + 0.002*"players" + 0.002*"club" + 0.002*"won" + 0.002*"football"
2018-03-08 06:27:35,264 : INFO : topic #2 (0.056): 0.008*"said" + 0.005*"states" + 0.005*"united" + 0.005*"united states" + 0.004*"american" + 0.003*"officials" + 0.002*"agency" + 0.002*"power" + 0.002*"water" + 0.002*"climate"
2018-03-08 06:27:35,567 : INFO : topic #13 (0.056): 0.006*"new" + 0.003*"said" + 0.003*"mr" + 0.003*"art" + 0.002*"million" + 0.002*"museum" + 0.002*"new york" + 0.002*"york" + 0.002*"media" + 0.002*"director"
2018-03-08 06:27:36,005 : INFO : topic

2018-03-08 06:34:19,994 : INFO : topic #7 (0.056): 0.005*"korea" + 0.004*"north" + 0.003*"south" + 0.003*"north korea" + 0.003*"la" + 0.003*"food" + 0.002*"korean" + 0.002*"cooking" + 0.002*"wine" + 0.002*"kim"
2018-03-08 06:34:20,300 : INFO : topic #4 (0.056): 0.011*"percent" + 0.007*"year" + 0.006*"said" + 0.006*"company" + 0.005*"million" + 0.004*"companies" + 0.003*"market" + 0.003*"billion" + 0.002*"years" + 0.002*"business"
2018-03-08 06:34:20,786 : INFO : topic #11 (0.056): 0.004*"times" + 0.004*"article" + 0.002*"church" + 0.002*"gay" + 0.002*"news" + 0.001*"com" + 0.001*"sex" + 0.001*"nytimes" + 0.001*"comments" + 0.001*"readers"
2018-03-08 06:34:21,132 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.003*"street" + 0.003*"building" + 0.003*"mr" + 0.002*"york" + 0.002*"new york" + 0.002*"house" + 0.002*"park"
2018-03-08 06:34:21,934 : INFO : topic diff=0.323278, rho=0.119523
2018-03-08 06:34:22,261 : INFO : PROGRESS: pass 1, at document #2000/140000
20

2018-03-08 06:41:07,623 : INFO : topic #12 (0.056): 0.009*"mr" + 0.004*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.002*"art" + 0.002*"york" + 0.002*"new york"
2018-03-08 06:41:07,963 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"bank" + 0.005*"european" + 0.004*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"financial" + 0.003*"europe" + 0.003*"banks" + 0.002*"debt"
2018-03-08 06:41:08,765 : INFO : topic diff=0.295630, rho=0.117851
2018-03-08 06:41:09,021 : INFO : PROGRESS: pass 1, at document #14000/140000
2018-03-08 06:41:19,543 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 06:42:14,226 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"people" + 0.004*"military" + 0.003*"police" + 0.003*"officials" + 0.003*"security" + 0.003*"killed" + 0.002*"country" + 0.002*"state"
2018-03-08 06:42:14,687 : INFO : topic #17 (0.056): 0.007*"said" + 0.006*"city" + 0.005*"new" + 0.0

2018-03-08 06:47:55,022 : INFO : topic diff=0.292600, rho=0.117851
2018-03-08 06:47:55,283 : INFO : PROGRESS: pass 1, at document #26000/140000
2018-03-08 06:48:05,553 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 06:49:00,208 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"american" + 0.003*"officials" + 0.003*"nuclear" + 0.002*"power" + 0.002*"agency" + 0.002*"energy"
2018-03-08 06:49:00,554 : INFO : topic #5 (0.056): 0.004*"team" + 0.004*"said" + 0.003*"world" + 0.002*"players" + 0.002*"sports" + 0.002*"year" + 0.002*"time" + 0.002*"football" + 0.002*"won" + 0.002*"club"
2018-03-08 06:49:00,960 : INFO : topic #12 (0.056): 0.009*"mr" + 0.004*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.002*"york" + 0.002*"new york" + 0.002*"art"
2018-03-08 06:49:01,478 : INFO : topic #13 (0.056): 0.006*"new" + 0.003*"said" + 0.003*"mr" + 0.002*"art" + 0.00

2018-03-08 06:55:44,882 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"police" + 0.003*"security" + 0.003*"officials" + 0.003*"killed" + 0.002*"united" + 0.002*"country"
2018-03-08 06:55:45,345 : INFO : topic #11 (0.056): 0.003*"church" + 0.002*"gay" + 0.002*"article" + 0.002*"times" + 0.002*"sex" + 0.001*"com" + 0.001*"news" + 0.001*"catholic" + 0.001*"book" + 0.001*"writes"
2018-03-08 06:55:45,684 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.004*"mr" + 0.003*"law" + 0.003*"federal" + 0.003*"tax" + 0.003*"republican" + 0.003*"house" + 0.002*"people"
2018-03-08 06:55:46,004 : INFO : topic #8 (0.056): 0.008*"dr" + 0.007*"university" + 0.004*"new" + 0.003*"medical" + 0.003*"health" + 0.003*"school" + 0.003*"york" + 0.003*"study" + 0.003*"new york" + 0.002*"college"
2018-03-08 06:55:46,805 : INFO : topic diff=0.288807, rho=0.117851
2018-03-08 06:55:47,078 : INFO : PROGRESS: pass 1, at document #40000/1

2018-03-08 07:02:29,306 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.004*"mr" + 0.003*"law" + 0.003*"federal" + 0.003*"house" + 0.002*"republican" + 0.002*"health" + 0.002*"tax"
2018-03-08 07:02:29,721 : INFO : topic #10 (0.056): 0.008*"game" + 0.006*"said" + 0.005*"season" + 0.004*"team" + 0.004*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.003*"players" + 0.002*"second"
2018-03-08 07:02:30,521 : INFO : topic diff=0.278264, rho=0.117851
2018-03-08 07:02:30,861 : INFO : PROGRESS: pass 1, at document #52000/140000
2018-03-08 07:02:40,802 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:03:35,441 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"american" + 0.003*"officials" + 0.003*"nuclear" + 0.002*"agency" + 0.002*"power" + 0.002*"energy"
2018-03-08 07:03:35,646 : INFO : topic #6 (0.056): 0.006*"said" + 0.005*"game" + 0.004*"season" + 0.004*"ru

2018-03-08 07:09:13,493 : INFO : topic diff=0.273483, rho=0.117851
2018-03-08 07:09:13,747 : INFO : PROGRESS: pass 1, at document #64000/140000
2018-03-08 07:09:23,619 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:10:18,168 : INFO : topic #5 (0.056): 0.004*"team" + 0.004*"said" + 0.003*"world" + 0.002*"sports" + 0.002*"players" + 0.002*"year" + 0.002*"cup" + 0.002*"time" + 0.002*"club" + 0.002*"won"
2018-03-08 07:10:18,511 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"american" + 0.003*"officials" + 0.002*"agency" + 0.002*"oil" + 0.002*"energy" + 0.002*"power"
2018-03-08 07:10:18,853 : INFO : topic #9 (0.056): 0.005*"like" + 0.005*"company" + 0.003*"said" + 0.003*"new" + 0.003*"online" + 0.002*"use" + 0.002*"people" + 0.002*"web" + 0.002*"internet" + 0.002*"technology"
2018-03-08 07:10:19,191 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company

2018-03-08 07:17:01,466 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"million" + 0.005*"company" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"sales"
2018-03-08 07:17:01,836 : INFO : topic #9 (0.056): 0.005*"like" + 0.005*"company" + 0.003*"said" + 0.003*"new" + 0.003*"online" + 0.002*"use" + 0.002*"people" + 0.002*"technology" + 0.002*"business" + 0.002*"internet"
2018-03-08 07:17:02,249 : INFO : topic #13 (0.056): 0.007*"new" + 0.003*"mr" + 0.003*"said" + 0.003*"art" + 0.003*"new york" + 0.003*"york" + 0.003*"million" + 0.003*"museum" + 0.002*"director" + 0.002*"group"
2018-03-08 07:17:02,700 : INFO : topic #1 (0.056): 0.015*"said" + 0.007*"ms" + 0.006*"like" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.003*"don" + 0.003*"years" + 0.003*"know" + 0.003*"life"
2018-03-08 07:17:03,503 : INFO : topic diff=0.285533, rho=0.117851
2018-03-08 07:17:03,767 : INFO : PROGRESS: pass 1, at document #78000/140000
2018-03-0

2018-03-08 07:23:45,304 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"bank" + 0.005*"european" + 0.004*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"europe" + 0.003*"financial" + 0.003*"countries" + 0.002*"central"
2018-03-08 07:23:45,731 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"like" + 0.002*"theater" + 0.002*"movie" + 0.001*"art" + 0.001*"ms" + 0.001*"york"
2018-03-08 07:23:46,227 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"new" + 0.003*"health" + 0.003*"medical" + 0.003*"college" + 0.003*"students" + 0.003*"school" + 0.003*"study" + 0.002*"york"
2018-03-08 07:23:47,029 : INFO : topic diff=0.284538, rho=0.117851
2018-03-08 07:23:47,280 : INFO : PROGRESS: pass 1, at document #90000/140000
2018-03-08 07:23:57,108 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:24:51,866 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"case"

2018-03-08 07:30:28,601 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"like" + 0.002*"theater" + 0.002*"movie" + 0.001*"york" + 0.001*"new york" + 0.001*"ms"
2018-03-08 07:30:29,402 : INFO : topic diff=0.279414, rho=0.117851
2018-03-08 07:30:29,653 : INFO : PROGRESS: pass 1, at document #102000/140000
2018-03-08 07:30:39,447 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:31:34,068 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"said" + 0.003*"sports" + 0.002*"players" + 0.002*"year" + 0.002*"time" + 0.002*"cup" + 0.002*"won" + 0.002*"club"
2018-03-08 07:31:34,324 : INFO : topic #16 (0.056): 0.028*"mr" + 0.006*"president" + 0.006*"said" + 0.005*"party" + 0.004*"obama" + 0.004*"political" + 0.004*"trump" + 0.003*"campaign" + 0.003*"clinton" + 0.002*"russia"
2018-03-08 07:31:34,622 : INFO : topic #4 (0.056): 0.013*"percent" + 0.007*"said" + 0.007*"year" + 0.005*"company" + 0.005*"mi

2018-03-08 07:37:20,688 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:38:15,432 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"park" + 0.002*"house"
2018-03-08 07:38:15,874 : INFO : topic #7 (0.056): 0.005*"la" + 0.004*"en" + 0.004*"el" + 0.003*"food" + 0.003*"los" + 0.002*"south" + 0.002*"restaurant" + 0.002*"north" + 0.002*"korea" + 0.002*"wine"
2018-03-08 07:38:16,177 : INFO : topic #13 (0.056): 0.007*"new" + 0.004*"art" + 0.003*"mr" + 0.003*"said" + 0.003*"million" + 0.003*"museum" + 0.003*"new york" + 0.003*"york" + 0.002*"director" + 0.002*"media"
2018-03-08 07:38:16,615 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"said" + 0.002*"sports" + 0.002*"year" + 0.002*"players" + 0.002*"time" + 0.002*"won" + 0.002*"club" + 0.002*"race"
2018-03-08 07:38:16,978 : INFO : topic #1 (0.056): 0.014*"said" + 0.007*"ms

2018-03-08 07:44:58,875 : INFO : topic #1 (0.056): 0.014*"said" + 0.007*"ms" + 0.006*"like" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"know" + 0.003*"think" + 0.003*"years"
2018-03-08 07:44:59,257 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"house" + 0.002*"park"
2018-03-08 07:44:59,522 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"european" + 0.005*"bank" + 0.004*"government" + 0.004*"chinese" + 0.003*"said" + 0.003*"financial" + 0.003*"europe" + 0.002*"britain" + 0.002*"union"
2018-03-08 07:45:00,328 : INFO : topic diff=0.291212, rho=0.117851
2018-03-08 07:45:00,593 : INFO : PROGRESS: pass 1, at document #128000/140000
2018-03-08 07:45:10,478 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:46:05,018 : INFO : topic #6 (0.056): 0.005*"said" + 0.004*"game" + 0.004*"season" + 0.003*"run" + 0.003*"m

2018-03-08 07:51:42,316 : INFO : topic #7 (0.056): 0.004*"korea" + 0.004*"food" + 0.003*"north" + 0.003*"la" + 0.003*"south" + 0.002*"korean" + 0.002*"wine" + 0.002*"north korea" + 0.002*"cooking" + 0.002*"restaurant"
2018-03-08 07:51:43,117 : INFO : topic diff=0.304457, rho=0.117851
2018-03-08 07:51:43,456 : INFO : PROGRESS: pass 1, at document #140000/140000
2018-03-08 07:51:53,236 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:52:47,874 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.003*"students" + 0.003*"new" + 0.003*"health" + 0.003*"school" + 0.003*"medical" + 0.003*"study" + 0.003*"college" + 0.002*"patients"
2018-03-08 07:52:48,298 : INFO : topic #12 (0.056): 0.008*"mr" + 0.004*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.002*"ms" + 0.002*"york" + 0.002*"new york"
2018-03-08 07:52:48,668 : INFO : topic #16 (0.056): 0.033*"mr" + 0.019*"trump" + 0.011*"mr trump" + 0.010*"pre

2018-03-08 07:58:27,947 : INFO : PROGRESS: pass 2, at document #12000/140000
2018-03-08 07:58:38,053 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 07:59:32,635 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.003*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"mets" + 0.003*"hit" + 0.002*"home" + 0.002*"runs"
2018-03-08 07:59:33,060 : INFO : topic #11 (0.056): 0.003*"times" + 0.003*"church" + 0.003*"article" + 0.002*"gay" + 0.002*"sex" + 0.002*"com" + 0.001*"news" + 0.001*"book" + 0.001*"readers" + 0.001*"writes"
2018-03-08 07:59:33,471 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"billion" + 0.004*"market" + 0.003*"companies" + 0.002*"years" + 0.002*"business"
2018-03-08 07:59:33,881 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"people" + 0.004*"military" + 0.003*"officials" + 0.003*"police" + 0.003*"security" + 0.003

2018-03-08 08:06:18,094 : INFO : topic #13 (0.056): 0.006*"new" + 0.004*"mr" + 0.003*"said" + 0.003*"art" + 0.003*"museum" + 0.002*"new york" + 0.002*"york" + 0.002*"million" + 0.002*"director" + 0.002*"group"
2018-03-08 08:06:18,553 : INFO : topic #16 (0.056): 0.030*"mr" + 0.007*"president" + 0.006*"said" + 0.005*"trump" + 0.005*"political" + 0.004*"party" + 0.004*"obama" + 0.004*"campaign" + 0.003*"mr trump" + 0.002*"election"
2018-03-08 08:06:18,882 : INFO : topic #12 (0.056): 0.009*"mr" + 0.004*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.001*"york" + 0.001*"new york" + 0.001*"ms"
2018-03-08 08:06:19,182 : INFO : topic #14 (0.056): 0.005*"china" + 0.005*"bank" + 0.005*"european" + 0.004*"government" + 0.004*"said" + 0.003*"chinese" + 0.003*"financial" + 0.003*"banks" + 0.003*"europe" + 0.003*"debt"
2018-03-08 08:06:19,983 : INFO : topic diff=0.260914, rho=0.117041
2018-03-08 08:06:20,244 : INFO : PROGRESS: pass 2, at document #26000/1400

2018-03-08 08:13:01,706 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"security" + 0.003*"officials" + 0.003*"police" + 0.002*"killed" + 0.002*"united" + 0.002*"american"
2018-03-08 08:13:02,186 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"officials" + 0.003*"american" + 0.003*"nuclear" + 0.002*"energy" + 0.002*"power" + 0.002*"agency"
2018-03-08 08:13:02,992 : INFO : topic diff=0.256969, rho=0.117041
2018-03-08 08:13:03,257 : INFO : PROGRESS: pass 2, at document #38000/140000
2018-03-08 08:13:13,295 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 08:14:07,943 : INFO : topic #15 (0.056): 0.020*"mr" + 0.015*"said" + 0.004*"case" + 0.004*"police" + 0.004*"court" + 0.002*"did" + 0.002*"department" + 0.002*"law" + 0.002*"ms" + 0.002*"judge"
2018-03-08 08:14:08,236 : INFO : topic #17 (0.056): 0.007*"said" + 0.006*"city" + 0.005*"ne

2018-03-08 08:19:46,343 : INFO : topic diff=0.264260, rho=0.117041
2018-03-08 08:19:46,595 : INFO : PROGRESS: pass 2, at document #50000/140000
2018-03-08 08:19:56,486 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 08:20:51,003 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"film" + 0.003*"music" + 0.003*"new" + 0.002*"like" + 0.002*"theater" + 0.002*"movie" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 08:20:51,465 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"security" + 0.003*"officials" + 0.003*"police" + 0.002*"killed" + 0.002*"united" + 0.002*"syria"
2018-03-08 08:20:51,818 : INFO : topic #13 (0.056): 0.007*"new" + 0.004*"mr" + 0.004*"said" + 0.003*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"million" + 0.003*"museum" + 0.002*"director" + 0.002*"group"
2018-03-08 08:20:52,257 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.004*"case" + 0.004*"police" + 0

2018-03-08 08:27:33,850 : INFO : topic #16 (0.056): 0.030*"mr" + 0.007*"president" + 0.006*"said" + 0.005*"obama" + 0.005*"party" + 0.004*"political" + 0.003*"campaign" + 0.003*"mr obama" + 0.002*"election" + 0.002*"israel"
2018-03-08 08:27:34,271 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"security" + 0.003*"police" + 0.003*"officials" + 0.003*"killed" + 0.002*"syria" + 0.002*"united"
2018-03-08 08:27:34,733 : INFO : topic #7 (0.056): 0.004*"food" + 0.003*"north" + 0.003*"south" + 0.003*"korea" + 0.002*"wine" + 0.002*"restaurant" + 0.002*"korean" + 0.001*"north korea" + 0.001*"kim" + 0.001*"meat"
2018-03-08 08:27:35,165 : INFO : topic #15 (0.056): 0.019*"mr" + 0.015*"said" + 0.004*"police" + 0.004*"case" + 0.004*"court" + 0.002*"judge" + 0.002*"did" + 0.002*"department" + 0.002*"ms" + 0.002*"law"
2018-03-08 08:27:35,968 : INFO : topic diff=0.254277, rho=0.117041
2018-03-08 08:27:36,220 : INFO : PROGRESS: pass 2, at document

2018-03-08 08:34:16,663 : INFO : topic #9 (0.056): 0.005*"like" + 0.005*"company" + 0.003*"said" + 0.003*"new" + 0.003*"online" + 0.002*"people" + 0.002*"use" + 0.002*"technology" + 0.002*"business" + 0.002*"internet"
2018-03-08 08:34:17,014 : INFO : topic #13 (0.056): 0.007*"new" + 0.004*"mr" + 0.004*"said" + 0.003*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.003*"director" + 0.002*"group"
2018-03-08 08:34:17,817 : INFO : topic diff=0.252134, rho=0.117041
2018-03-08 08:34:18,072 : INFO : PROGRESS: pass 2, at document #76000/140000
2018-03-08 08:34:27,935 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 08:35:22,560 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"house" + 0.002*"park" + 0.002*"york" + 0.002*"new york"
2018-03-08 08:35:22,841 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"european" + 0.005*"bank" + 0.004*"governmen

2018-03-08 08:41:01,150 : INFO : topic diff=0.267471, rho=0.117041
2018-03-08 08:41:01,407 : INFO : PROGRESS: pass 2, at document #88000/140000
2018-03-08 08:41:11,265 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 08:42:05,860 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"mets" + 0.003*"hit" + 0.003*"baseball" + 0.002*"runs" + 0.002*"league"
2018-03-08 08:42:06,248 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"mr" + 0.003*"law" + 0.003*"federal" + 0.002*"republican" + 0.002*"city" + 0.002*"people" + 0.002*"house"
2018-03-08 08:42:06,603 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"american" + 0.003*"officials" + 0.002*"agency" + 0.002*"oil" + 0.002*"energy" + 0.002*"water"
2018-03-08 08:42:07,045 : INFO : topic #7 (0.056): 0.005*"la" + 0.004*"en" + 0.004*"el" + 0.003*"food" + 0.003*"l

2018-03-08 08:48:48,906 : INFO : topic #10 (0.056): 0.007*"game" + 0.005*"season" + 0.005*"said" + 0.004*"team" + 0.004*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.002*"second" + 0.002*"players"
2018-03-08 08:48:49,262 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"mr" + 0.003*"law" + 0.003*"federal" + 0.002*"republican" + 0.002*"people" + 0.002*"city" + 0.002*"house"
2018-03-08 08:48:49,630 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"park" + 0.002*"york" + 0.002*"new york" + 0.002*"house"
2018-03-08 08:48:49,943 : INFO : topic #16 (0.056): 0.028*"mr" + 0.006*"president" + 0.006*"said" + 0.004*"party" + 0.004*"obama" + 0.004*"political" + 0.003*"campaign" + 0.003*"clinton" + 0.002*"russia" + 0.002*"mr obama"
2018-03-08 08:48:50,744 : INFO : topic diff=0.268341, rho=0.117041
2018-03-08 08:48:50,992 : INFO : PROGRESS: pass 2, at document #102000/140000
2018-

2018-03-08 08:55:31,279 : INFO : topic #11 (0.056): 0.004*"que" + 0.004*"article" + 0.003*"church" + 0.003*"times" + 0.002*"gay" + 0.002*"nytimes" + 0.002*"com" + 0.002*"book" + 0.002*"sex" + 0.001*"nytimes com"
2018-03-08 08:55:31,755 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"new" + 0.003*"students" + 0.003*"health" + 0.003*"medical" + 0.003*"school" + 0.003*"college" + 0.002*"study" + 0.002*"york"
2018-03-08 08:55:32,556 : INFO : topic diff=0.276024, rho=0.117041
2018-03-08 08:55:32,805 : INFO : PROGRESS: pass 2, at document #114000/140000
2018-03-08 08:55:42,680 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 08:56:37,346 : INFO : topic #7 (0.056): 0.005*"la" + 0.004*"en" + 0.004*"el" + 0.003*"food" + 0.003*"los" + 0.002*"restaurant" + 0.002*"wine" + 0.002*"south" + 0.002*"korea" + 0.002*"north"
2018-03-08 08:56:37,637 : INFO : topic #0 (0.056): 0.010*"said" + 0.004*"government" + 0.004*"people" + 0.003*"state" + 0.0

2018-03-08 09:02:14,442 : INFO : topic diff=0.295976, rho=0.117041
2018-03-08 09:02:14,699 : INFO : PROGRESS: pass 2, at document #126000/140000
2018-03-08 09:02:24,499 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 09:03:19,196 : INFO : topic #16 (0.056): 0.031*"mr" + 0.016*"trump" + 0.009*"mr trump" + 0.008*"president" + 0.005*"said" + 0.004*"clinton" + 0.004*"party" + 0.004*"campaign" + 0.004*"political" + 0.003*"obama"
2018-03-08 09:03:19,699 : INFO : topic #7 (0.056): 0.004*"la" + 0.004*"food" + 0.003*"korea" + 0.002*"en" + 0.002*"el" + 0.002*"south" + 0.002*"north" + 0.002*"restaurant" + 0.002*"wine" + 0.002*"los"
2018-03-08 09:03:20,067 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"house" + 0.002*"park"
2018-03-08 09:03:20,510 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"students" + 0.003*"new" + 0.00

2018-03-08 09:10:02,135 : INFO : topic #12 (0.056): 0.008*"mr" + 0.004*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.002*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 09:10:02,474 : INFO : topic #11 (0.056): 0.004*"times" + 0.004*"article" + 0.003*"church" + 0.002*"gay" + 0.002*"book" + 0.002*"news" + 0.002*"sex" + 0.001*"read" + 0.001*"com" + 0.001*"nytimes"
2018-03-08 09:10:02,832 : INFO : topic #16 (0.056): 0.033*"mr" + 0.019*"trump" + 0.011*"mr trump" + 0.010*"president" + 0.005*"said" + 0.004*"political" + 0.004*"party" + 0.004*"campaign" + 0.003*"election" + 0.003*"obama"
2018-03-08 09:10:03,235 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"house" + 0.002*"park"
2018-03-08 09:10:04,042 : INFO : topic diff=0.297258, rho=0.117041
2018-03-08 09:10:04,298 : INFO : PROGRESS: pass 2, at document #140000/140000
2018-03-08 09:

2018-03-08 09:16:47,690 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"house" + 0.002*"york" + 0.002*"new york" + 0.002*"park"
2018-03-08 09:16:48,162 : INFO : topic #13 (0.056): 0.006*"new" + 0.004*"mr" + 0.003*"said" + 0.003*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.002*"million" + 0.002*"director" + 0.002*"work"
2018-03-08 09:16:48,964 : INFO : topic diff=0.262536, rho=0.116248
2018-03-08 09:16:49,219 : INFO : PROGRESS: pass 3, at document #12000/140000
2018-03-08 09:16:59,158 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 09:17:53,770 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"online" + 0.003*"said" + 0.002*"people" + 0.002*"use" + 0.002*"facebook" + 0.002*"technology" + 0.002*"internet"
2018-03-08 09:17:54,104 : INFO : topic #7 (0.056): 0.004*"food" + 0.002*"korea" + 0.002*"north" + 0.002*"south" + 0.002

2018-03-08 09:23:32,607 : INFO : topic diff=0.264451, rho=0.116248
2018-03-08 09:23:32,866 : INFO : PROGRESS: pass 3, at document #24000/140000
2018-03-08 09:23:42,978 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 09:24:37,676 : INFO : topic #17 (0.056): 0.006*"said" + 0.006*"city" + 0.005*"new" + 0.004*"mr" + 0.004*"street" + 0.003*"building" + 0.003*"house" + 0.002*"park" + 0.002*"york" + 0.002*"new york"
2018-03-08 09:24:37,968 : INFO : topic #8 (0.056): 0.008*"dr" + 0.007*"university" + 0.004*"students" + 0.004*"new" + 0.003*"school" + 0.003*"health" + 0.003*"medical" + 0.003*"college" + 0.003*"study" + 0.002*"york"
2018-03-08 09:24:38,249 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"mets" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.002*"runs" + 0.002*"team"
2018-03-08 09:24:38,693 : INFO : topic #16 (0.056): 0.030*"mr" + 0.007*"president" + 0.006*"said" + 0.005*"trump" + 0.004*"

2018-03-08 09:31:21,516 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"run" + 0.004*"season" + 0.003*"yankees" + 0.003*"mets" + 0.003*"hit" + 0.003*"baseball" + 0.003*"runs" + 0.002*"team"
2018-03-08 09:31:21,860 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"security" + 0.003*"officials" + 0.002*"police" + 0.002*"united" + 0.002*"killed" + 0.002*"american"
2018-03-08 09:31:22,377 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 09:31:22,780 : INFO : topic #15 (0.056): 0.020*"mr" + 0.015*"said" + 0.004*"case" + 0.004*"police" + 0.004*"court" + 0.002*"did" + 0.002*"department" + 0.002*"ms" + 0.002*"law" + 0.002*"judge"
2018-03-08 09:31:23,585 : INFO : topic diff=0.252705, rho=0.116248
2018-03-08 09:31:23,868 : INFO : PROGRESS: pass 3, at document #38000/140000
2018-03-08 0

2018-03-08 09:38:04,458 : INFO : topic #4 (0.056): 0.013*"percent" + 0.009*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.002*"sales" + 0.002*"years"
2018-03-08 09:38:04,891 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"film" + 0.003*"music" + 0.003*"new" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 09:38:05,692 : INFO : topic diff=0.260265, rho=0.116248
2018-03-08 09:38:05,949 : INFO : PROGRESS: pass 3, at document #50000/140000
2018-03-08 09:38:15,794 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 09:39:10,495 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"united" + 0.004*"states" + 0.004*"united states" + 0.003*"officials" + 0.003*"american" + 0.003*"nuclear" + 0.002*"agency" + 0.002*"power" + 0.002*"energy"
2018-03-08 09:39:10,965 : INFO : topic #13 (0.056): 0.007*"new" + 0.005*"mr" + 0.004*"said" + 0.003*"

2018-03-08 09:44:48,216 : INFO : PROGRESS: pass 3, at document #62000/140000
2018-03-08 09:44:57,849 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 09:45:52,653 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"film" + 0.003*"new" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 09:45:53,070 : INFO : topic #10 (0.056): 0.008*"game" + 0.006*"said" + 0.005*"season" + 0.004*"team" + 0.003*"points" + 0.003*"play" + 0.003*"coach" + 0.003*"games" + 0.002*"players" + 0.002*"second"
2018-03-08 09:45:53,477 : INFO : topic #13 (0.056): 0.007*"new" + 0.005*"mr" + 0.004*"said" + 0.003*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.002*"director" + 0.002*"group"
2018-03-08 09:45:53,852 : INFO : topic #5 (0.056): 0.004*"team" + 0.003*"world" + 0.003*"said" + 0.002*"sports" + 0.002*"players" + 0.002*"year" + 0.002*"club" + 0.002*"cup" + 0.002*"won" + 0.002*

2018-03-08 09:52:35,016 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"new" + 0.004*"students" + 0.003*"health" + 0.003*"medical" + 0.003*"school" + 0.003*"college" + 0.003*"york" + 0.002*"new york"
2018-03-08 09:52:35,495 : INFO : topic #12 (0.056): 0.009*"mr" + 0.004*"music" + 0.003*"film" + 0.003*"new" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.001*"ms" + 0.001*"festival" + 0.001*"dance"
2018-03-08 09:52:35,904 : INFO : topic #11 (0.056): 0.003*"article" + 0.003*"church" + 0.003*"gay" + 0.002*"book" + 0.002*"times" + 0.002*"nytimes" + 0.002*"com" + 0.002*"nytimes com" + 0.001*"sex" + 0.001*"news"
2018-03-08 09:52:36,427 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"officials" + 0.003*"american" + 0.002*"agency" + 0.002*"oil" + 0.002*"energy" + 0.002*"water"
2018-03-08 09:52:37,228 : INFO : topic diff=0.248861, rho=0.116248
2018-03-08 09:52:37,484 : INFO : PROGRESS: pass 3, at document #76000/

2018-03-08 09:59:18,910 : INFO : topic #11 (0.056): 0.004*"que" + 0.004*"article" + 0.003*"church" + 0.002*"gay" + 0.002*"times" + 0.002*"book" + 0.002*"nytimes" + 0.002*"com" + 0.001*"nytimes com" + 0.001*"news"
2018-03-08 09:59:19,323 : INFO : topic #7 (0.056): 0.004*"la" + 0.004*"food" + 0.003*"en" + 0.003*"el" + 0.002*"south" + 0.002*"los" + 0.002*"north" + 0.002*"restaurant" + 0.002*"wine" + 0.002*"korea"
2018-03-08 09:59:20,124 : INFO : topic diff=0.264153, rho=0.116248
2018-03-08 09:59:20,378 : INFO : PROGRESS: pass 3, at document #88000/140000
2018-03-08 09:59:30,234 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 10:00:24,935 : INFO : topic #10 (0.056): 0.007*"game" + 0.005*"said" + 0.005*"season" + 0.004*"team" + 0.004*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.002*"second" + 0.002*"players"
2018-03-08 10:00:25,245 : INFO : topic #7 (0.056): 0.005*"la" + 0.004*"en" + 0.003*"el" + 0.003*"food" + 0.003*"los" + 0.002*"sou

2018-03-08 10:06:02,774 : INFO : topic diff=0.262375, rho=0.116248
2018-03-08 10:06:03,032 : INFO : PROGRESS: pass 3, at document #100000/140000
2018-03-08 10:06:12,824 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 10:07:07,395 : INFO : topic #4 (0.056): 0.013*"percent" + 0.007*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.004*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"new"
2018-03-08 10:07:07,726 : INFO : topic #10 (0.056): 0.007*"game" + 0.005*"season" + 0.005*"said" + 0.004*"team" + 0.004*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.002*"second" + 0.002*"players"
2018-03-08 10:07:08,099 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"said" + 0.003*"online" + 0.002*"people" + 0.002*"use" + 0.002*"technology" + 0.002*"business" + 0.002*"companies"
2018-03-08 10:07:08,464 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"c

2018-03-08 10:13:49,093 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"students" + 0.003*"new" + 0.003*"health" + 0.003*"school" + 0.003*"medical" + 0.003*"college" + 0.002*"study" + 0.002*"york"
2018-03-08 10:13:49,487 : INFO : topic #11 (0.056): 0.004*"que" + 0.004*"article" + 0.003*"church" + 0.003*"times" + 0.002*"gay" + 0.002*"book" + 0.002*"nytimes" + 0.002*"com" + 0.002*"sex" + 0.001*"nytimes com"
2018-03-08 10:13:49,937 : INFO : topic #17 (0.056): 0.006*"city" + 0.005*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"park" + 0.002*"house"
2018-03-08 10:13:50,302 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"people" + 0.003*"federal" + 0.002*"republican" + 0.002*"city" + 0.002*"public"
2018-03-08 10:13:51,103 : INFO : topic diff=0.273053, rho=0.116248
2018-03-08 10:13:51,351 : INFO : PROGRESS: pass 3, at document #114000/140000
2018-03-0

2018-03-08 10:20:32,473 : INFO : topic #12 (0.056): 0.008*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 10:20:32,880 : INFO : topic #1 (0.056): 0.015*"said" + 0.007*"like" + 0.007*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"know" + 0.003*"years" + 0.003*"think"
2018-03-08 10:20:33,684 : INFO : topic diff=0.292884, rho=0.116248
2018-03-08 10:20:33,943 : INFO : PROGRESS: pass 3, at document #126000/140000
2018-03-08 10:20:43,738 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 10:21:38,400 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"online" + 0.002*"facebook" + 0.002*"said" + 0.002*"people" + 0.002*"use" + 0.002*"technology" + 0.002*"companies"
2018-03-08 10:21:38,804 : INFO : topic #13 (0.056): 0.007*"new" + 0.005*"mr" + 0.004*"art" + 0.003*"said" + 0.003*"york" + 0.003*"

2018-03-08 10:27:15,804 : INFO : topic diff=0.287500, rho=0.116248
2018-03-08 10:27:16,069 : INFO : PROGRESS: pass 3, at document #138000/140000
2018-03-08 10:27:25,926 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 10:28:20,744 : INFO : topic #11 (0.056): 0.004*"times" + 0.004*"article" + 0.003*"church" + 0.002*"gay" + 0.002*"book" + 0.002*"news" + 0.002*"read" + 0.002*"sex" + 0.001*"com" + 0.001*"nytimes"
2018-03-08 10:28:21,167 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"students" + 0.003*"school" + 0.003*"new" + 0.003*"health" + 0.003*"medical" + 0.003*"study" + 0.003*"college" + 0.002*"patients"
2018-03-08 10:28:21,591 : INFO : topic #1 (0.056): 0.014*"said" + 0.007*"like" + 0.007*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"know" + 0.003*"think" + 0.003*"years"
2018-03-08 10:28:21,997 : INFO : topic #16 (0.056): 0.033*"mr" + 0.019*"trump" + 0.011*"mr trump" + 0.010*"president" + 0.005*

2018-03-08 10:35:04,316 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.003*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"mets" + 0.003*"hit" + 0.002*"home" + 0.002*"runs"
2018-03-08 10:35:04,730 : INFO : topic #1 (0.056): 0.015*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"know" + 0.003*"years" + 0.003*"think"
2018-03-08 10:35:05,052 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"bank" + 0.005*"european" + 0.004*"government" + 0.004*"chinese" + 0.004*"said" + 0.003*"financial" + 0.003*"europe" + 0.003*"banks" + 0.002*"union"
2018-03-08 10:35:05,500 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"play" + 0.001*"ms" + 0.001*"york"
2018-03-08 10:35:06,303 : INFO : topic diff=0.259899, rho=0.115470
2018-03-08 10:35:06,560 : INFO : PROGRESS: pass 4, at document #12000/140000
2018-03-08 10:35:16,489 : I

2018-03-08 10:41:49,480 : INFO : topic #8 (0.056): 0.008*"dr" + 0.007*"university" + 0.004*"students" + 0.004*"new" + 0.003*"school" + 0.003*"health" + 0.003*"medical" + 0.003*"college" + 0.003*"study" + 0.002*"york"
2018-03-08 10:41:49,851 : INFO : topic #15 (0.056): 0.020*"mr" + 0.015*"said" + 0.004*"case" + 0.004*"police" + 0.004*"court" + 0.003*"did" + 0.002*"ms" + 0.002*"department" + 0.002*"law" + 0.002*"judge"
2018-03-08 10:41:50,654 : INFO : topic diff=0.261895, rho=0.115470
2018-03-08 10:41:50,912 : INFO : PROGRESS: pass 4, at document #24000/140000
2018-03-08 10:42:00,827 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 10:42:55,450 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"mets" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.002*"runs" + 0.002*"team"
2018-03-08 10:42:55,891 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company" + 0.005*"mil

2018-03-08 10:48:33,414 : INFO : topic diff=0.246733, rho=0.115470
2018-03-08 10:48:33,672 : INFO : PROGRESS: pass 4, at document #36000/140000
2018-03-08 10:48:43,645 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 10:49:38,276 : INFO : topic #4 (0.056): 0.014*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.002*"sales" + 0.002*"years"
2018-03-08 10:49:38,637 : INFO : topic #10 (0.056): 0.007*"game" + 0.006*"said" + 0.005*"season" + 0.004*"team" + 0.004*"points" + 0.003*"play" + 0.003*"coach" + 0.003*"games" + 0.003*"players" + 0.002*"second"
2018-03-08 10:49:39,048 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 10:49:39,365 : INFO : topic #5 (0.056): 0.004*"team" + 0.003*"said" + 0.003*"world" + 0.002*"players" + 0.00

2018-03-08 10:56:20,870 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.004*"police" + 0.004*"case" + 0.004*"court" + 0.002*"did" + 0.002*"department" + 0.002*"judge" + 0.002*"ms" + 0.002*"law"
2018-03-08 10:56:21,293 : INFO : topic #8 (0.056): 0.008*"dr" + 0.007*"university" + 0.004*"new" + 0.004*"students" + 0.003*"school" + 0.003*"health" + 0.003*"medical" + 0.003*"college" + 0.003*"study" + 0.002*"york"
2018-03-08 10:56:21,703 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"mr" + 0.003*"law" + 0.003*"federal" + 0.003*"house" + 0.002*"tax" + 0.002*"health" + 0.002*"republican"
2018-03-08 10:56:21,935 : INFO : topic #14 (0.056): 0.005*"china" + 0.005*"european" + 0.005*"bank" + 0.005*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"europe" + 0.003*"financial" + 0.003*"banks" + 0.002*"countries"
2018-03-08 10:56:22,736 : INFO : topic diff=0.258092, rho=0.115470
2018-03-08 10:56:22,986 : INFO : PROGRESS: pass 4, at document #50000/140000
2

2018-03-08 11:03:04,485 : INFO : topic #13 (0.056): 0.007*"new" + 0.005*"mr" + 0.004*"said" + 0.003*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.002*"director" + 0.002*"group"
2018-03-08 11:03:04,911 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"new" + 0.004*"students" + 0.003*"school" + 0.003*"medical" + 0.003*"health" + 0.003*"college" + 0.002*"york" + 0.002*"new york"
2018-03-08 11:03:05,712 : INFO : topic diff=0.248660, rho=0.115470
2018-03-08 11:03:05,958 : INFO : PROGRESS: pass 4, at document #62000/140000
2018-03-08 11:03:15,635 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 11:04:10,193 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.003*"mets" + 0.003*"league" + 0.002*"runs"
2018-03-08 11:04:10,638 : INFO : topic #4 (0.056): 0.014*"percent" + 0.009*"said" + 0.007*"year" + 0.005*"company" +

2018-03-08 11:09:47,920 : INFO : topic diff=0.252949, rho=0.115470
2018-03-08 11:09:48,164 : INFO : PROGRESS: pass 4, at document #74000/140000
2018-03-08 11:09:57,831 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 11:10:52,510 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"officials" + 0.003*"security" + 0.003*"united" + 0.002*"police" + 0.002*"killed" + 0.002*"syria"
2018-03-08 11:10:52,918 : INFO : topic #12 (0.056): 0.009*"mr" + 0.004*"music" + 0.003*"film" + 0.003*"new" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.001*"ms" + 0.001*"festival" + 0.001*"dance"
2018-03-08 11:10:53,335 : INFO : topic #10 (0.056): 0.008*"game" + 0.006*"said" + 0.005*"season" + 0.004*"team" + 0.003*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.002*"players" + 0.002*"second"
2018-03-08 11:10:53,651 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 

2018-03-08 11:17:35,923 : INFO : topic #7 (0.056): 0.004*"la" + 0.004*"food" + 0.003*"en" + 0.003*"el" + 0.002*"south" + 0.002*"los" + 0.002*"north" + 0.002*"restaurant" + 0.002*"wine" + 0.002*"korea"
2018-03-08 11:17:36,263 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"european" + 0.005*"bank" + 0.004*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"europe" + 0.003*"financial" + 0.003*"countries" + 0.002*"union"
2018-03-08 11:17:36,694 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"united" + 0.003*"officials" + 0.003*"security" + 0.002*"police" + 0.002*"killed" + 0.002*"syria"
2018-03-08 11:17:37,122 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"said" + 0.003*"players" + 0.003*"cup" + 0.003*"sports" + 0.002*"year" + 0.002*"won" + 0.002*"club" + 0.002*"time"
2018-03-08 11:17:37,925 : INFO : topic diff=0.261982, rho=0.115470
2018-03-08 11:17:38,176 : INFO : PROGRESS: pass 4, at document #88000/14000

2018-03-08 11:24:19,289 : INFO : topic #16 (0.056): 0.028*"mr" + 0.006*"president" + 0.006*"said" + 0.004*"obama" + 0.004*"party" + 0.004*"political" + 0.003*"campaign" + 0.003*"clinton" + 0.003*"mr obama" + 0.003*"russia"
2018-03-08 11:24:19,644 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.004*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"new"
2018-03-08 11:24:20,447 : INFO : topic diff=0.260274, rho=0.115470
2018-03-08 11:24:20,698 : INFO : PROGRESS: pass 4, at document #100000/140000
2018-03-08 11:24:30,508 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 11:25:25,151 : INFO : topic #4 (0.056): 0.013*"percent" + 0.007*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.004*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"new"
2018-03-08 11:25:25,513 : INFO : topic #13 (0.056): 0.007*"new" + 0.005*"mr" + 0.004*"art" + 0

2018-03-08 11:31:01,898 : INFO : topic diff=0.269999, rho=0.115470
2018-03-08 11:31:02,156 : INFO : PROGRESS: pass 4, at document #112000/140000
2018-03-08 11:31:11,919 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 11:32:06,589 : INFO : topic #7 (0.056): 0.006*"la" + 0.004*"en" + 0.004*"el" + 0.003*"food" + 0.003*"los" + 0.002*"restaurant" + 0.002*"south" + 0.002*"wine" + 0.002*"korea" + 0.002*"north"
2018-03-08 11:32:06,956 : INFO : topic #16 (0.056): 0.029*"mr" + 0.010*"trump" + 0.006*"president" + 0.006*"mr trump" + 0.005*"said" + 0.005*"clinton" + 0.004*"party" + 0.004*"campaign" + 0.004*"obama" + 0.004*"political"
2018-03-08 11:32:07,158 : INFO : topic #6 (0.056): 0.005*"game" + 0.004*"said" + 0.004*"season" + 0.003*"run" + 0.003*"mets" + 0.003*"yankees" + 0.003*"hit" + 0.003*"baseball" + 0.002*"home" + 0.002*"runs"
2018-03-08 11:32:07,625 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"students" + 0.003*"new" + 0.003*

2018-03-08 11:38:48,477 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"united" + 0.004*"states" + 0.004*"united states" + 0.003*"water" + 0.002*"american" + 0.002*"officials" + 0.002*"agency" + 0.002*"energy" + 0.002*"power"
2018-03-08 11:38:48,945 : INFO : topic #4 (0.056): 0.012*"percent" + 0.007*"said" + 0.007*"year" + 0.006*"company" + 0.005*"million" + 0.004*"market" + 0.004*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"business"
2018-03-08 11:38:49,282 : INFO : topic #12 (0.056): 0.008*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 11:38:49,663 : INFO : topic #15 (0.056): 0.021*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"case" + 0.004*"court" + 0.003*"department" + 0.003*"ms" + 0.002*"did" + 0.002*"investigation" + 0.002*"judge"
2018-03-08 11:38:50,464 : INFO : topic diff=0.290600, rho=0.115470
2018-03-08 11:38:50,796 : INFO : PROGRESS: pass 4, at document #1

2018-03-08 11:45:31,869 : INFO : topic #2 (0.056): 0.009*"said" + 0.005*"united" + 0.005*"states" + 0.005*"united states" + 0.003*"north" + 0.003*"american" + 0.002*"water" + 0.002*"officials" + 0.002*"climate" + 0.002*"nuclear"
2018-03-08 11:45:32,102 : INFO : topic #14 (0.056): 0.007*"china" + 0.005*"european" + 0.005*"chinese" + 0.004*"bank" + 0.004*"government" + 0.003*"said" + 0.003*"europe" + 0.003*"financial" + 0.003*"union" + 0.002*"britain"
2018-03-08 11:45:32,902 : INFO : topic diff=0.285398, rho=0.115470
2018-03-08 11:45:33,168 : INFO : PROGRESS: pass 4, at document #138000/140000
2018-03-08 11:45:43,115 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 11:46:37,800 : INFO : topic #16 (0.056): 0.033*"mr" + 0.019*"trump" + 0.011*"mr trump" + 0.010*"president" + 0.005*"said" + 0.004*"political" + 0.004*"party" + 0.004*"campaign" + 0.003*"election" + 0.003*"obama"
2018-03-08 11:46:38,195 : INFO : topic #3 (0.056): 0.009*"said" + 0.006*"sta

2018-03-08 11:52:15,907 : INFO : topic diff=0.254797, rho=0.114708
2018-03-08 11:52:16,167 : INFO : PROGRESS: pass 5, at document #10000/140000
2018-03-08 11:52:26,116 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 11:53:20,879 : INFO : topic #3 (0.056): 0.009*"said" + 0.006*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"federal" + 0.003*"people" + 0.003*"house" + 0.003*"tax" + 0.002*"republican"
2018-03-08 11:53:21,180 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"online" + 0.003*"said" + 0.002*"people" + 0.002*"use" + 0.002*"facebook" + 0.002*"technology" + 0.002*"internet"
2018-03-08 11:53:21,497 : INFO : topic #11 (0.056): 0.003*"times" + 0.003*"article" + 0.003*"church" + 0.002*"gay" + 0.002*"book" + 0.002*"sex" + 0.001*"com" + 0.001*"news" + 0.001*"read" + 0.001*"readers"
2018-03-08 11:53:21,874 : INFO : topic #16 (0.056): 0.032*"mr" + 0.012*"trump" + 0.009*"president" + 0.007*"mr trump" + 0.0

2018-03-08 12:00:05,070 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"nuclear" + 0.003*"officials" + 0.003*"american" + 0.002*"energy" + 0.002*"power" + 0.002*"water"
2018-03-08 12:00:05,501 : INFO : topic #13 (0.056): 0.007*"new" + 0.005*"mr" + 0.004*"said" + 0.004*"art" + 0.003*"museum" + 0.003*"york" + 0.003*"new york" + 0.002*"million" + 0.002*"director" + 0.002*"work"
2018-03-08 12:00:05,833 : INFO : topic #11 (0.056): 0.003*"church" + 0.002*"times" + 0.002*"article" + 0.002*"gay" + 0.002*"book" + 0.002*"sex" + 0.001*"com" + 0.001*"news" + 0.001*"writes" + 0.001*"read"
2018-03-08 12:00:06,242 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.004*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"prices"
2018-03-08 12:00:07,044 : INFO : topic diff=0.259721, rho=0.114708
2018-03-08 12:00:07,302 : INFO : PROGRESS: pass 5, at document #24000/

2018-03-08 12:06:48,451 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"years" + 0.003*"know" + 0.003*"way"
2018-03-08 12:06:48,693 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"said" + 0.003*"online" + 0.003*"web" + 0.002*"people" + 0.002*"use" + 0.002*"site" + 0.002*"facebook"
2018-03-08 12:06:49,509 : INFO : topic diff=0.244821, rho=0.114708
2018-03-08 12:06:49,767 : INFO : PROGRESS: pass 5, at document #36000/140000
2018-03-08 12:06:59,732 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 12:07:54,299 : INFO : topic #7 (0.056): 0.004*"food" + 0.002*"restaurant" + 0.002*"north" + 0.002*"wine" + 0.002*"south" + 0.002*"korea" + 0.001*"korean" + 0.001*"kim" + 0.001*"la" + 0.001*"chef"
2018-03-08 12:07:54,491 : INFO : topic #14 (0.056): 0.005*"china" + 0.005*"bank" + 0.005*"european" + 0.004*"government" + 0.004*"said" + 0.00

2018-03-08 12:13:31,166 : INFO : topic diff=0.255761, rho=0.114708
2018-03-08 12:13:31,421 : INFO : PROGRESS: pass 5, at document #48000/140000
2018-03-08 12:13:41,412 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 12:14:36,171 : INFO : topic #5 (0.056): 0.004*"team" + 0.003*"world" + 0.003*"said" + 0.003*"sports" + 0.002*"players" + 0.002*"year" + 0.002*"club" + 0.002*"won" + 0.002*"race" + 0.002*"united"
2018-03-08 12:14:36,595 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"federal" + 0.003*"house" + 0.002*"tax" + 0.002*"health" + 0.002*"people"
2018-03-08 12:14:37,000 : INFO : topic #16 (0.056): 0.031*"mr" + 0.007*"president" + 0.006*"said" + 0.005*"obama" + 0.005*"party" + 0.004*"political" + 0.004*"campaign" + 0.003*"romney" + 0.003*"mr obama" + 0.003*"election"
2018-03-08 12:14:37,213 : INFO : topic #14 (0.056): 0.005*"china" + 0.005*"european" + 0.005*"bank" + 0.005*"government" 

2018-03-08 12:21:18,696 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.005*"mr" + 0.004*"street" + 0.003*"building" + 0.003*"house" + 0.002*"park" + 0.002*"york" + 0.002*"new york"
2018-03-08 12:21:19,089 : INFO : topic #4 (0.056): 0.013*"percent" + 0.009*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.003*"sales" + 0.002*"years"
2018-03-08 12:21:19,401 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"said" + 0.003*"new" + 0.003*"online" + 0.002*"web" + 0.002*"people" + 0.002*"use" + 0.002*"site" + 0.002*"internet"
2018-03-08 12:21:19,864 : INFO : topic #10 (0.056): 0.008*"game" + 0.006*"said" + 0.005*"season" + 0.004*"team" + 0.003*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.002*"players" + 0.002*"second"
2018-03-08 12:21:20,663 : INFO : topic diff=0.246795, rho=0.114708
2018-03-08 12:21:20,919 : INFO : PROGRESS: pass 5, at document #62000/140000
2018-03-08

2018-03-08 12:28:01,732 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"years" + 0.003*"know" + 0.003*"way"
2018-03-08 12:28:02,004 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.003*"runs" + 0.002*"league" + 0.002*"mets"
2018-03-08 12:28:02,804 : INFO : topic diff=0.251080, rho=0.114708
2018-03-08 12:28:03,054 : INFO : PROGRESS: pass 5, at document #74000/140000
2018-03-08 12:28:12,679 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 12:29:07,476 : INFO : topic #13 (0.056): 0.008*"new" + 0.006*"mr" + 0.004*"said" + 0.004*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.003*"director" + 0.002*"group"
2018-03-08 12:29:07,799 : INFO : topic #16 (0.056): 0.030*"mr" + 0.006*"president" + 0.006*"said" + 0.005*"obama" + 0.004*"party" + 0.004*

2018-03-08 12:34:44,802 : INFO : topic diff=0.269377, rho=0.114708
2018-03-08 12:34:45,053 : INFO : PROGRESS: pass 5, at document #86000/140000
2018-03-08 12:34:54,794 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 12:35:49,523 : INFO : topic #16 (0.056): 0.029*"mr" + 0.006*"president" + 0.006*"said" + 0.005*"obama" + 0.004*"party" + 0.004*"political" + 0.003*"campaign" + 0.003*"russia" + 0.003*"mr obama" + 0.002*"election"
2018-03-08 12:35:49,843 : INFO : topic #6 (0.056): 0.005*"game" + 0.005*"said" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"hit" + 0.003*"mets" + 0.003*"baseball" + 0.002*"runs" + 0.002*"league"
2018-03-08 12:35:50,307 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"federal" + 0.002*"city" + 0.002*"republican" + 0.002*"people" + 0.002*"house"
2018-03-08 12:35:50,694 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"case" + 0.

2018-03-08 12:42:31,858 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"sports" + 0.003*"said" + 0.002*"players" + 0.002*"cup" + 0.002*"year" + 0.002*"club" + 0.002*"won" + 0.002*"league"
2018-03-08 12:42:32,153 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"united" + 0.004*"states" + 0.004*"united states" + 0.002*"officials" + 0.002*"american" + 0.002*"agency" + 0.002*"energy" + 0.002*"water" + 0.002*"oil"
2018-03-08 12:42:32,458 : INFO : topic #16 (0.056): 0.028*"mr" + 0.006*"president" + 0.006*"said" + 0.004*"obama" + 0.004*"party" + 0.004*"political" + 0.003*"campaign" + 0.003*"clinton" + 0.003*"mr obama" + 0.003*"russia"
2018-03-08 12:42:32,895 : INFO : topic #7 (0.056): 0.007*"la" + 0.006*"en" + 0.005*"el" + 0.004*"los" + 0.003*"food" + 0.002*"se" + 0.002*"las" + 0.002*"del" + 0.002*"south" + 0.002*"restaurant"
2018-03-08 12:42:33,694 : INFO : topic diff=0.258303, rho=0.114708
2018-03-08 12:42:34,029 : INFO : PROGRESS: pass 5, at document #100000/140000
2018-03

2018-03-08 12:49:12,584 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"bank" + 0.005*"european" + 0.004*"government" + 0.004*"chinese" + 0.003*"said" + 0.003*"europe" + 0.003*"financial" + 0.002*"union" + 0.002*"countries"
2018-03-08 12:49:12,926 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"federal" + 0.003*"people" + 0.002*"republican" + 0.002*"city" + 0.002*"public"
2018-03-08 12:49:13,728 : INFO : topic diff=0.267983, rho=0.114708
2018-03-08 12:49:13,979 : INFO : PROGRESS: pass 5, at document #112000/140000
2018-03-08 12:49:23,676 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 12:50:18,349 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"united" + 0.004*"states" + 0.004*"united states" + 0.003*"water" + 0.002*"american" + 0.002*"officials" + 0.002*"energy" + 0.002*"agency" + 0.002*"power"
2018-03-08 12:50:18,792 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*

2018-03-08 12:55:54,770 : INFO : topic diff=0.288093, rho=0.114708
2018-03-08 12:55:55,048 : INFO : PROGRESS: pass 5, at document #124000/140000
2018-03-08 12:56:04,965 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 12:56:59,638 : INFO : topic #11 (0.056): 0.004*"article" + 0.003*"times" + 0.003*"church" + 0.002*"gay" + 0.002*"que" + 0.002*"book" + 0.002*"nytimes" + 0.002*"com" + 0.002*"news" + 0.001*"nytimes com"
2018-03-08 12:56:59,878 : INFO : topic #6 (0.056): 0.004*"game" + 0.004*"said" + 0.004*"season" + 0.003*"run" + 0.003*"mets" + 0.003*"yankees" + 0.003*"hit" + 0.003*"baseball" + 0.002*"home" + 0.002*"series"
2018-03-08 12:57:00,265 : INFO : topic #17 (0.056): 0.006*"city" + 0.005*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"house" + 0.002*"park"
2018-03-08 12:57:00,633 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"sports" + 0.002*"players" + 0.002*"

2018-03-08 13:03:42,457 : INFO : topic #9 (0.056): 0.006*"company" + 0.004*"like" + 0.003*"new" + 0.003*"facebook" + 0.003*"president trump" + 0.003*"online" + 0.003*"use" + 0.002*"people" + 0.002*"said" + 0.002*"technology"
2018-03-08 13:03:42,894 : INFO : topic #4 (0.056): 0.011*"percent" + 0.007*"said" + 0.006*"year" + 0.006*"company" + 0.005*"million" + 0.004*"companies" + 0.003*"market" + 0.003*"billion" + 0.002*"business" + 0.002*"years"
2018-03-08 13:03:43,319 : INFO : topic #1 (0.056): 0.014*"said" + 0.007*"like" + 0.007*"ms" + 0.006*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"know" + 0.003*"think" + 0.003*"years"
2018-03-08 13:03:43,668 : INFO : topic #12 (0.056): 0.008*"mr" + 0.003*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.002*"ms" + 0.001*"play" + 0.001*"series"
2018-03-08 13:03:44,468 : INFO : topic diff=0.283369, rho=0.114708
2018-03-08 13:03:44,742 : INFO : PROGRESS: pass 5, at document #138000/140000
2018-

2018-03-08 13:10:26,282 : INFO : topic #11 (0.056): 0.003*"times" + 0.003*"article" + 0.003*"church" + 0.002*"gay" + 0.002*"book" + 0.002*"sex" + 0.001*"com" + 0.001*"news" + 0.001*"read" + 0.001*"readers"
2018-03-08 13:10:26,684 : INFO : topic #4 (0.056): 0.012*"percent" + 0.007*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"billion" + 0.004*"market" + 0.003*"companies" + 0.002*"years" + 0.002*"business"
2018-03-08 13:10:27,483 : INFO : topic diff=0.252769, rho=0.113961
2018-03-08 13:10:27,737 : INFO : PROGRESS: pass 6, at document #10000/140000
2018-03-08 13:10:37,682 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 13:11:32,360 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"play" + 0.001*"ms" + 0.001*"york"
2018-03-08 13:11:32,688 : INFO : topic #7 (0.056): 0.004*"food" + 0.002*"korea" + 0.002*"north" + 0.002*"la" + 0.002*"south" 

2018-03-08 13:17:10,694 : INFO : topic diff=0.247173, rho=0.113961
2018-03-08 13:17:10,957 : INFO : PROGRESS: pass 6, at document #22000/140000
2018-03-08 13:17:20,873 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 13:18:15,489 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.005*"mr" + 0.004*"street" + 0.003*"building" + 0.003*"house" + 0.002*"park" + 0.002*"york" + 0.002*"new york"
2018-03-08 13:18:15,877 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"years" + 0.003*"know" + 0.003*"way"
2018-03-08 13:18:16,235 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"mets" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.002*"runs" + 0.002*"team"
2018-03-08 13:18:16,680 : INFO : topic #5 (0.056): 0.004*"team" + 0.003*"world" + 0.003*"said" + 0.003*"players" + 0.002*"sports" + 0.002*"yea

2018-03-08 13:24:58,378 : INFO : topic #15 (0.056): 0.020*"mr" + 0.015*"said" + 0.004*"case" + 0.004*"police" + 0.004*"court" + 0.002*"did" + 0.002*"department" + 0.002*"ms" + 0.002*"law" + 0.002*"judge"
2018-03-08 13:24:58,562 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"run" + 0.004*"season" + 0.003*"yankees" + 0.003*"mets" + 0.003*"baseball" + 0.003*"hit" + 0.003*"runs" + 0.002*"team"
2018-03-08 13:24:58,938 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"federal" + 0.002*"tax" + 0.002*"house" + 0.002*"people" + 0.002*"republican"
2018-03-08 13:24:59,299 : INFO : topic #16 (0.056): 0.032*"mr" + 0.007*"president" + 0.006*"said" + 0.005*"obama" + 0.005*"romney" + 0.004*"campaign" + 0.004*"party" + 0.004*"political" + 0.003*"mr romney" + 0.003*"trump"
2018-03-08 13:25:00,099 : INFO : topic diff=0.242954, rho=0.113961
2018-03-08 13:25:00,356 : INFO : PROGRESS: pass 6, at document #36000/140000
2018-03-08 13:25:

2018-03-08 13:31:40,803 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.003*"mets" + 0.003*"runs" + 0.002*"team"
2018-03-08 13:31:41,008 : INFO : topic #14 (0.056): 0.005*"china" + 0.005*"bank" + 0.005*"european" + 0.005*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"financial" + 0.003*"europe" + 0.003*"banks" + 0.002*"countries"
2018-03-08 13:31:41,807 : INFO : topic diff=0.253904, rho=0.113961
2018-03-08 13:31:42,130 : INFO : PROGRESS: pass 6, at document #48000/140000
2018-03-08 13:31:51,953 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 13:32:46,551 : INFO : topic #5 (0.056): 0.004*"team" + 0.003*"world" + 0.003*"said" + 0.003*"sports" + 0.002*"players" + 0.002*"year" + 0.002*"club" + 0.002*"won" + 0.002*"race" + 0.002*"united"
2018-03-08 13:32:46,850 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.00

2018-03-08 13:38:24,089 : INFO : PROGRESS: pass 6, at document #60000/140000
2018-03-08 13:38:33,857 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 13:39:28,508 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"security" + 0.003*"officials" + 0.003*"united" + 0.002*"killed" + 0.002*"syria" + 0.002*"police"
2018-03-08 13:39:29,024 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"film" + 0.003*"music" + 0.003*"new" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"ms" + 0.001*"dance" + 0.001*"york"
2018-03-08 13:39:29,369 : INFO : topic #11 (0.056): 0.003*"church" + 0.003*"article" + 0.003*"gay" + 0.002*"book" + 0.002*"times" + 0.002*"com" + 0.002*"sex" + 0.002*"nytimes" + 0.001*"news" + 0.001*"nytimes com"
2018-03-08 13:39:29,822 : INFO : topic #10 (0.056): 0.008*"game" + 0.006*"said" + 0.005*"season" + 0.004*"team" + 0.003*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.

2018-03-08 13:46:11,256 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.004*"police" + 0.004*"case" + 0.004*"court" + 0.002*"judge" + 0.002*"department" + 0.002*"did" + 0.002*"ms" + 0.002*"law"
2018-03-08 13:46:11,715 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"said" + 0.003*"cup" + 0.003*"sports" + 0.002*"players" + 0.002*"club" + 0.002*"year" + 0.002*"won" + 0.002*"united"
2018-03-08 13:46:11,967 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.003*"runs" + 0.002*"league" + 0.002*"mets"
2018-03-08 13:46:12,416 : INFO : topic #13 (0.056): 0.008*"new" + 0.006*"mr" + 0.004*"said" + 0.004*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.003*"director" + 0.002*"group"
2018-03-08 13:46:13,217 : INFO : topic diff=0.249256, rho=0.113961
2018-03-08 13:46:13,465 : INFO : PROGRESS: pass 6, at document #74000/140000
2018-03-08 13:46:23,136 : INF

2018-03-08 13:52:54,234 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"film" + 0.003*"new" + 0.002*"theater" + 0.002*"like" + 0.002*"movie" + 0.001*"ms" + 0.001*"dance" + 0.001*"festival"
2018-03-08 13:52:54,741 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.003*"officials" + 0.002*"american" + 0.002*"oil" + 0.002*"energy" + 0.002*"agency" + 0.002*"water"
2018-03-08 13:52:55,548 : INFO : topic diff=0.267303, rho=0.113961
2018-03-08 13:52:55,806 : INFO : PROGRESS: pass 6, at document #86000/140000
2018-03-08 13:53:05,619 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 13:54:00,391 : INFO : topic #7 (0.056): 0.004*"la" + 0.004*"food" + 0.003*"en" + 0.003*"el" + 0.002*"south" + 0.002*"los" + 0.002*"north" + 0.002*"restaurant" + 0.002*"wine" + 0.002*"korea"
2018-03-08 13:54:00,787 : INFO : topic #10 (0.056): 0.008*"game" + 0.005*"said" + 0.005*"season" + 0.004*"team" + 0.003*"po

2018-03-08 13:59:37,661 : INFO : topic diff=0.250766, rho=0.113961
2018-03-08 13:59:37,914 : INFO : PROGRESS: pass 6, at document #98000/140000
2018-03-08 13:59:47,664 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 14:00:42,270 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"online" + 0.003*"said" + 0.002*"people" + 0.002*"use" + 0.002*"technology" + 0.002*"companies" + 0.002*"business"
2018-03-08 14:00:42,605 : INFO : topic #7 (0.056): 0.007*"la" + 0.006*"en" + 0.005*"el" + 0.004*"los" + 0.003*"food" + 0.002*"se" + 0.002*"las" + 0.002*"del" + 0.002*"south" + 0.002*"restaurant"
2018-03-08 14:00:42,904 : INFO : topic #16 (0.056): 0.028*"mr" + 0.006*"president" + 0.006*"said" + 0.004*"obama" + 0.004*"party" + 0.004*"political" + 0.003*"campaign" + 0.003*"clinton" + 0.003*"mr obama" + 0.003*"russia"
2018-03-08 14:00:43,091 : INFO : topic #14 (0.056): 0.006*"china" + 0.006*"bank" + 0.005*"european" + 0.004*"governme

2018-03-08 14:07:23,764 : INFO : topic #16 (0.056): 0.029*"mr" + 0.009*"trump" + 0.006*"president" + 0.005*"mr trump" + 0.005*"said" + 0.005*"clinton" + 0.004*"party" + 0.004*"campaign" + 0.004*"obama" + 0.004*"political"
2018-03-08 14:07:24,071 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"united" + 0.004*"states" + 0.004*"united states" + 0.003*"water" + 0.002*"american" + 0.002*"officials" + 0.002*"energy" + 0.002*"agency" + 0.002*"nuclear"
2018-03-08 14:07:24,344 : INFO : topic #0 (0.056): 0.010*"said" + 0.004*"government" + 0.004*"people" + 0.003*"military" + 0.003*"state" + 0.003*"islamic" + 0.003*"united" + 0.003*"officials" + 0.002*"killed" + 0.002*"security"
2018-03-08 14:07:24,686 : INFO : topic #7 (0.056): 0.006*"la" + 0.005*"en" + 0.004*"el" + 0.003*"food" + 0.003*"los" + 0.002*"restaurant" + 0.002*"south" + 0.002*"wine" + 0.002*"las" + 0.002*"se"
2018-03-08 14:07:25,486 : INFO : topic diff=0.266025, rho=0.113961
2018-03-08 14:07:25,754 : INFO : PROGRESS: pass 6, at docu

2018-03-08 14:14:06,108 : INFO : topic #7 (0.056): 0.004*"la" + 0.004*"food" + 0.003*"en" + 0.003*"el" + 0.002*"los" + 0.002*"restaurant" + 0.002*"korea" + 0.002*"wine" + 0.002*"south" + 0.002*"north"
2018-03-08 14:14:06,514 : INFO : topic #17 (0.056): 0.006*"city" + 0.005*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"park" + 0.002*"house"
2018-03-08 14:14:07,315 : INFO : topic diff=0.285958, rho=0.113961
2018-03-08 14:14:07,579 : INFO : PROGRESS: pass 6, at document #124000/140000
2018-03-08 14:14:17,472 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 14:15:12,114 : INFO : topic #16 (0.056): 0.031*"mr" + 0.015*"trump" + 0.009*"mr trump" + 0.007*"president" + 0.005*"said" + 0.005*"clinton" + 0.004*"party" + 0.004*"campaign" + 0.004*"political" + 0.003*"obama"
2018-03-08 14:15:12,520 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"sports" + 0.002*"players" + 0.002

2018-03-08 14:20:49,980 : INFO : topic diff=0.284376, rho=0.113961
2018-03-08 14:20:50,242 : INFO : PROGRESS: pass 6, at document #136000/140000
2018-03-08 14:20:59,983 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 14:21:54,757 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.005*"students" + 0.003*"school" + 0.003*"health" + 0.003*"new" + 0.003*"medical" + 0.003*"study" + 0.003*"college" + 0.002*"patients"
2018-03-08 14:21:55,179 : INFO : topic #10 (0.056): 0.007*"game" + 0.005*"season" + 0.005*"said" + 0.004*"team" + 0.003*"points" + 0.003*"play" + 0.003*"games" + 0.002*"players" + 0.002*"second" + 0.002*"coach"
2018-03-08 14:21:55,536 : INFO : topic #0 (0.056): 0.010*"said" + 0.004*"government" + 0.004*"people" + 0.003*"military" + 0.003*"state" + 0.003*"united" + 0.002*"officials" + 0.002*"security" + 0.002*"country" + 0.002*"killed"
2018-03-08 14:21:55,923 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"spor

2018-03-08 14:28:38,053 : INFO : topic #3 (0.056): 0.009*"said" + 0.006*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"federal" + 0.003*"people" + 0.003*"house" + 0.003*"tax" + 0.002*"republican"
2018-03-08 14:28:38,493 : INFO : topic #13 (0.056): 0.007*"new" + 0.006*"mr" + 0.004*"art" + 0.003*"said" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.002*"million" + 0.002*"work" + 0.002*"director"
2018-03-08 14:28:38,957 : INFO : topic #1 (0.056): 0.015*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"years" + 0.003*"know" + 0.003*"think"
2018-03-08 14:28:39,354 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"house" + 0.002*"york" + 0.002*"new york" + 0.002*"park"
2018-03-08 14:28:40,154 : INFO : topic diff=0.250750, rho=0.113228
2018-03-08 14:28:40,408 : INFO : PROGRESS: pass 7, at document #10000/140000
2018-03-08 14:28:50,318 : INFO :

2018-03-08 14:35:22,345 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"bank" + 0.005*"european" + 0.005*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"banks" + 0.003*"financial" + 0.003*"europe" + 0.003*"debt"
2018-03-08 14:35:22,755 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"years" + 0.003*"know" + 0.003*"way"
2018-03-08 14:35:23,558 : INFO : topic diff=0.245276, rho=0.113228
2018-03-08 14:35:23,905 : INFO : PROGRESS: pass 7, at document #22000/140000
2018-03-08 14:35:33,858 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 14:36:28,585 : INFO : topic #5 (0.056): 0.004*"team" + 0.003*"world" + 0.003*"players" + 0.003*"said" + 0.002*"sports" + 0.002*"year" + 0.002*"club" + 0.002*"won" + 0.002*"race" + 0.002*"united"
2018-03-08 14:36:29,045 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company" + 0.005*"mill

2018-03-08 14:42:06,052 : INFO : topic diff=0.250274, rho=0.113228
2018-03-08 14:42:06,315 : INFO : PROGRESS: pass 7, at document #34000/140000
2018-03-08 14:42:16,139 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 14:43:10,777 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.005*"mr" + 0.004*"street" + 0.003*"building" + 0.003*"house" + 0.002*"park" + 0.002*"york" + 0.002*"new york"
2018-03-08 14:43:10,966 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"bank" + 0.005*"european" + 0.004*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"financial" + 0.003*"banks" + 0.003*"europe" + 0.002*"debt"
2018-03-08 14:43:11,162 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"run" + 0.004*"season" + 0.003*"yankees" + 0.003*"mets" + 0.003*"baseball" + 0.003*"hit" + 0.003*"runs" + 0.002*"team"
2018-03-08 14:43:11,518 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"states" + 0.004*"united" + 0.003*"united stat

2018-03-08 14:49:52,479 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"security" + 0.003*"officials" + 0.003*"united" + 0.002*"american" + 0.002*"killed" + 0.002*"police"
2018-03-08 14:49:52,760 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"said" + 0.003*"online" + 0.002*"web" + 0.002*"people" + 0.002*"use" + 0.002*"site" + 0.002*"internet"
2018-03-08 14:49:53,135 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"years" + 0.003*"know" + 0.003*"way"
2018-03-08 14:49:53,349 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.003*"mets" + 0.003*"runs" + 0.002*"team"
2018-03-08 14:49:54,150 : INFO : topic diff=0.251993, rho=0.113228
2018-03-08 14:49:54,408 : INFO : PROGRESS: pass 7, at document #48000/140000
2018-03-08 14:5

2018-03-08 14:56:34,325 : INFO : topic #13 (0.056): 0.007*"new" + 0.006*"mr" + 0.004*"said" + 0.004*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.002*"director" + 0.002*"group"
2018-03-08 14:56:34,760 : INFO : topic #4 (0.056): 0.013*"percent" + 0.009*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.003*"sales" + 0.002*"years"
2018-03-08 14:56:35,559 : INFO : topic diff=0.243673, rho=0.113228
2018-03-08 14:56:35,810 : INFO : PROGRESS: pass 7, at document #60000/140000
2018-03-08 14:56:45,534 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 14:57:40,283 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"security" + 0.003*"officials" + 0.003*"united" + 0.002*"killed" + 0.002*"syria" + 0.002*"police"
2018-03-08 14:57:40,677 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0

2018-03-08 15:03:16,743 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"states" + 0.004*"united" + 0.003*"united states" + 0.003*"officials" + 0.002*"american" + 0.002*"energy" + 0.002*"oil" + 0.002*"agency" + 0.002*"power"
2018-03-08 15:03:17,544 : INFO : topic diff=0.253298, rho=0.113228
2018-03-08 15:03:17,795 : INFO : PROGRESS: pass 7, at document #72000/140000
2018-03-08 15:03:27,570 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 15:04:22,259 : INFO : topic #13 (0.056): 0.008*"new" + 0.006*"mr" + 0.004*"said" + 0.004*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.003*"director" + 0.002*"group"
2018-03-08 15:04:22,746 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.003*"people" + 0.003*"officials" + 0.003*"united" + 0.003*"security" + 0.002*"police" + 0.002*"killed" + 0.002*"syria"
2018-03-08 15:04:23,244 : INFO : topic #16 (0.056): 0.030*"mr" + 0.007*"president" + 0.006*"

2018-03-08 15:10:00,628 : INFO : PROGRESS: pass 7, at document #84000/140000
2018-03-08 15:10:10,404 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 15:11:04,984 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"cup" + 0.003*"players" + 0.003*"sports" + 0.002*"said" + 0.002*"year" + 0.002*"won" + 0.002*"club" + 0.002*"united"
2018-03-08 15:11:05,476 : INFO : topic #13 (0.056): 0.007*"new" + 0.006*"mr" + 0.004*"art" + 0.004*"said" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.002*"director" + 0.002*"group"
2018-03-08 15:11:05,795 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"case" + 0.004*"court" + 0.002*"department" + 0.002*"judge" + 0.002*"did" + 0.002*"ms" + 0.002*"law"
2018-03-08 15:11:06,101 : INFO : topic #6 (0.056): 0.005*"game" + 0.005*"said" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"hit" + 0.003*"mets" + 0.003*"baseball" + 0.002*"runs" + 0.002*"le

2018-03-08 15:17:48,364 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"sports" + 0.002*"players" + 0.002*"cup" + 0.002*"said" + 0.002*"year" + 0.002*"club" + 0.002*"won" + 0.002*"united"
2018-03-08 15:17:48,786 : INFO : topic #0 (0.056): 0.011*"said" + 0.004*"government" + 0.004*"military" + 0.003*"people" + 0.003*"state" + 0.003*"officials" + 0.003*"united" + 0.003*"islamic" + 0.002*"police" + 0.002*"security"
2018-03-08 15:17:49,250 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.002*"officials" + 0.002*"american" + 0.002*"energy" + 0.002*"oil" + 0.002*"agency" + 0.002*"water"
2018-03-08 15:17:50,049 : INFO : topic diff=0.248867, rho=0.113228
2018-03-08 15:17:50,299 : INFO : PROGRESS: pass 7, at document #98000/140000
2018-03-08 15:18:00,046 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 15:18:54,717 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"stud

2018-03-08 15:24:30,164 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"sports" + 0.002*"players" + 0.002*"said" + 0.002*"year" + 0.002*"cup" + 0.002*"won" + 0.002*"club" + 0.002*"united"
2018-03-08 15:24:30,967 : INFO : topic diff=0.256079, rho=0.113228
2018-03-08 15:24:31,293 : INFO : PROGRESS: pass 7, at document #110000/140000
2018-03-08 15:24:40,983 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 15:25:35,624 : INFO : topic #10 (0.056): 0.007*"game" + 0.005*"season" + 0.005*"said" + 0.004*"team" + 0.004*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.002*"second" + 0.002*"players"
2018-03-08 15:25:35,935 : INFO : topic #4 (0.056): 0.013*"percent" + 0.007*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"business"
2018-03-08 15:25:36,297 : INFO : topic #15 (0.056): 0.021*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"case" + 0.

2018-03-08 15:32:16,619 : INFO : topic #12 (0.056): 0.008*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"ms" + 0.001*"series" + 0.001*"york"
2018-03-08 15:32:16,951 : INFO : topic #7 (0.056): 0.004*"la" + 0.004*"food" + 0.003*"en" + 0.003*"el" + 0.002*"los" + 0.002*"restaurant" + 0.002*"korea" + 0.002*"wine" + 0.002*"south" + 0.002*"north"
2018-03-08 15:32:17,358 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"sports" + 0.002*"players" + 0.002*"said" + 0.002*"year" + 0.002*"won" + 0.002*"club" + 0.002*"united" + 0.002*"race"
2018-03-08 15:32:17,750 : INFO : topic #17 (0.056): 0.006*"city" + 0.005*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"york" + 0.002*"new york" + 0.002*"park" + 0.002*"house"
2018-03-08 15:32:18,109 : INFO : topic #15 (0.056): 0.021*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"case" + 0.004*"court" + 0.003*"department" + 0.003*"ms" + 0.002*"did" + 0.002*"

2018-03-08 15:38:58,937 : INFO : topic #7 (0.056): 0.004*"food" + 0.003*"la" + 0.003*"korea" + 0.002*"korean" + 0.002*"wine" + 0.002*"north" + 0.002*"south" + 0.002*"restaurant" + 0.002*"cooking" + 0.002*"en"
2018-03-08 15:38:59,200 : INFO : topic #6 (0.056): 0.004*"game" + 0.004*"said" + 0.004*"season" + 0.003*"run" + 0.003*"yankees" + 0.003*"mets" + 0.003*"hit" + 0.003*"baseball" + 0.002*"home" + 0.002*"series"
2018-03-08 15:38:59,687 : INFO : topic #12 (0.056): 0.008*"mr" + 0.003*"new" + 0.003*"music" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.002*"ms" + 0.001*"play" + 0.001*"series"
2018-03-08 15:39:00,495 : INFO : topic diff=0.282348, rho=0.113228
2018-03-08 15:39:00,757 : INFO : PROGRESS: pass 7, at document #136000/140000
2018-03-08 15:39:10,509 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 15:40:05,194 : INFO : topic #5 (0.056): 0.005*"team" + 0.004*"world" + 0.003*"sports" + 0.002*"players" + 0.002*"said" + 0.

2018-03-08 15:45:42,344 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.003*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"mets" + 0.003*"hit" + 0.002*"home" + 0.002*"league"
2018-03-08 15:45:43,143 : INFO : topic diff=0.257791, rho=0.112509
2018-03-08 15:45:43,395 : INFO : PROGRESS: pass 8, at document #8000/140000
2018-03-08 15:45:53,278 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 15:46:47,895 : INFO : topic #7 (0.056): 0.004*"food" + 0.002*"korea" + 0.002*"la" + 0.002*"north" + 0.002*"south" + 0.002*"restaurant" + 0.002*"korean" + 0.002*"wine" + 0.002*"cooking" + 0.001*"kim"
2018-03-08 15:46:48,396 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.004*"mr" + 0.003*"street" + 0.003*"building" + 0.002*"house" + 0.002*"york" + 0.002*"new york" + 0.002*"park"
2018-03-08 15:46:48,706 : INFO : topic #14 (0.056): 0.007*"china" + 0.005*"european" + 0.005*"bank" + 0.004*"government" + 0.004*

2018-03-08 15:53:31,364 : INFO : topic #0 (0.056): 0.011*"said" + 0.005*"government" + 0.004*"military" + 0.004*"people" + 0.003*"officials" + 0.003*"security" + 0.003*"united" + 0.003*"killed" + 0.002*"police" + 0.002*"american"
2018-03-08 15:53:31,663 : INFO : topic #16 (0.056): 0.031*"mr" + 0.008*"president" + 0.007*"trump" + 0.006*"said" + 0.004*"political" + 0.004*"party" + 0.004*"mr trump" + 0.004*"obama" + 0.004*"campaign" + 0.003*"election"
2018-03-08 15:53:31,987 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"bank" + 0.005*"european" + 0.005*"government" + 0.004*"said" + 0.004*"chinese" + 0.003*"banks" + 0.003*"financial" + 0.003*"europe" + 0.003*"debt"
2018-03-08 15:53:32,371 : INFO : topic #11 (0.056): 0.003*"church" + 0.003*"times" + 0.002*"article" + 0.002*"gay" + 0.002*"book" + 0.002*"sex" + 0.001*"com" + 0.001*"news" + 0.001*"read" + 0.001*"writes"
2018-03-08 15:53:32,781 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states

2018-03-08 16:00:14,262 : INFO : topic #15 (0.056): 0.020*"mr" + 0.015*"said" + 0.004*"police" + 0.004*"case" + 0.004*"court" + 0.002*"did" + 0.002*"department" + 0.002*"ms" + 0.002*"law" + 0.002*"judge"
2018-03-08 16:00:14,639 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"new" + 0.003*"film" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"ms" + 0.001*"york" + 0.001*"new york"
2018-03-08 16:00:15,048 : INFO : topic #16 (0.056): 0.032*"mr" + 0.007*"president" + 0.006*"said" + 0.005*"obama" + 0.004*"party" + 0.004*"campaign" + 0.004*"political" + 0.004*"romney" + 0.003*"trump" + 0.003*"mr romney"
2018-03-08 16:00:15,849 : INFO : topic diff=0.248273, rho=0.112509
2018-03-08 16:00:16,107 : INFO : PROGRESS: pass 8, at document #34000/140000
2018-03-08 16:00:25,934 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 16:01:20,529 : INFO : topic #7 (0.056): 0.004*"food" + 0.002*"restaurant" + 0.002*"wine" + 0.002*"north" + 0.002*

2018-03-08 16:06:57,473 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"mr" + 0.003*"law" + 0.003*"federal" + 0.002*"tax" + 0.002*"house" + 0.002*"people" + 0.002*"republican"
2018-03-08 16:06:58,276 : INFO : topic diff=0.244675, rho=0.112509
2018-03-08 16:06:58,530 : INFO : PROGRESS: pass 8, at document #46000/140000
2018-03-08 16:07:08,325 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 16:08:02,868 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new" + 0.003*"said" + 0.003*"online" + 0.002*"web" + 0.002*"people" + 0.002*"use" + 0.002*"site" + 0.002*"internet"
2018-03-08 16:08:03,217 : INFO : topic #17 (0.056): 0.006*"city" + 0.006*"said" + 0.005*"new" + 0.005*"mr" + 0.004*"street" + 0.003*"building" + 0.003*"house" + 0.002*"park" + 0.002*"york" + 0.002*"new york"
2018-03-08 16:08:03,610 : INFO : topic #10 (0.056): 0.007*"game" + 0.006*"said" + 0.005*"season" + 0.004*"team" + 0.004*"points" + 0

2018-03-08 16:14:44,184 : INFO : topic #2 (0.056): 0.010*"said" + 0.004*"united" + 0.004*"states" + 0.003*"united states" + 0.003*"officials" + 0.003*"american" + 0.002*"nuclear" + 0.002*"agency" + 0.002*"power" + 0.002*"energy"
2018-03-08 16:14:44,603 : INFO : topic #7 (0.056): 0.004*"food" + 0.003*"north" + 0.003*"south" + 0.002*"korea" + 0.002*"restaurant" + 0.002*"korean" + 0.002*"wine" + 0.001*"kim" + 0.001*"meat" + 0.001*"chef"
2018-03-08 16:14:44,982 : INFO : topic #4 (0.056): 0.013*"percent" + 0.009*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.003*"sales" + 0.002*"years"
2018-03-08 16:14:45,466 : INFO : topic #13 (0.056): 0.007*"new" + 0.006*"mr" + 0.004*"said" + 0.004*"art" + 0.003*"york" + 0.003*"new york" + 0.003*"museum" + 0.003*"million" + 0.002*"director" + 0.002*"group"
2018-03-08 16:14:45,798 : INFO : topic #1 (0.056): 0.016*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"just" + 0.005*"people" + 0.004*

2018-03-08 16:21:27,025 : INFO : topic #14 (0.056): 0.006*"china" + 0.005*"european" + 0.005*"bank" + 0.004*"government" + 0.004*"chinese" + 0.004*"said" + 0.003*"europe" + 0.003*"financial" + 0.003*"countries" + 0.002*"union"
2018-03-08 16:21:27,446 : INFO : topic #16 (0.056): 0.030*"mr" + 0.007*"president" + 0.006*"said" + 0.005*"obama" + 0.005*"party" + 0.004*"political" + 0.003*"campaign" + 0.003*"mr obama" + 0.003*"russia" + 0.002*"election"
2018-03-08 16:21:27,714 : INFO : topic #6 (0.056): 0.005*"said" + 0.005*"game" + 0.004*"season" + 0.004*"run" + 0.003*"yankees" + 0.003*"baseball" + 0.003*"hit" + 0.003*"runs" + 0.003*"league" + 0.002*"team"
2018-03-08 16:21:28,520 : INFO : topic diff=0.251377, rho=0.112509
2018-03-08 16:21:28,770 : INFO : PROGRESS: pass 8, at document #72000/140000
2018-03-08 16:21:38,612 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 16:22:33,194 : INFO : topic #9 (0.056): 0.005*"company" + 0.005*"like" + 0.003*"new"

2018-03-08 16:28:10,016 : INFO : topic #8 (0.056): 0.007*"dr" + 0.007*"university" + 0.004*"students" + 0.004*"new" + 0.003*"health" + 0.003*"school" + 0.003*"medical" + 0.003*"college" + 0.002*"study" + 0.002*"york"
2018-03-08 16:28:10,816 : INFO : topic diff=0.258797, rho=0.112509
2018-03-08 16:28:11,085 : INFO : PROGRESS: pass 8, at document #84000/140000
2018-03-08 16:28:20,824 : INFO : merging changes from 2000 documents into a model of 140000 documents
2018-03-08 16:29:15,485 : INFO : topic #15 (0.056): 0.020*"mr" + 0.014*"said" + 0.005*"police" + 0.004*"case" + 0.004*"court" + 0.002*"department" + 0.002*"judge" + 0.002*"did" + 0.002*"ms" + 0.002*"law"
2018-03-08 16:29:15,828 : INFO : topic #12 (0.056): 0.009*"mr" + 0.004*"music" + 0.003*"film" + 0.003*"new" + 0.002*"theater" + 0.002*"movie" + 0.002*"like" + 0.001*"ms" + 0.001*"dance" + 0.001*"festival"
2018-03-08 16:29:16,244 : INFO : topic #2 (0.056): 0.009*"said" + 0.004*"states" + 0.004*"united" + 0.004*"united states" + 0.00

2018-03-08 16:35:58,677 : INFO : topic #10 (0.056): 0.007*"game" + 0.005*"season" + 0.005*"said" + 0.004*"team" + 0.004*"points" + 0.003*"play" + 0.003*"games" + 0.003*"coach" + 0.002*"second" + 0.002*"players"
2018-03-08 16:35:58,983 : INFO : topic #6 (0.056): 0.005*"game" + 0.004*"said" + 0.004*"season" + 0.004*"run" + 0.003*"mets" + 0.003*"yankees" + 0.003*"hit" + 0.003*"baseball" + 0.002*"runs" + 0.002*"league"
2018-03-08 16:35:59,457 : INFO : topic #4 (0.056): 0.013*"percent" + 0.008*"said" + 0.007*"year" + 0.005*"company" + 0.005*"million" + 0.004*"market" + 0.003*"billion" + 0.003*"companies" + 0.002*"years" + 0.002*"new"
2018-03-08 16:35:59,915 : INFO : topic #3 (0.056): 0.009*"said" + 0.007*"state" + 0.004*"new" + 0.003*"law" + 0.003*"mr" + 0.003*"federal" + 0.002*"people" + 0.002*"city" + 0.002*"republican" + 0.002*"house"
2018-03-08 16:36:00,458 : INFO : topic #12 (0.056): 0.009*"mr" + 0.003*"music" + 0.003*"film" + 0.003*"new" + 0.002*"theater" + 0.002*"movie" + 0.002*"like

In [31]:
# Here are the 20 most important words for each of the 5 topics we found:
all_topics = np.array(lda.print_topics(num_words=15))

2018-03-08 19:42:46,926 : INFO : topic #0 (0.056): 0.010*"said" + 0.004*"government" + 0.004*"people" + 0.003*"military" + 0.003*"united" + 0.003*"state" + 0.003*"country" + 0.002*"officials" + 0.002*"security" + 0.002*"killed" + 0.002*"american" + 0.002*"police" + 0.002*"war" + 0.002*"attack" + 0.002*"islamic"
2018-03-08 19:42:47,396 : INFO : topic #1 (0.056): 0.014*"said" + 0.007*"like" + 0.006*"ms" + 0.005*"people" + 0.005*"just" + 0.004*"time" + 0.004*"don" + 0.003*"know" + 0.003*"years" + 0.003*"think" + 0.003*"way" + 0.003*"life" + 0.003*"women" + 0.003*"day" + 0.003*"going"
2018-03-08 19:42:47,794 : INFO : topic #2 (0.056): 0.009*"said" + 0.005*"united" + 0.005*"states" + 0.004*"united states" + 0.003*"north" + 0.003*"american" + 0.002*"water" + 0.002*"officials" + 0.002*"climate" + 0.002*"nuclear" + 0.002*"power" + 0.002*"agency" + 0.002*"energy" + 0.002*"air" + 0.002*"administration"
2018-03-08 19:42:48,249 : INFO : topic #3 (0.056): 0.009*"said" + 0.005*"state" + 0.004*"new" 

In [32]:
for topic in all_topics:
    print('topic #{}'.format(topic[0]))
    for word_weight in topic[1].split('+'):
        [weight, word] = word_weight.split('*')
        print('{}: {}'.format(word, weight))

topic #0
"said" : 0.010
"government" :  0.004
"people" :  0.004
"military" :  0.003
"united" :  0.003
"state" :  0.003
"country" :  0.003
"officials" :  0.002
"security" :  0.002
"killed" :  0.002
"american" :  0.002
"police" :  0.002
"war" :  0.002
"attack" :  0.002
"islamic":  0.002
topic #1
"said" : 0.014
"like" :  0.007
"ms" :  0.006
"people" :  0.005
"just" :  0.005
"time" :  0.004
"don" :  0.004
"know" :  0.003
"years" :  0.003
"think" :  0.003
"way" :  0.003
"life" :  0.003
"women" :  0.003
"day" :  0.003
"going":  0.003
topic #2
"said" : 0.009
"united" :  0.005
"states" :  0.005
"united states" :  0.004
"north" :  0.003
"american" :  0.003
"water" :  0.002
"officials" :  0.002
"climate" :  0.002
"nuclear" :  0.002
"power" :  0.002
"agency" :  0.002
"energy" :  0.002
"air" :  0.002
"administration":  0.002
topic #3
"said" : 0.009
"state" :  0.005
"new" :  0.004
"law" :  0.003
"people" :  0.003
"federal" :  0.003
"mr" :  0.003
"tax" :  0.003
"house" :  0.003
"health" :  0.003
"ca

In [34]:
# Transform the docs from the word space to the topic space (like "transform" in sklearn)
lda_corpus = lda[corpus]
lda_corpus

<gensim.interfaces.TransformedCorpus at 0x7fc2e1392fd0>

In [None]:
# Store the documents' topic vectors in a list so we can take a peak
lda_docs = [doc for doc in lda_corpus]

In [None]:
# Check out some of the document vectors in the topic space
# for i, k in enumerate(lda_docs[0:20]):
#     print(i,k)

In [None]:
# lda_docs[0][1]
# lda_corpus[0]

In [35]:
def get_corpus_for_year(yr):
    articles_yr = raw_df[raw_df['year'] == yr]['article']
    print('number of articles for year {}: {}'.format(yr, len(articles_yr)))
    counts_yr = count_vectorizer.transform(articles_yr).transpose()
    corpus_yr = matutils.Sparse2Corpus(counts_yr)
    lda_corpus_yr = lda[corpus_yr]
#     lda_docs_yr = [doc for doc in lda_corpus_yr]
    print('done with year {}'.format(yr))
    return lda_corpus_yr

def corpus_to_dic(lda_corpus_yr):
    lda_docs_yr = [{tpl[0]: tpl[1] for tpl in doc} for doc in lda_corpus_yr]
    return lda_docs_yr

In [36]:
def get_avg_weight_for_year(year_corpus, num_topics):
    list_of_sums = np.zeros(num_topics)
    for lda_doc in year_corpus:
        for weight in lda_doc:
            list_of_sums[weight[0]] += weight[1]
    average_weights = list_of_sums/len(year_corpus)
    return average_weights

In [None]:
corpus_2011 = get_corpus_for_year(2011)
corpus_2012 = get_corpus_for_year(2012)
corpus_2013 = get_corpus_for_year(2013)
corpus_2014 = get_corpus_for_year(2014)
corpus_2015 = get_corpus_for_year(2015)
corpus_2016 = get_corpus_for_year(2016)
corpus_2017 = get_corpus_for_year(2017)

number of articles for year 2011: 36103
done with year 2011
number of articles for year 2012: 36057
done with year 2012
number of articles for year 2013: 34795
done with year 2013
number of articles for year 2014: 40790


In [None]:
dicts_2011 = corpus_to_dic(corpus_2011)
dicts_2012 = corpus_to_dic(corpus_2012)
dicts_2013 = corpus_to_dic(corpus_2013)
dicts_2014 = corpus_to_dic(corpus_2014)
dicts_2015 = corpus_to_dic(corpus_2015)
dicts_2016 = corpus_to_dic(corpus_2016)
dicts_2017 = corpus_to_dic(corpus_2017)

In [None]:
avg_weights_2011 = get_avg_weight_for_year(corpus_2011, NUM_TOPICS)
avg_weights_2012 = get_avg_weight_for_year(corpus_2012, NUM_TOPICS)
avg_weights_2013 = get_avg_weight_for_year(corpus_2013, NUM_TOPICS)
avg_weights_2014 = get_avg_weight_for_year(corpus_2014, NUM_TOPICS)
avg_weights_2015 = get_avg_weight_for_year(corpus_2015, NUM_TOPICS)
avg_weights_2016 = get_avg_weight_for_year(corpus_2016, NUM_TOPICS)
avg_weights_2017 = get_avg_weight_for_year(corpus_2017, NUM_TOPICS)

In [None]:
def get_weight_for_doc(doc, topic_id):
    try:
        return doc[topic_id]
    except:
        return 0
    
def get_top_articles_index(year_dict, topic_id ):
    return sorted(enumerate(year_dict), key=lambda x: get_weight_for_doc(x[1], topic_id), reverse=True)

def get_article(year, idx):
    return raw_df[raw_df['year'] == year]['article'].reset_index(drop=True)[idx]

def get_articles_per_topic(year_dict, topic_id, year, lim=5):
    rows = get_top_articles_index(year_dict, topic_id )
#     print(rows[1:10])
    for idx, row in enumerate(rows[0:lim]):
        print('Article {}'.format(idx + 1))
        print('Score: {}'.format(row[1][topic_id]))
        print(get_article(year, row[0])[0:500])

In [52]:
for i in range(0,NUM_TOPICS):
    print('\n\nTOPIC {}'.format(i))
    get_articles_per_topic(dicts_2012, i, 2012, lim=11)




TOPIC 0
Article 1
Score: 0.9352463483810425
ADEN, Yemen (Reuters) — The Yemeni Army battled militants linked to Al Qaeda in the southern town of Zinjibar on Saturday, recapturing important positions in the rebel-held city and killing at least 62 Islamist fighters, a military official said. The official said four government soldiers died and four were wounded in the fighting, part of an offensive that began this month to uproot Islamist militants from southern Yemen. He said many of the dead militants were Somalis. The militant group Ansar
Article 2
Score: 0.9317195415496826
SANA, Yemen (AP) — The death toll from a suicide bombing in southern Yemen rose to 45 on Sunday, officials said, in the latest attack against militias allied with the army. The bomber, suspected of being a member of Al Qaeda, struck late Saturday during a funeral service attended by members of civilian militias that helped the Yemeni Army in a campaign to recapture the town of Jaar from Qaeda militants in June. Th

RECENTLY, as I heard my daughter, a junior in high school, and her friends discuss their plans for the prom, I had a vaguely troubling thought: can a 16-year-old be a cougar? Her best friends wanted to take boys younger than themselves (much younger ... two entire grades younger) to the prom. And one of those boys just happened to be my ninth-grade son. Back in my prom days (when the big slow dance was still “Stairway to Heaven”), I went with a boy who was not just taller than me, but older as w
Article 10
Score: 0.7878271341323853
“WILL he have a normal family?” Andrea asked. After the demise of her long-term relationship, my friend had enlisted a psychic in a search for answers. Never having felt close to her ex’s parents, she was hoping to avoid a repeat with whoever came next. “No one has a normal family,” the psychic said. “Normal is not a word.” Andrea started to say something else, but he cut her off: “Get in the back seat, honey. You’re busting up my flow.” We were seated in a 

Article 1
Score: 0.9639773368835449
Honeywell International said on Friday that its profit rose 10 percent in the third quarter as new products and emerging market growth offset weakness in Europe. The earnings topped Wall Street estimates, but revenue fell short of estimates. The company reduced its full-year revenue guidance and narrowed its earnings estimate. Honeywell’s chief executive, David M. Cote, said in a statement that he was encouraged by the commercial aerospace outlook, increasing spending on infrastructure and oil a
Article 2
Score: 0.9537493586540222
Tiffany & Company reported Thursday that its third-quarter net income fell about 30 percent, citing a higher-than-expected tax rate, economic weakness and high precious metal and diamond costs. The jewelry company’s results missed Wall Street’s expectations, and it cut its full-year earnings forecast. Its shares dropped 6.2 percent. For the quarter ended Oct. 31, Tiffany earned $63.2 million, or 49 cents a share, compared w

Article 1
Score: 0.9318841099739075
PHOENIX — Ike Davis had four hits and became the ninth player in Mets history to hit three home runs in a game. But even that was not enough for the slumping Mets, who lost to the Arizona Diamondbacks, 6-3, on Saturday at Chase Field. “A huge night for him,” Manager Terry Collins said of Davis. “We didn’t give him any help.” Davis led off the second inning and crushed a ball to straightaway center field. In the fourth, he pounded another to roughly the same spot. Davis then led off the sixth and
Article 2
Score: 0.9146016836166382
Josh Beckett had five strikeouts and one confrontation with an umpire in six sharp innings and David Ortiz homered and had three runs batted in to lead the Boston Red Sox to an 11-2 romp over the host Minnesota Twins on Tuesday night. Beckett (2-2) allowed two runs and five hits and Mike Aviles went 4 for 5 with a home run and two R.B.I. for the Red Sox, who had no trouble with this big lead after squandering a nine-run adv

Article 1
Score: 0.9715853333473206
Dr. Marisa Rory Mendel, the daughter of Ilene S. Mendel and Dr. Herbert E. Mendel of Fayetteville, N.Y., was married Saturday evening in Syracuse to Joshua Michael Abraham, a son of Harriet Sostman Abraham and Dr. Jerrold L. Abraham, also of Fayetteville. Rabbi Charles S. Sherman performed the ceremony at Temple Adath Yeshurun. The bride, 28, will continue to use her name professionally. She is a psychiatry resident at Brigham and Women’s Hospital and at Beth Israel Deaconess Medical Center, bot
Article 2
Score: 0.9607828259468079
Dr. Alisse Katherine Hauspurg and Dr. Adam James Janicki were married Saturday at Saltwater Farm Vineyard in Stonington, Conn. The ceremony was led by Jim R. Moss, an uncle of the bridegroom, who became a Universal Life Minister for the event. The bride, 26, and the bridegroom, 25, met at the University of Pennsylvania, from which each graduated magna cum laude. The bride will continue to use her name professionally. Later 

Article 1
Score: 0.9846483469009399
Dirk Nowitzki scored 16 points, driving past Kevin Garnett for the go-ahead layup with 5.1 seconds left and leading the Dallas Mavericks to a 90-85 victory over the host Boston Celtics on Wednesday night. Dallas led most of the game, but Paul Pierce hit a 3-pointer — just his second basket of the night — to tie it at 85-85 with 25 seconds to play. Rajon Rondo scored 24 points for Boston, and Garnett had 16 points with 10 rebounds. NUGGETS 123, NETS 115 Danilo Gallinari scored 22 points and Arron
Article 2
Score: 0.9826453924179077
Danilo Gallinari scored a season-high 28 points and outplayed Tim Duncan down the stretch as the host Denver Nuggets beat the San Antonio Spurs, 112-106, on Tuesday night. Duncan had 31 points, also a season high, and 18 rebounds for the Spurs, who cut a 15-point lead to 3 with 3 minutes 38 seconds left before the Nuggets closed it out for their third straight win. Manu Ginobili scored 16 points for the Spurs, who have lost

Article 1
Score: 0.8950509428977966
‘DARKNESSE VISIBLE’ Inon Barnatan, pianist. Avie AV2256; CD. EACH of the pieces on “Darknesse Visible,” a new recording by the brilliant pianist Inon Barnatan, was inspired by a literary work. The thoughtful program is typical for this insightful musician. But Mr. Barnatan’s extraordinary playing is what makes the release so rewarding. The three pieces of Ravel’s “Gaspard de la Nuit” (which roughly translates as “Treasurer of the Night”) take their titles from poems by Aloysius Bertrand. In this
Article 2
Score: 0.8950509428977966
‘DARKNESSE VISIBLE’ Inon Barnatan, pianist. Avie AV2256; CD. EACH of the pieces on “Darknesse Visible,” a new recording by the brilliant pianist Inon Barnatan, was inspired by a literary work. The thoughtful program is typical for this insightful musician. But Mr. Barnatan’s extraordinary playing is what makes the release so rewarding. The three pieces of Ravel’s “Gaspard de la Nuit” (which roughly translates as “Treasurer 

Tim Allen joined the Boston office of SapientNitro, part of Sapient, in a new post, creative director for experience design. He had been a creative director at R/GA, New York, part of the Interpublic Group of Companies. Association of Hispanic Advertising Agencies, McLean, Va., was renamed AHAA: The Voice of Hispanic Marketing as it expands its membership to include agencies that specialize in tasks aimed at Spanish-speaking consumers like public relations, direct response and promotions as well
Article 7
Score: 0.8255565762519836
Kate Bittman joined the New York office of Rodale as associate director for communications at Men’s Health magazine. She had been public relations manager at The New Yorker magazine, part of the Condé Nast Publications division of Advance Publications. James Brooks joined Forge Worldwide, Boston, as an interactive art director. He had been a senior designer at the Boston office of Digitas, part of the VivaKi division of the Publicis Groupe. Andrew Campbell an

TOPIC 15
Article 1
Score: 0.89580899477005
A New York City police officer accused of providing protected law enforcement information to the head of a drug-dealing organization in Jamaica, Queens, was indicted on Friday by a grand jury in Federal District Court in Brooklyn. Police Officer Devon Daniels was charged with four counts of accessing a computer database without authorization and four counts of making false statements to federal agents during their investigation. He faces up to 10 years in prison on each of the computer charges a
Article 2
Score: 0.8695391416549683
A former New York City police officer was sentenced on Thursday to more than three years in prison in a gun smuggling case that ensnared active-duty and other former members of the Police Department. The former officer, Joseph Trischitta, 43, of Staten Island, was sentenced in Federal District Court in Manhattan to 40 months in prison. He pleaded guilty in February to conspiracy to transport firearms between states, 

Article 1
Score: 0.9498709440231323
EMIGRATION CANYON, UTAH WHAT: A two-bedroom, one-bath house HOW MUCH: $342,500 SIZE: 1,008 square feet PRICE PER SQUARE FOOT: $339.78 SETTING: Emigration Canyon is an unincorporated community of 1,600 people nestled in the Wasatch Mountains, about eight miles from downtown Salt Lake City. This airy remodeled house is one of several along Emigration Canyon Road, the main route winding through the canyon. Nearby parks and nature areas include Red Butte Canyon and Wasatch Mountain State Park. INSID
Article 2
Score: 0.9458158016204834
MINNEAPOLIS WHAT: A five-bedroom house with four baths HOW MUCH: $1,995,000 SIZE: 5,008 square feet PRICE PER SQUARE FOOT: $398.36 SETTING: This house is in a neighborhood called Kenwood, set between Cedar Lake and Lake of the Isles, two lakes in a section of Minneapolis called the Chain of Lakes. Hockey and ice-skating are popular during the winter; canoeing, swimming and fishing take over during the summer. Kenwood is kno

In [60]:
lda_topics = ['Middle East Violence', 'Op-Ed (personal stories)', 'Energy', 'Telephone polling results', 'Finance', 'Bridge Card Game', 'Baseball', 'Culinary', 'Marriages', 'Technology', 'Basketball', 'Random', 'Art', 'Business Acquisitions/Hires', 'International Economics', 'Crime', 'International Politics', 'Real Estate Sales']

In [58]:
topic_words = []
for i in range(0,17):
    topic_words.append(lda.print_topic(i, topn=20))

In [77]:
for topic_list in zip(lda_topics, topic_words):
    print(topic_list[0])
    for word in topic_list[1].split('+'):
        word_weight = word.strip().split('*')
        print('{}\t{}'. format(int(1000* float(word_weight[0])), word_weight[1].strip('"')))
    print('\n')

    

Middle East Violence
10	said
4	government
4	people
3	military
3	united
3	state
3	country
2	officials
2	security
2	killed
2	american
2	police
2	war
2	attack
2	islamic
2	group
2	forces
2	islamic state
2	syria
2	attacks


Op-Ed (personal stories)
14	said
7	like
6	ms
5	people
5	just
4	time
4	don
3	know
3	years
3	think
3	way
3	life
3	women
3	day
3	going
3	really
3	want
3	did
2	work
2	ve


Energy
9	said
5	united
5	states
4	united states
3	north
3	american
2	water
2	officials
2	climate
2	nuclear
2	power
2	agency
2	energy
2	air
2	administration
2	change
1	oil
1	korea
1	safety
1	gas


Telephone polling results
9	said
5	state
4	new
3	law
3	people
3	federal
3	mr
3	tax
3	house
3	health
2	care
2	senate
2	republican
2	states
2	administration
2	republicans
2	public
2	president
2	city
2	year


Finance
11	percent
7	said
6	year
6	company
5	million
4	companies
4	market
3	billion
2	business
2	years
2	according
2	new
2	chief
2	growth
2	investors
2	rate
2	price
2	sales
2	executive
2	prices


Bridge Card Gam

In [54]:
all_years = [avg_weights_2011, avg_weights_2012, avg_weights_2013, avg_weights_2014, avg_weights_2015, avg_weights_2016, avg_weights_2017]
all_years_np = np.array([np.array(x) for x in all_years])

In [81]:
# import seaborn as sns
# import matplotlib.cm as cm
# import matplotlib.style
# NUM_COLORS = 18
# sns.reset_orig()  # get default matplotlib styles back
# # matplotlib.style.use('seaborn')

# clrs = sns.color_palette('Set3', n_colors=NUM_COLORS, desat=.8)  # a list of RGB tuples
# plt.figure(figsize=(15,8))
# for idx, topic in enumerate(all_years_np.T):
#     plt.plot(topic, label=lda_topics[idx], color=clrs[idx])
# #     plt.plot(topic, label=idx)
# plt.xticks(range(7), [2011, 2012, 2013, 2014, 2015, 2016, 2017])
# plt.legend()

In [82]:
for i in range(2011,2018):
    print(i, len(raw_df[raw_df['year'] == i]['article']))

2011 36103
2012 36057
2013 34795
2014 40790
2015 42086
2016 42071
2017 36117


In [2]:
len(raw_df)

NameError: name 'raw_df' is not defined

In [85]:
dicts_2011[2]

{0: 0.139603,
 2: 0.18121177,
 3: 0.025287978,
 4: 0.03069319,
 5: 0.016031202,
 7: 0.285238,
 10: 0.012106614,
 13: 0.030912641,
 14: 0.06993339,
 16: 0.1601656}