In [1]:
import gensim
import logging

# Logging code taken from http://rare-technologies.com/word2vec-tutorial/
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Loading & Pre-processing Data

In [2]:
from nltk.tokenize import RegexpTokenizer # tokenizing
from nltk.corpus import stopwords  # list of stop words
from nltk.stem.wordnet import WordNetLemmatizer # lemmatizer

In [None]:
## https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/deepir.ipynb
import re

contractions = re.compile(r"'|-|\"")
# all non alphanumeric
symbols = re.compile(r'(\W+)', re.U)
# single character removal
singles = re.compile(r'(\s\S\s)', re.I|re.U)
# separators (any whitespace)
seps = re.compile(r'\s+')

# cleaner (order matters)
def clean(text): 
    text = text.lower()
    text = contractions.sub('', text)
    text = symbols.sub(r' \1 ', text)
    text = singles.sub(' ', text)
    text = seps.sub(' ', text)
    return text

# sentence splitter
alteos = re.compile(r'([!\?])')
def sentences(l):
    l = alteos.sub(r' \1 .', l).rstrip("(\.)*\n")
    return l.split(".")

In [3]:
tokenizer = RegexpTokenizer(r'\w+') # tokens separated by white space -- very naive, fist pass 
stops = set(stopwords.words('english')) # list of english stop words
lemma = WordNetLemmatizer()

def clean(title, rmv_stop_words=False):
    tokens = tokenizer.tokenize(title.lower())  # TODO: smarter tokenizer to allow for code
    # spell check
    # phrase detection
    # entity detection
    if rmv_stop_words:
        tokens = [i for i in tokens if not i in stops] # remove stop words
    normalized = [lemma.lemmatize(token) for token in tokens] # lemma
    return normalized

In [4]:
path = "/Users/stevenfelix/Documents/DataScience_local/Insight/"
file = 'posts_titles_50M.txt'
corpus = []

# compile and pre-process corpus serially
with open(path+file, 'r') as f:
    for line in f:
        corpus.append(clean(line))

In [16]:
print(len(corpus))
print(corpus[200])

17565207
['csv', 'file', 'import', 'in', 'net']


In [None]:
#dictionary = gensim.corpora.Dictionary(corpus)
#print(len(dictionary))
#dictionary.filter_extremes(no_below=2)#no_above = .5) # can play with this
#len(dictionary)

# Model Training

In [None]:
model_nostop = gensim.models.word2vec.Word2Vec(corpus_nostop, sg=1, size=200, window=5, min_count=2)

In [8]:
num_doc='50M'
sg=1
size=250
window=5
min_count=3
model_full = gensim.models.word2vec.Word2Vec(corpus, sg=sg, size=size, window=window, min_count=min_count)

2018-01-18 10:12:34,471 : INFO : collecting all words and their counts
2018-01-18 10:12:34,472 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2018-01-18 10:12:34,517 : INFO : PROGRESS: at sentence #10000, processed 87251 words, keeping 7064 word types
2018-01-18 10:12:34,564 : INFO : PROGRESS: at sentence #20000, processed 178244 words, keeping 10377 word types
2018-01-18 10:12:34,611 : INFO : PROGRESS: at sentence #30000, processed 268294 words, keeping 12855 word types
2018-01-18 10:12:34,663 : INFO : PROGRESS: at sentence #40000, processed 357470 words, keeping 14950 word types
2018-01-18 10:12:34,730 : INFO : PROGRESS: at sentence #50000, processed 444951 words, keeping 16862 word types
2018-01-18 10:12:34,779 : INFO : PROGRESS: at sentence #60000, processed 531723 words, keeping 18607 word types
2018-01-18 10:12:34,824 : INFO : PROGRESS: at sentence #70000, processed 619805 words, keeping 20183 word types
2018-01-18 10:12:34,869 : INFO : PROGRESS: at se

2018-01-18 10:12:38,211 : INFO : PROGRESS: at sentence #710000, processed 6080504 words, keeping 78585 word types
2018-01-18 10:12:38,272 : INFO : PROGRESS: at sentence #720000, processed 6164893 words, keeping 79263 word types
2018-01-18 10:12:38,333 : INFO : PROGRESS: at sentence #730000, processed 6249699 words, keeping 79909 word types
2018-01-18 10:12:38,414 : INFO : PROGRESS: at sentence #740000, processed 6334495 words, keeping 80596 word types
2018-01-18 10:12:38,478 : INFO : PROGRESS: at sentence #750000, processed 6419771 words, keeping 81260 word types
2018-01-18 10:12:38,535 : INFO : PROGRESS: at sentence #760000, processed 6504866 words, keeping 81904 word types
2018-01-18 10:12:38,611 : INFO : PROGRESS: at sentence #770000, processed 6588967 words, keeping 82578 word types
2018-01-18 10:12:38,663 : INFO : PROGRESS: at sentence #780000, processed 6673573 words, keeping 83240 word types
2018-01-18 10:12:38,710 : INFO : PROGRESS: at sentence #790000, processed 6758292 words,

2018-01-18 10:12:43,277 : INFO : PROGRESS: at sentence #1420000, processed 12068791 words, keeping 121793 word types
2018-01-18 10:12:43,324 : INFO : PROGRESS: at sentence #1430000, processed 12151939 words, keeping 122317 word types
2018-01-18 10:12:43,373 : INFO : PROGRESS: at sentence #1440000, processed 12235349 words, keeping 122851 word types
2018-01-18 10:12:43,422 : INFO : PROGRESS: at sentence #1450000, processed 12318336 words, keeping 123400 word types
2018-01-18 10:12:43,474 : INFO : PROGRESS: at sentence #1460000, processed 12402604 words, keeping 123917 word types
2018-01-18 10:12:43,525 : INFO : PROGRESS: at sentence #1470000, processed 12484821 words, keeping 124491 word types
2018-01-18 10:12:43,585 : INFO : PROGRESS: at sentence #1480000, processed 12568217 words, keeping 125043 word types
2018-01-18 10:12:43,638 : INFO : PROGRESS: at sentence #1490000, processed 12651007 words, keeping 125578 word types
2018-01-18 10:12:43,680 : INFO : PROGRESS: at sentence #1500000,

2018-01-18 10:12:47,082 : INFO : PROGRESS: at sentence #2130000, processed 18085536 words, keeping 158685 word types
2018-01-18 10:12:47,141 : INFO : PROGRESS: at sentence #2140000, processed 18169842 words, keeping 159161 word types
2018-01-18 10:12:47,217 : INFO : PROGRESS: at sentence #2150000, processed 18255896 words, keeping 159695 word types
2018-01-18 10:12:47,271 : INFO : PROGRESS: at sentence #2160000, processed 18341502 words, keeping 160198 word types
2018-01-18 10:12:47,334 : INFO : PROGRESS: at sentence #2170000, processed 18427481 words, keeping 160706 word types
2018-01-18 10:12:47,379 : INFO : PROGRESS: at sentence #2180000, processed 18512852 words, keeping 161238 word types
2018-01-18 10:12:47,424 : INFO : PROGRESS: at sentence #2190000, processed 18598625 words, keeping 161697 word types
2018-01-18 10:12:47,476 : INFO : PROGRESS: at sentence #2200000, processed 18683888 words, keeping 162198 word types
2018-01-18 10:12:47,536 : INFO : PROGRESS: at sentence #2210000,

2018-01-18 10:12:50,764 : INFO : PROGRESS: at sentence #2840000, processed 24158500 words, keeping 192712 word types
2018-01-18 10:12:50,815 : INFO : PROGRESS: at sentence #2850000, processed 24243117 words, keeping 193228 word types
2018-01-18 10:12:50,872 : INFO : PROGRESS: at sentence #2860000, processed 24328944 words, keeping 193725 word types
2018-01-18 10:12:50,920 : INFO : PROGRESS: at sentence #2870000, processed 24414681 words, keeping 194189 word types
2018-01-18 10:12:50,972 : INFO : PROGRESS: at sentence #2880000, processed 24499709 words, keeping 194683 word types
2018-01-18 10:12:51,018 : INFO : PROGRESS: at sentence #2890000, processed 24584538 words, keeping 195148 word types
2018-01-18 10:12:51,067 : INFO : PROGRESS: at sentence #2900000, processed 24670374 words, keeping 195561 word types
2018-01-18 10:12:51,116 : INFO : PROGRESS: at sentence #2910000, processed 24754918 words, keeping 195995 word types
2018-01-18 10:12:51,163 : INFO : PROGRESS: at sentence #2920000,

2018-01-18 10:12:54,908 : INFO : PROGRESS: at sentence #3550000, processed 30224236 words, keeping 213830 word types
2018-01-18 10:12:54,959 : INFO : PROGRESS: at sentence #3560000, processed 30307811 words, keeping 213830 word types
2018-01-18 10:12:55,009 : INFO : PROGRESS: at sentence #3570000, processed 30392288 words, keeping 213830 word types
2018-01-18 10:12:55,075 : INFO : PROGRESS: at sentence #3580000, processed 30476806 words, keeping 213830 word types
2018-01-18 10:12:55,131 : INFO : PROGRESS: at sentence #3590000, processed 30561519 words, keeping 213830 word types
2018-01-18 10:12:55,177 : INFO : PROGRESS: at sentence #3600000, processed 30646247 words, keeping 213830 word types
2018-01-18 10:12:55,231 : INFO : PROGRESS: at sentence #3610000, processed 30730479 words, keeping 213830 word types
2018-01-18 10:12:55,285 : INFO : PROGRESS: at sentence #3620000, processed 30814630 words, keeping 213830 word types
2018-01-18 10:12:55,343 : INFO : PROGRESS: at sentence #3630000,

2018-01-18 10:12:59,417 : INFO : PROGRESS: at sentence #4260000, processed 36263046 words, keeping 213830 word types
2018-01-18 10:12:59,469 : INFO : PROGRESS: at sentence #4270000, processed 36348174 words, keeping 213830 word types
2018-01-18 10:12:59,520 : INFO : PROGRESS: at sentence #4280000, processed 36432178 words, keeping 213830 word types
2018-01-18 10:12:59,573 : INFO : PROGRESS: at sentence #4290000, processed 36517345 words, keeping 213830 word types
2018-01-18 10:12:59,623 : INFO : PROGRESS: at sentence #4300000, processed 36601491 words, keeping 213830 word types
2018-01-18 10:12:59,682 : INFO : PROGRESS: at sentence #4310000, processed 36685749 words, keeping 213830 word types
2018-01-18 10:12:59,738 : INFO : PROGRESS: at sentence #4320000, processed 36770312 words, keeping 213830 word types
2018-01-18 10:12:59,793 : INFO : PROGRESS: at sentence #4330000, processed 36854142 words, keeping 213830 word types
2018-01-18 10:12:59,848 : INFO : PROGRESS: at sentence #4340000,

2018-01-18 10:13:04,006 : INFO : PROGRESS: at sentence #4970000, processed 42224598 words, keeping 213830 word types
2018-01-18 10:13:04,075 : INFO : PROGRESS: at sentence #4980000, processed 42308406 words, keeping 213830 word types
2018-01-18 10:13:04,145 : INFO : PROGRESS: at sentence #4990000, processed 42392760 words, keeping 213830 word types
2018-01-18 10:13:04,223 : INFO : PROGRESS: at sentence #5000000, processed 42477875 words, keeping 213830 word types
2018-01-18 10:13:04,300 : INFO : PROGRESS: at sentence #5010000, processed 42562960 words, keeping 213830 word types
2018-01-18 10:13:04,375 : INFO : PROGRESS: at sentence #5020000, processed 42647763 words, keeping 213830 word types
2018-01-18 10:13:04,435 : INFO : PROGRESS: at sentence #5030000, processed 42731705 words, keeping 213830 word types
2018-01-18 10:13:04,516 : INFO : PROGRESS: at sentence #5040000, processed 42816214 words, keeping 213830 word types
2018-01-18 10:13:04,597 : INFO : PROGRESS: at sentence #5050000,

2018-01-18 10:13:08,475 : INFO : PROGRESS: at sentence #5680000, processed 48288767 words, keeping 213830 word types
2018-01-18 10:13:08,533 : INFO : PROGRESS: at sentence #5690000, processed 48374585 words, keeping 213830 word types
2018-01-18 10:13:08,597 : INFO : PROGRESS: at sentence #5700000, processed 48460548 words, keeping 213830 word types
2018-01-18 10:13:08,657 : INFO : PROGRESS: at sentence #5710000, processed 48546797 words, keeping 213830 word types
2018-01-18 10:13:08,715 : INFO : PROGRESS: at sentence #5720000, processed 48632019 words, keeping 213830 word types
2018-01-18 10:13:08,769 : INFO : PROGRESS: at sentence #5730000, processed 48717477 words, keeping 213830 word types
2018-01-18 10:13:08,823 : INFO : PROGRESS: at sentence #5740000, processed 48803140 words, keeping 213830 word types
2018-01-18 10:13:08,875 : INFO : PROGRESS: at sentence #5750000, processed 48887203 words, keeping 213830 word types
2018-01-18 10:13:08,926 : INFO : PROGRESS: at sentence #5760000,

2018-01-18 10:13:12,471 : INFO : PROGRESS: at sentence #6390000, processed 54341337 words, keeping 213830 word types
2018-01-18 10:13:12,531 : INFO : PROGRESS: at sentence #6400000, processed 54425178 words, keeping 213830 word types
2018-01-18 10:13:12,590 : INFO : PROGRESS: at sentence #6410000, processed 54509664 words, keeping 213830 word types
2018-01-18 10:13:12,648 : INFO : PROGRESS: at sentence #6420000, processed 54594298 words, keeping 213830 word types
2018-01-18 10:13:12,711 : INFO : PROGRESS: at sentence #6430000, processed 54678323 words, keeping 213830 word types
2018-01-18 10:13:12,771 : INFO : PROGRESS: at sentence #6440000, processed 54763432 words, keeping 213830 word types
2018-01-18 10:13:12,833 : INFO : PROGRESS: at sentence #6450000, processed 54848398 words, keeping 213830 word types
2018-01-18 10:13:12,889 : INFO : PROGRESS: at sentence #6460000, processed 54932994 words, keeping 213830 word types
2018-01-18 10:13:12,952 : INFO : PROGRESS: at sentence #6470000,

2018-01-18 10:13:16,642 : INFO : PROGRESS: at sentence #7100000, processed 60360943 words, keeping 236221 word types
2018-01-18 10:13:16,703 : INFO : PROGRESS: at sentence #7110000, processed 60446982 words, keeping 236710 word types
2018-01-18 10:13:16,758 : INFO : PROGRESS: at sentence #7120000, processed 60531800 words, keeping 237152 word types
2018-01-18 10:13:16,811 : INFO : PROGRESS: at sentence #7130000, processed 60616045 words, keeping 237577 word types
2018-01-18 10:13:16,874 : INFO : PROGRESS: at sentence #7140000, processed 60700686 words, keeping 238027 word types
2018-01-18 10:13:16,938 : INFO : PROGRESS: at sentence #7150000, processed 60787138 words, keeping 238489 word types
2018-01-18 10:13:16,986 : INFO : PROGRESS: at sentence #7160000, processed 60871234 words, keeping 238849 word types
2018-01-18 10:13:17,053 : INFO : PROGRESS: at sentence #7170000, processed 60956077 words, keeping 239318 word types
2018-01-18 10:13:17,118 : INFO : PROGRESS: at sentence #7180000,

2018-01-18 10:13:20,781 : INFO : PROGRESS: at sentence #7810000, processed 66393369 words, keeping 266604 word types
2018-01-18 10:13:20,854 : INFO : PROGRESS: at sentence #7820000, processed 66478370 words, keeping 267027 word types
2018-01-18 10:13:20,915 : INFO : PROGRESS: at sentence #7830000, processed 66562919 words, keeping 267462 word types
2018-01-18 10:13:20,974 : INFO : PROGRESS: at sentence #7840000, processed 66646796 words, keeping 267853 word types
2018-01-18 10:13:21,039 : INFO : PROGRESS: at sentence #7850000, processed 66732168 words, keeping 268288 word types
2018-01-18 10:13:21,096 : INFO : PROGRESS: at sentence #7860000, processed 66819316 words, keeping 268685 word types
2018-01-18 10:13:21,157 : INFO : PROGRESS: at sentence #7870000, processed 66906399 words, keeping 269146 word types
2018-01-18 10:13:21,220 : INFO : PROGRESS: at sentence #7880000, processed 66992310 words, keeping 269597 word types
2018-01-18 10:13:21,280 : INFO : PROGRESS: at sentence #7890000,

2018-01-18 10:13:24,795 : INFO : PROGRESS: at sentence #8520000, processed 72485408 words, keeping 295728 word types
2018-01-18 10:13:24,845 : INFO : PROGRESS: at sentence #8530000, processed 72572066 words, keeping 296134 word types
2018-01-18 10:13:24,894 : INFO : PROGRESS: at sentence #8540000, processed 72657369 words, keeping 296559 word types
2018-01-18 10:13:24,948 : INFO : PROGRESS: at sentence #8550000, processed 72742327 words, keeping 296948 word types
2018-01-18 10:13:25,004 : INFO : PROGRESS: at sentence #8560000, processed 72828384 words, keeping 297407 word types
2018-01-18 10:13:25,058 : INFO : PROGRESS: at sentence #8570000, processed 72914920 words, keeping 297796 word types
2018-01-18 10:13:25,118 : INFO : PROGRESS: at sentence #8580000, processed 73000332 words, keeping 298202 word types
2018-01-18 10:13:25,176 : INFO : PROGRESS: at sentence #8590000, processed 73086749 words, keeping 298657 word types
2018-01-18 10:13:25,228 : INFO : PROGRESS: at sentence #8600000,

2018-01-18 10:13:28,724 : INFO : PROGRESS: at sentence #9230000, processed 78593188 words, keeping 324760 word types
2018-01-18 10:13:28,798 : INFO : PROGRESS: at sentence #9240000, processed 78679014 words, keeping 325139 word types
2018-01-18 10:13:28,865 : INFO : PROGRESS: at sentence #9250000, processed 78764841 words, keeping 325558 word types
2018-01-18 10:13:28,937 : INFO : PROGRESS: at sentence #9260000, processed 78851348 words, keeping 325905 word types
2018-01-18 10:13:28,995 : INFO : PROGRESS: at sentence #9270000, processed 78938524 words, keeping 326315 word types
2018-01-18 10:13:29,072 : INFO : PROGRESS: at sentence #9280000, processed 79024383 words, keeping 326711 word types
2018-01-18 10:13:29,126 : INFO : PROGRESS: at sentence #9290000, processed 79110382 words, keeping 327107 word types
2018-01-18 10:13:29,195 : INFO : PROGRESS: at sentence #9300000, processed 79196535 words, keeping 327482 word types
2018-01-18 10:13:29,250 : INFO : PROGRESS: at sentence #9310000,

2018-01-18 10:13:32,864 : INFO : PROGRESS: at sentence #9940000, processed 84712758 words, keeping 352054 word types
2018-01-18 10:13:32,927 : INFO : PROGRESS: at sentence #9950000, processed 84799873 words, keeping 352438 word types
2018-01-18 10:13:32,984 : INFO : PROGRESS: at sentence #9960000, processed 84884837 words, keeping 352793 word types
2018-01-18 10:13:33,046 : INFO : PROGRESS: at sentence #9970000, processed 84969911 words, keeping 353213 word types
2018-01-18 10:13:33,099 : INFO : PROGRESS: at sentence #9980000, processed 85055741 words, keeping 353587 word types
2018-01-18 10:13:33,149 : INFO : PROGRESS: at sentence #9990000, processed 85142305 words, keeping 353974 word types
2018-01-18 10:13:33,206 : INFO : PROGRESS: at sentence #10000000, processed 85228942 words, keeping 354354 word types
2018-01-18 10:13:33,259 : INFO : PROGRESS: at sentence #10010000, processed 85313648 words, keeping 354726 word types
2018-01-18 10:13:33,322 : INFO : PROGRESS: at sentence #100200

2018-01-18 10:13:36,871 : INFO : PROGRESS: at sentence #10640000, processed 90760271 words, keeping 371099 word types
2018-01-18 10:13:36,933 : INFO : PROGRESS: at sentence #10650000, processed 90845285 words, keeping 371099 word types
2018-01-18 10:13:36,987 : INFO : PROGRESS: at sentence #10660000, processed 90930504 words, keeping 371099 word types
2018-01-18 10:13:37,044 : INFO : PROGRESS: at sentence #10670000, processed 91015328 words, keeping 371099 word types
2018-01-18 10:13:37,097 : INFO : PROGRESS: at sentence #10680000, processed 91100569 words, keeping 371099 word types
2018-01-18 10:13:37,156 : INFO : PROGRESS: at sentence #10690000, processed 91183820 words, keeping 371099 word types
2018-01-18 10:13:37,289 : INFO : PROGRESS: at sentence #10700000, processed 91268326 words, keeping 371099 word types
2018-01-18 10:13:37,385 : INFO : PROGRESS: at sentence #10710000, processed 91352766 words, keeping 371099 word types
2018-01-18 10:13:37,452 : INFO : PROGRESS: at sentence #

2018-01-18 10:13:41,500 : INFO : PROGRESS: at sentence #11340000, processed 96713260 words, keeping 371099 word types
2018-01-18 10:13:41,564 : INFO : PROGRESS: at sentence #11350000, processed 96798025 words, keeping 371099 word types
2018-01-18 10:13:41,737 : INFO : PROGRESS: at sentence #11360000, processed 96882943 words, keeping 371099 word types
2018-01-18 10:13:41,805 : INFO : PROGRESS: at sentence #11370000, processed 96968088 words, keeping 371099 word types
2018-01-18 10:13:41,860 : INFO : PROGRESS: at sentence #11380000, processed 97052860 words, keeping 371099 word types
2018-01-18 10:13:41,915 : INFO : PROGRESS: at sentence #11390000, processed 97138907 words, keeping 371099 word types
2018-01-18 10:13:41,974 : INFO : PROGRESS: at sentence #11400000, processed 97223794 words, keeping 371099 word types
2018-01-18 10:13:42,025 : INFO : PROGRESS: at sentence #11410000, processed 97308227 words, keeping 371099 word types
2018-01-18 10:13:42,084 : INFO : PROGRESS: at sentence #

2018-01-18 10:13:46,819 : INFO : PROGRESS: at sentence #12040000, processed 102594249 words, keeping 371099 word types
2018-01-18 10:13:46,936 : INFO : PROGRESS: at sentence #12050000, processed 102678092 words, keeping 371099 word types
2018-01-18 10:13:47,048 : INFO : PROGRESS: at sentence #12060000, processed 102762708 words, keeping 371099 word types
2018-01-18 10:13:47,114 : INFO : PROGRESS: at sentence #12070000, processed 102846785 words, keeping 371099 word types
2018-01-18 10:13:47,182 : INFO : PROGRESS: at sentence #12080000, processed 102931472 words, keeping 371099 word types
2018-01-18 10:13:47,347 : INFO : PROGRESS: at sentence #12090000, processed 103015647 words, keeping 371099 word types
2018-01-18 10:13:47,445 : INFO : PROGRESS: at sentence #12100000, processed 103100601 words, keeping 371099 word types
2018-01-18 10:13:47,520 : INFO : PROGRESS: at sentence #12110000, processed 103184742 words, keeping 371099 word types
2018-01-18 10:13:47,609 : INFO : PROGRESS: at se

2018-01-18 10:13:52,401 : INFO : PROGRESS: at sentence #12730000, processed 108475543 words, keeping 371099 word types
2018-01-18 10:13:52,477 : INFO : PROGRESS: at sentence #12740000, processed 108561307 words, keeping 371099 word types
2018-01-18 10:13:52,601 : INFO : PROGRESS: at sentence #12750000, processed 108646520 words, keeping 371099 word types
2018-01-18 10:13:52,675 : INFO : PROGRESS: at sentence #12760000, processed 108733560 words, keeping 371099 word types
2018-01-18 10:13:52,755 : INFO : PROGRESS: at sentence #12770000, processed 108818406 words, keeping 371099 word types
2018-01-18 10:13:52,831 : INFO : PROGRESS: at sentence #12780000, processed 108904930 words, keeping 371099 word types
2018-01-18 10:13:52,902 : INFO : PROGRESS: at sentence #12790000, processed 108991479 words, keeping 371099 word types
2018-01-18 10:13:52,976 : INFO : PROGRESS: at sentence #12800000, processed 109077323 words, keeping 371099 word types
2018-01-18 10:13:53,051 : INFO : PROGRESS: at se

2018-01-18 10:13:57,138 : INFO : PROGRESS: at sentence #13420000, processed 114366744 words, keeping 371099 word types
2018-01-18 10:13:57,197 : INFO : PROGRESS: at sentence #13430000, processed 114451761 words, keeping 371099 word types
2018-01-18 10:13:57,262 : INFO : PROGRESS: at sentence #13440000, processed 114536539 words, keeping 371099 word types
2018-01-18 10:13:57,354 : INFO : PROGRESS: at sentence #13450000, processed 114621977 words, keeping 371099 word types
2018-01-18 10:13:57,423 : INFO : PROGRESS: at sentence #13460000, processed 114706458 words, keeping 371099 word types
2018-01-18 10:13:57,505 : INFO : PROGRESS: at sentence #13470000, processed 114791040 words, keeping 371099 word types
2018-01-18 10:13:57,598 : INFO : PROGRESS: at sentence #13480000, processed 114876340 words, keeping 371099 word types
2018-01-18 10:13:57,677 : INFO : PROGRESS: at sentence #13490000, processed 114961869 words, keeping 371099 word types
2018-01-18 10:13:57,756 : INFO : PROGRESS: at se

2018-01-18 10:14:02,796 : INFO : PROGRESS: at sentence #14110000, processed 120222529 words, keeping 371099 word types
2018-01-18 10:14:02,871 : INFO : PROGRESS: at sentence #14120000, processed 120306361 words, keeping 371099 word types
2018-01-18 10:14:02,949 : INFO : PROGRESS: at sentence #14130000, processed 120391479 words, keeping 371099 word types
2018-01-18 10:14:03,047 : INFO : PROGRESS: at sentence #14140000, processed 120476501 words, keeping 371099 word types
2018-01-18 10:14:03,163 : INFO : PROGRESS: at sentence #14150000, processed 120560896 words, keeping 371099 word types
2018-01-18 10:14:03,293 : INFO : PROGRESS: at sentence #14160000, processed 120645018 words, keeping 371099 word types
2018-01-18 10:14:03,414 : INFO : PROGRESS: at sentence #14170000, processed 120729626 words, keeping 371099 word types
2018-01-18 10:14:03,510 : INFO : PROGRESS: at sentence #14180000, processed 120814949 words, keeping 371099 word types
2018-01-18 10:14:03,609 : INFO : PROGRESS: at se

2018-01-18 10:14:08,323 : INFO : PROGRESS: at sentence #14800000, processed 126080861 words, keeping 371099 word types
2018-01-18 10:14:08,396 : INFO : PROGRESS: at sentence #14810000, processed 126165528 words, keeping 371099 word types
2018-01-18 10:14:08,464 : INFO : PROGRESS: at sentence #14820000, processed 126250794 words, keeping 371099 word types
2018-01-18 10:14:08,528 : INFO : PROGRESS: at sentence #14830000, processed 126336330 words, keeping 371099 word types
2018-01-18 10:14:08,606 : INFO : PROGRESS: at sentence #14840000, processed 126421328 words, keeping 371099 word types
2018-01-18 10:14:08,667 : INFO : PROGRESS: at sentence #14850000, processed 126505585 words, keeping 371099 word types
2018-01-18 10:14:08,736 : INFO : PROGRESS: at sentence #14860000, processed 126590870 words, keeping 371099 word types
2018-01-18 10:14:08,796 : INFO : PROGRESS: at sentence #14870000, processed 126675640 words, keeping 371099 word types
2018-01-18 10:14:08,858 : INFO : PROGRESS: at se

2018-01-18 10:14:14,241 : INFO : PROGRESS: at sentence #15490000, processed 131986624 words, keeping 371099 word types
2018-01-18 10:14:14,311 : INFO : PROGRESS: at sentence #15500000, processed 132072845 words, keeping 371099 word types
2018-01-18 10:14:14,374 : INFO : PROGRESS: at sentence #15510000, processed 132157895 words, keeping 371099 word types
2018-01-18 10:14:14,476 : INFO : PROGRESS: at sentence #15520000, processed 132243066 words, keeping 371099 word types
2018-01-18 10:14:14,575 : INFO : PROGRESS: at sentence #15530000, processed 132329350 words, keeping 371099 word types
2018-01-18 10:14:14,666 : INFO : PROGRESS: at sentence #15540000, processed 132415365 words, keeping 371099 word types
2018-01-18 10:14:14,965 : INFO : PROGRESS: at sentence #15550000, processed 132500795 words, keeping 371099 word types
2018-01-18 10:14:15,050 : INFO : PROGRESS: at sentence #15560000, processed 132586278 words, keeping 371099 word types
2018-01-18 10:14:15,119 : INFO : PROGRESS: at se

2018-01-18 10:14:20,869 : INFO : PROGRESS: at sentence #16180000, processed 137920115 words, keeping 371099 word types
2018-01-18 10:14:20,959 : INFO : PROGRESS: at sentence #16190000, processed 138006408 words, keeping 371099 word types
2018-01-18 10:14:21,033 : INFO : PROGRESS: at sentence #16200000, processed 138092273 words, keeping 371099 word types
2018-01-18 10:14:21,112 : INFO : PROGRESS: at sentence #16210000, processed 138178103 words, keeping 371099 word types
2018-01-18 10:14:21,202 : INFO : PROGRESS: at sentence #16220000, processed 138264726 words, keeping 371099 word types
2018-01-18 10:14:21,278 : INFO : PROGRESS: at sentence #16230000, processed 138351422 words, keeping 371099 word types
2018-01-18 10:14:21,368 : INFO : PROGRESS: at sentence #16240000, processed 138437802 words, keeping 371099 word types
2018-01-18 10:14:21,560 : INFO : PROGRESS: at sentence #16250000, processed 138522636 words, keeping 371099 word types
2018-01-18 10:14:21,660 : INFO : PROGRESS: at se

2018-01-18 10:14:27,020 : INFO : PROGRESS: at sentence #16870000, processed 143865887 words, keeping 371099 word types
2018-01-18 10:14:27,088 : INFO : PROGRESS: at sentence #16880000, processed 143952815 words, keeping 371099 word types
2018-01-18 10:14:27,158 : INFO : PROGRESS: at sentence #16890000, processed 144038733 words, keeping 371099 word types
2018-01-18 10:14:27,225 : INFO : PROGRESS: at sentence #16900000, processed 144124181 words, keeping 371099 word types
2018-01-18 10:14:27,302 : INFO : PROGRESS: at sentence #16910000, processed 144210325 words, keeping 371099 word types
2018-01-18 10:14:27,364 : INFO : PROGRESS: at sentence #16920000, processed 144297825 words, keeping 371099 word types
2018-01-18 10:14:27,431 : INFO : PROGRESS: at sentence #16930000, processed 144384643 words, keeping 371099 word types
2018-01-18 10:14:27,497 : INFO : PROGRESS: at sentence #16940000, processed 144470736 words, keeping 371099 word types
2018-01-18 10:14:27,566 : INFO : PROGRESS: at se

2018-01-18 10:14:34,018 : INFO : PROGRESS: at sentence #17560000, processed 149810434 words, keeping 371099 word types
2018-01-18 10:14:34,058 : INFO : collected 371099 word types from a corpus of 149855462 raw words and 17565207 sentences
2018-01-18 10:14:34,064 : INFO : Loading a fresh vocabulary
2018-01-18 10:14:37,109 : INFO : min_count=3 retains 244261 unique words (65% of original 371099, drops 126838)
2018-01-18 10:14:37,111 : INFO : min_count=3 leaves 149601786 word corpus (99% of original 149855462, drops 253676)
2018-01-18 10:14:38,020 : INFO : deleting the raw counts dictionary of 371099 items
2018-01-18 10:14:38,071 : INFO : sample=0.001 downsamples 45 most-common words
2018-01-18 10:14:38,072 : INFO : downsampling leaves estimated 120858757 word corpus (80.8% of prior 149601786)
2018-01-18 10:14:38,075 : INFO : estimated required memory for 244261 words and 250 dimensions: 610652500 bytes
2018-01-18 10:14:39,081 : INFO : resetting layer weights
2018-01-18 10:14:43,649 : IN

2018-01-18 10:15:56,793 : INFO : PROGRESS: at 2.99% examples, 246331 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:15:57,844 : INFO : PROGRESS: at 3.03% examples, 246441 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:15:58,876 : INFO : PROGRESS: at 3.07% examples, 246281 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:15:59,890 : INFO : PROGRESS: at 3.11% examples, 246292 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:16:00,927 : INFO : PROGRESS: at 3.16% examples, 246643 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:16:01,990 : INFO : PROGRESS: at 3.20% examples, 246697 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:16:03,017 : INFO : PROGRESS: at 3.25% examples, 246868 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:16:04,029 : INFO : PROGRESS: at 3.29% examples, 247076 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:16:05,048 : INFO : PROGRESS: at 3.33% examples, 246566 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:16:06,074 : INFO : PROGRESS: at 3.37% examples, 246833 words/s, in_q

2018-01-18 10:17:19,727 : INFO : PROGRESS: at 6.38% examples, 245984 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:20,762 : INFO : PROGRESS: at 6.42% examples, 246210 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:21,798 : INFO : PROGRESS: at 6.47% examples, 246433 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:22,834 : INFO : PROGRESS: at 6.52% examples, 246658 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:23,847 : INFO : PROGRESS: at 6.57% examples, 246861 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:24,860 : INFO : PROGRESS: at 6.61% examples, 247064 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:25,903 : INFO : PROGRESS: at 6.66% examples, 247267 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:17:26,913 : INFO : PROGRESS: at 6.71% examples, 247468 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:27,963 : INFO : PROGRESS: at 6.76% examples, 247660 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:17:28,997 : INFO : PROGRESS: at 6.80% examples, 247821 words/s, in_q

2018-01-18 10:18:42,719 : INFO : PROGRESS: at 9.94% examples, 250888 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:18:43,720 : INFO : PROGRESS: at 9.98% examples, 250648 words/s, in_qsize 6, out_qsize 1
2018-01-18 10:18:44,721 : INFO : PROGRESS: at 10.01% examples, 250577 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:18:45,883 : INFO : PROGRESS: at 10.05% examples, 250173 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:18:46,910 : INFO : PROGRESS: at 10.09% examples, 250111 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:18:47,941 : INFO : PROGRESS: at 10.13% examples, 250145 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:18:48,962 : INFO : PROGRESS: at 10.17% examples, 250090 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:18:49,977 : INFO : PROGRESS: at 10.21% examples, 250106 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:18:50,993 : INFO : PROGRESS: at 10.26% examples, 250221 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:18:52,104 : INFO : PROGRESS: at 10.29% examples, 249848 words

2018-01-18 10:20:05,195 : INFO : PROGRESS: at 13.07% examples, 245569 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:20:06,200 : INFO : PROGRESS: at 13.12% examples, 245652 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:20:07,209 : INFO : PROGRESS: at 13.16% examples, 245757 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:20:08,246 : INFO : PROGRESS: at 13.21% examples, 245867 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:20:09,250 : INFO : PROGRESS: at 13.26% examples, 245977 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:20:10,255 : INFO : PROGRESS: at 13.31% examples, 246060 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:20:11,262 : INFO : PROGRESS: at 13.35% examples, 246067 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:20:12,270 : INFO : PROGRESS: at 13.39% examples, 246147 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:20:13,272 : INFO : PROGRESS: at 13.44% examples, 246231 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:20:14,289 : INFO : PROGRESS: at 13.49% examples, 246278 wor

2018-01-18 10:21:26,803 : INFO : PROGRESS: at 16.61% examples, 248771 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:21:27,838 : INFO : PROGRESS: at 16.66% examples, 248853 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:21:28,891 : INFO : PROGRESS: at 16.71% examples, 248924 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:21:29,916 : INFO : PROGRESS: at 16.76% examples, 249011 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:21:30,917 : INFO : PROGRESS: at 16.80% examples, 249073 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:21:31,923 : INFO : PROGRESS: at 16.85% examples, 249151 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:21:32,959 : INFO : PROGRESS: at 16.90% examples, 249211 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:21:33,994 : INFO : PROGRESS: at 16.95% examples, 249291 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:21:35,045 : INFO : PROGRESS: at 17.00% examples, 249361 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:21:36,062 : INFO : PROGRESS: at 17.04% examples, 249431 wor

2018-01-18 10:22:48,436 : INFO : PROGRESS: at 20.26% examples, 252597 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:49,436 : INFO : PROGRESS: at 20.30% examples, 252688 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:50,442 : INFO : PROGRESS: at 20.35% examples, 252744 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:51,445 : INFO : PROGRESS: at 20.40% examples, 252785 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:52,486 : INFO : PROGRESS: at 20.45% examples, 252873 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:53,491 : INFO : PROGRESS: at 20.50% examples, 252961 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:22:54,517 : INFO : PROGRESS: at 20.55% examples, 253037 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:55,527 : INFO : PROGRESS: at 20.59% examples, 253087 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:56,539 : INFO : PROGRESS: at 20.64% examples, 253138 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:22:57,582 : INFO : PROGRESS: at 20.69% examples, 253188 wor

2018-01-18 10:24:10,324 : INFO : PROGRESS: at 23.84% examples, 254144 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:24:11,341 : INFO : PROGRESS: at 23.88% examples, 254154 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:24:12,357 : INFO : PROGRESS: at 23.93% examples, 254206 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:24:13,380 : INFO : PROGRESS: at 23.97% examples, 254241 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:24:14,423 : INFO : PROGRESS: at 24.02% examples, 254283 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:24:15,442 : INFO : PROGRESS: at 24.07% examples, 254321 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:24:16,472 : INFO : PROGRESS: at 24.12% examples, 254355 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:24:17,518 : INFO : PROGRESS: at 24.16% examples, 254340 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:24:18,531 : INFO : PROGRESS: at 24.19% examples, 254243 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:24:19,547 : INFO : PROGRESS: at 24.24% examples, 254256 wor

2018-01-18 10:25:32,061 : INFO : PROGRESS: at 27.48% examples, 255883 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:33,109 : INFO : PROGRESS: at 27.53% examples, 255919 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:34,113 : INFO : PROGRESS: at 27.57% examples, 255959 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:35,115 : INFO : PROGRESS: at 27.62% examples, 256010 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:36,143 : INFO : PROGRESS: at 27.67% examples, 256028 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:37,206 : INFO : PROGRESS: at 27.70% examples, 255919 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:38,223 : INFO : PROGRESS: at 27.74% examples, 255916 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:39,236 : INFO : PROGRESS: at 27.79% examples, 255963 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:40,255 : INFO : PROGRESS: at 27.84% examples, 256020 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:25:41,258 : INFO : PROGRESS: at 27.89% examples, 256071 wor

2018-01-18 10:26:53,830 : INFO : PROGRESS: at 31.19% examples, 258036 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:26:54,843 : INFO : PROGRESS: at 31.23% examples, 258053 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:26:55,896 : INFO : PROGRESS: at 31.28% examples, 258078 words/s, in_qsize 6, out_qsize 1
2018-01-18 10:26:56,959 : INFO : PROGRESS: at 31.32% examples, 258045 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:26:57,974 : INFO : PROGRESS: at 31.36% examples, 258028 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:26:58,976 : INFO : PROGRESS: at 31.40% examples, 257972 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:27:00,020 : INFO : PROGRESS: at 31.43% examples, 257880 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:27:01,039 : INFO : PROGRESS: at 31.47% examples, 257851 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:27:02,090 : INFO : PROGRESS: at 31.50% examples, 257746 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:27:03,110 : INFO : PROGRESS: at 31.54% examples, 257718 wor

2018-01-18 10:28:15,616 : INFO : PROGRESS: at 34.81% examples, 258978 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:28:16,619 : INFO : PROGRESS: at 34.86% examples, 259007 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:28:17,623 : INFO : PROGRESS: at 34.90% examples, 259025 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:28:18,650 : INFO : PROGRESS: at 34.95% examples, 259026 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:28:19,673 : INFO : PROGRESS: at 34.99% examples, 259027 words/s, in_qsize 6, out_qsize 1
2018-01-18 10:28:20,675 : INFO : PROGRESS: at 35.04% examples, 259046 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:28:21,710 : INFO : PROGRESS: at 35.08% examples, 259054 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:28:22,760 : INFO : PROGRESS: at 35.13% examples, 259048 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:28:23,785 : INFO : PROGRESS: at 35.17% examples, 259059 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:28:24,798 : INFO : PROGRESS: at 35.22% examples, 259074 wor

2018-01-18 10:29:37,254 : INFO : PROGRESS: at 38.49% examples, 260214 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:29:38,291 : INFO : PROGRESS: at 38.54% examples, 260236 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:29:39,335 : INFO : PROGRESS: at 38.58% examples, 260257 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:29:40,361 : INFO : PROGRESS: at 38.63% examples, 260282 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:29:41,383 : INFO : PROGRESS: at 38.67% examples, 260282 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:29:42,385 : INFO : PROGRESS: at 38.72% examples, 260306 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:29:43,393 : INFO : PROGRESS: at 38.77% examples, 260320 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:29:44,409 : INFO : PROGRESS: at 38.81% examples, 260340 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:29:45,419 : INFO : PROGRESS: at 38.86% examples, 260362 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:29:46,446 : INFO : PROGRESS: at 38.90% examples, 260369 wor

2018-01-18 10:30:59,366 : INFO : PROGRESS: at 42.15% examples, 260995 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:31:00,368 : INFO : PROGRESS: at 42.20% examples, 261025 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:31:01,402 : INFO : PROGRESS: at 42.25% examples, 261054 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:31:02,414 : INFO : PROGRESS: at 42.30% examples, 261088 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:31:03,418 : INFO : PROGRESS: at 42.34% examples, 261109 words/s, in_qsize 6, out_qsize 1
2018-01-18 10:31:04,427 : INFO : PROGRESS: at 42.39% examples, 261128 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:31:05,441 : INFO : PROGRESS: at 42.44% examples, 261138 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:31:06,443 : INFO : PROGRESS: at 42.48% examples, 261167 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:31:07,447 : INFO : PROGRESS: at 42.53% examples, 261195 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:31:08,453 : INFO : PROGRESS: at 42.58% examples, 261223 wor

2018-01-18 10:32:21,184 : INFO : PROGRESS: at 45.66% examples, 260765 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:32:22,193 : INFO : PROGRESS: at 45.70% examples, 260776 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:32:23,195 : INFO : PROGRESS: at 45.75% examples, 260788 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:32:24,214 : INFO : PROGRESS: at 45.79% examples, 260780 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:32:25,240 : INFO : PROGRESS: at 45.84% examples, 260794 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:32:26,244 : INFO : PROGRESS: at 45.88% examples, 260806 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:32:27,277 : INFO : PROGRESS: at 45.93% examples, 260811 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:32:28,280 : INFO : PROGRESS: at 45.98% examples, 260830 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:32:29,292 : INFO : PROGRESS: at 46.02% examples, 260825 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:32:30,320 : INFO : PROGRESS: at 46.06% examples, 260823 wor

2018-01-18 10:33:42,516 : INFO : PROGRESS: at 49.35% examples, 261711 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:43,537 : INFO : PROGRESS: at 49.39% examples, 261710 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:44,564 : INFO : PROGRESS: at 49.44% examples, 261729 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:45,597 : INFO : PROGRESS: at 49.49% examples, 261746 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:46,613 : INFO : PROGRESS: at 49.53% examples, 261760 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:47,646 : INFO : PROGRESS: at 49.58% examples, 261778 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:33:48,671 : INFO : PROGRESS: at 49.63% examples, 261797 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:49,714 : INFO : PROGRESS: at 49.67% examples, 261813 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:50,729 : INFO : PROGRESS: at 49.72% examples, 261820 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:33:51,746 : INFO : PROGRESS: at 49.77% examples, 261841 wor

2018-01-18 10:35:04,023 : INFO : PROGRESS: at 53.06% examples, 262727 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:05,039 : INFO : PROGRESS: at 53.11% examples, 262745 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:06,043 : INFO : PROGRESS: at 53.16% examples, 262760 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:07,073 : INFO : PROGRESS: at 53.20% examples, 262770 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:08,124 : INFO : PROGRESS: at 53.25% examples, 262781 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:09,136 : INFO : PROGRESS: at 53.30% examples, 262801 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:10,173 : INFO : PROGRESS: at 53.35% examples, 262816 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:11,173 : INFO : PROGRESS: at 53.40% examples, 262838 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:35:12,223 : INFO : PROGRESS: at 53.45% examples, 262843 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:35:13,235 : INFO : PROGRESS: at 53.50% examples, 262863 wor

2018-01-18 10:36:25,554 : INFO : PROGRESS: at 56.83% examples, 263713 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:26,606 : INFO : PROGRESS: at 56.88% examples, 263723 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:36:27,630 : INFO : PROGRESS: at 56.93% examples, 263733 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:28,666 : INFO : PROGRESS: at 56.98% examples, 263746 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:29,670 : INFO : PROGRESS: at 57.02% examples, 263760 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:30,684 : INFO : PROGRESS: at 57.07% examples, 263771 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:31,689 : INFO : PROGRESS: at 57.12% examples, 263778 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:32,708 : INFO : PROGRESS: at 57.16% examples, 263788 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:33,739 : INFO : PROGRESS: at 57.21% examples, 263802 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:36:34,756 : INFO : PROGRESS: at 57.26% examples, 263818 wor

2018-01-18 10:37:47,162 : INFO : PROGRESS: at 60.58% examples, 264616 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:37:48,182 : INFO : PROGRESS: at 60.62% examples, 264630 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:37:49,189 : INFO : PROGRESS: at 60.67% examples, 264646 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:37:50,192 : INFO : PROGRESS: at 60.72% examples, 264657 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:37:51,203 : INFO : PROGRESS: at 60.77% examples, 264673 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:37:52,210 : INFO : PROGRESS: at 60.81% examples, 264690 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:37:53,229 : INFO : PROGRESS: at 60.86% examples, 264698 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:37:54,254 : INFO : PROGRESS: at 60.91% examples, 264712 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:37:55,256 : INFO : PROGRESS: at 60.95% examples, 264712 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:37:56,265 : INFO : PROGRESS: at 61.00% examples, 264722 wor

2018-01-18 10:39:08,787 : INFO : PROGRESS: at 64.39% examples, 265554 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:39:09,824 : INFO : PROGRESS: at 64.44% examples, 265569 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:39:10,861 : INFO : PROGRESS: at 64.49% examples, 265579 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:39:11,881 : INFO : PROGRESS: at 64.54% examples, 265591 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:39:12,893 : INFO : PROGRESS: at 64.58% examples, 265600 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:39:13,910 : INFO : PROGRESS: at 64.63% examples, 265608 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:39:14,914 : INFO : PROGRESS: at 64.68% examples, 265613 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:39:15,919 : INFO : PROGRESS: at 64.72% examples, 265628 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:39:16,930 : INFO : PROGRESS: at 64.77% examples, 265636 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:39:17,950 : INFO : PROGRESS: at 64.82% examples, 265649 wor

2018-01-18 10:40:30,278 : INFO : PROGRESS: at 68.12% examples, 266084 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:31,312 : INFO : PROGRESS: at 68.17% examples, 266094 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:32,315 : INFO : PROGRESS: at 68.22% examples, 266109 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:33,323 : INFO : PROGRESS: at 68.27% examples, 266118 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:34,329 : INFO : PROGRESS: at 68.31% examples, 266128 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:40:35,337 : INFO : PROGRESS: at 68.36% examples, 266137 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:36,369 : INFO : PROGRESS: at 68.41% examples, 266142 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:37,432 : INFO : PROGRESS: at 68.46% examples, 266147 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:38,448 : INFO : PROGRESS: at 68.49% examples, 266118 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:40:39,451 : INFO : PROGRESS: at 68.54% examples, 266123 wor

2018-01-18 10:41:52,080 : INFO : PROGRESS: at 71.86% examples, 266648 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:41:53,131 : INFO : PROGRESS: at 71.90% examples, 266653 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:41:54,147 : INFO : PROGRESS: at 71.95% examples, 266663 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:41:55,188 : INFO : PROGRESS: at 72.00% examples, 266669 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:41:56,195 : INFO : PROGRESS: at 72.04% examples, 266681 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:41:57,215 : INFO : PROGRESS: at 72.09% examples, 266691 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:41:58,246 : INFO : PROGRESS: at 72.14% examples, 266700 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:41:59,255 : INFO : PROGRESS: at 72.19% examples, 266712 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:42:00,273 : INFO : PROGRESS: at 72.24% examples, 266723 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:42:01,285 : INFO : PROGRESS: at 72.28% examples, 266730 wor

2018-01-18 10:43:13,710 : INFO : PROGRESS: at 75.63% examples, 267214 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:43:14,715 : INFO : PROGRESS: at 75.68% examples, 267222 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:43:15,730 : INFO : PROGRESS: at 75.72% examples, 267229 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:43:16,752 : INFO : PROGRESS: at 75.77% examples, 267239 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:43:17,759 : INFO : PROGRESS: at 75.82% examples, 267252 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:43:18,775 : INFO : PROGRESS: at 75.87% examples, 267258 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:43:19,796 : INFO : PROGRESS: at 75.92% examples, 267268 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:43:20,811 : INFO : PROGRESS: at 75.96% examples, 267270 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:43:21,853 : INFO : PROGRESS: at 76.00% examples, 267248 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:43:22,854 : INFO : PROGRESS: at 76.04% examples, 267234 wor

2018-01-18 10:44:35,363 : INFO : PROGRESS: at 79.38% examples, 267743 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:36,370 : INFO : PROGRESS: at 79.42% examples, 267723 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:37,376 : INFO : PROGRESS: at 79.47% examples, 267743 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:38,380 : INFO : PROGRESS: at 79.52% examples, 267746 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:39,439 : INFO : PROGRESS: at 79.57% examples, 267754 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:40,487 : INFO : PROGRESS: at 79.61% examples, 267760 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:41,496 : INFO : PROGRESS: at 79.66% examples, 267757 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:42,530 : INFO : PROGRESS: at 79.71% examples, 267765 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:43,598 : INFO : PROGRESS: at 79.75% examples, 267767 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:44:44,631 : INFO : PROGRESS: at 79.80% examples, 267762 wor

2018-01-18 10:45:56,772 : INFO : PROGRESS: at 83.09% examples, 268048 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:45:57,777 : INFO : PROGRESS: at 83.14% examples, 268050 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:45:58,814 : INFO : PROGRESS: at 83.19% examples, 268057 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:45:59,828 : INFO : PROGRESS: at 83.23% examples, 268067 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:46:00,837 : INFO : PROGRESS: at 83.28% examples, 268074 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:46:01,877 : INFO : PROGRESS: at 83.33% examples, 268080 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:46:02,888 : INFO : PROGRESS: at 83.38% examples, 268091 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:46:03,889 : INFO : PROGRESS: at 83.42% examples, 268095 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:46:04,904 : INFO : PROGRESS: at 83.47% examples, 268105 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:46:05,920 : INFO : PROGRESS: at 83.52% examples, 268106 wor

2018-01-18 10:47:18,604 : INFO : PROGRESS: at 86.73% examples, 268038 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:47:19,605 : INFO : PROGRESS: at 86.77% examples, 268009 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:47:20,650 : INFO : PROGRESS: at 86.81% examples, 267985 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:47:21,677 : INFO : PROGRESS: at 86.85% examples, 267985 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:47:22,695 : INFO : PROGRESS: at 86.90% examples, 267986 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:47:23,708 : INFO : PROGRESS: at 86.94% examples, 267967 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:47:24,713 : INFO : PROGRESS: at 86.98% examples, 267958 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:47:25,731 : INFO : PROGRESS: at 87.02% examples, 267959 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:47:26,772 : INFO : PROGRESS: at 87.07% examples, 267965 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:47:27,800 : INFO : PROGRESS: at 87.12% examples, 267969 wor

2018-01-18 10:48:40,841 : INFO : PROGRESS: at 90.23% examples, 267607 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:48:41,855 : INFO : PROGRESS: at 90.28% examples, 267616 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:48:42,874 : INFO : PROGRESS: at 90.32% examples, 267609 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:48:43,889 : INFO : PROGRESS: at 90.36% examples, 267606 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:48:44,918 : INFO : PROGRESS: at 90.40% examples, 267570 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:48:45,951 : INFO : PROGRESS: at 90.43% examples, 267533 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:48:46,978 : INFO : PROGRESS: at 90.47% examples, 267509 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:48:48,012 : INFO : PROGRESS: at 90.51% examples, 267492 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:48:49,014 : INFO : PROGRESS: at 90.54% examples, 267468 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:48:50,040 : INFO : PROGRESS: at 90.59% examples, 267464 wor

2018-01-18 10:50:02,635 : INFO : PROGRESS: at 93.68% examples, 267126 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:50:03,642 : INFO : PROGRESS: at 93.73% examples, 267120 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:50:04,650 : INFO : PROGRESS: at 93.77% examples, 267127 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:50:05,680 : INFO : PROGRESS: at 93.82% examples, 267130 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:50:06,698 : INFO : PROGRESS: at 93.87% examples, 267139 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:50:07,705 : INFO : PROGRESS: at 93.92% examples, 267145 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:50:08,706 : INFO : PROGRESS: at 93.96% examples, 267152 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:50:09,750 : INFO : PROGRESS: at 94.01% examples, 267150 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:50:10,752 : INFO : PROGRESS: at 94.05% examples, 267145 words/s, in_qsize 6, out_qsize 0
2018-01-18 10:50:11,773 : INFO : PROGRESS: at 94.10% examples, 267153 wor

2018-01-18 10:51:24,666 : INFO : PROGRESS: at 97.42% examples, 267423 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:51:25,671 : INFO : PROGRESS: at 97.46% examples, 267425 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:51:26,682 : INFO : PROGRESS: at 97.51% examples, 267416 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:51:27,723 : INFO : PROGRESS: at 97.55% examples, 267425 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:51:28,740 : INFO : PROGRESS: at 97.60% examples, 267430 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:51:29,755 : INFO : PROGRESS: at 97.65% examples, 267435 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:51:30,791 : INFO : PROGRESS: at 97.70% examples, 267441 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:51:31,802 : INFO : PROGRESS: at 97.74% examples, 267442 words/s, in_qsize 4, out_qsize 1
2018-01-18 10:51:32,812 : INFO : PROGRESS: at 97.79% examples, 267448 words/s, in_qsize 5, out_qsize 0
2018-01-18 10:51:33,831 : INFO : PROGRESS: at 97.83% examples, 267453 wor

In [5]:
num_doc='50M'
sg=0
size=250
window=5
min_count=3
hs=1
negative=0
model_full = gensim.models.word2vec.Word2Vec(corpus, sg=sg, size=size, window=window, min_count=min_count, hs=hs, negative=0)

2018-01-21 19:31:05,777 : INFO : collecting all words and their counts
2018-01-21 19:31:05,779 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2018-01-21 19:31:05,842 : INFO : PROGRESS: at sentence #10000, processed 87251 words, keeping 7064 word types
2018-01-21 19:31:05,888 : INFO : PROGRESS: at sentence #20000, processed 178244 words, keeping 10377 word types
2018-01-21 19:31:05,938 : INFO : PROGRESS: at sentence #30000, processed 268294 words, keeping 12855 word types
2018-01-21 19:31:05,988 : INFO : PROGRESS: at sentence #40000, processed 357470 words, keeping 14950 word types
2018-01-21 19:31:06,035 : INFO : PROGRESS: at sentence #50000, processed 444951 words, keeping 16862 word types
2018-01-21 19:31:06,079 : INFO : PROGRESS: at sentence #60000, processed 531723 words, keeping 18607 word types
2018-01-21 19:31:06,123 : INFO : PROGRESS: at sentence #70000, processed 619805 words, keeping 20183 word types
2018-01-21 19:31:06,173 : INFO : PROGRESS: at se

2018-01-21 19:31:09,614 : INFO : PROGRESS: at sentence #710000, processed 6080504 words, keeping 78585 word types
2018-01-21 19:31:09,676 : INFO : PROGRESS: at sentence #720000, processed 6164893 words, keeping 79263 word types
2018-01-21 19:31:09,743 : INFO : PROGRESS: at sentence #730000, processed 6249699 words, keeping 79909 word types
2018-01-21 19:31:09,799 : INFO : PROGRESS: at sentence #740000, processed 6334495 words, keeping 80596 word types
2018-01-21 19:31:09,859 : INFO : PROGRESS: at sentence #750000, processed 6419771 words, keeping 81260 word types
2018-01-21 19:31:09,911 : INFO : PROGRESS: at sentence #760000, processed 6504866 words, keeping 81904 word types
2018-01-21 19:31:09,959 : INFO : PROGRESS: at sentence #770000, processed 6588967 words, keeping 82578 word types
2018-01-21 19:31:10,008 : INFO : PROGRESS: at sentence #780000, processed 6673573 words, keeping 83240 word types
2018-01-21 19:31:10,065 : INFO : PROGRESS: at sentence #790000, processed 6758292 words,

2018-01-21 19:31:13,318 : INFO : PROGRESS: at sentence #1420000, processed 12068791 words, keeping 121793 word types
2018-01-21 19:31:13,374 : INFO : PROGRESS: at sentence #1430000, processed 12151939 words, keeping 122317 word types
2018-01-21 19:31:13,434 : INFO : PROGRESS: at sentence #1440000, processed 12235349 words, keeping 122851 word types
2018-01-21 19:31:13,485 : INFO : PROGRESS: at sentence #1450000, processed 12318336 words, keeping 123400 word types
2018-01-21 19:31:13,534 : INFO : PROGRESS: at sentence #1460000, processed 12402604 words, keeping 123917 word types
2018-01-21 19:31:13,577 : INFO : PROGRESS: at sentence #1470000, processed 12484821 words, keeping 124491 word types
2018-01-21 19:31:13,631 : INFO : PROGRESS: at sentence #1480000, processed 12568217 words, keeping 125043 word types
2018-01-21 19:31:13,680 : INFO : PROGRESS: at sentence #1490000, processed 12651007 words, keeping 125578 word types
2018-01-21 19:31:13,729 : INFO : PROGRESS: at sentence #1500000,

2018-01-21 19:31:17,561 : INFO : PROGRESS: at sentence #2130000, processed 18085536 words, keeping 158685 word types
2018-01-21 19:31:17,616 : INFO : PROGRESS: at sentence #2140000, processed 18169842 words, keeping 159161 word types
2018-01-21 19:31:17,668 : INFO : PROGRESS: at sentence #2150000, processed 18255896 words, keeping 159695 word types
2018-01-21 19:31:17,720 : INFO : PROGRESS: at sentence #2160000, processed 18341502 words, keeping 160198 word types
2018-01-21 19:31:17,775 : INFO : PROGRESS: at sentence #2170000, processed 18427481 words, keeping 160706 word types
2018-01-21 19:31:17,825 : INFO : PROGRESS: at sentence #2180000, processed 18512852 words, keeping 161238 word types
2018-01-21 19:31:17,872 : INFO : PROGRESS: at sentence #2190000, processed 18598625 words, keeping 161697 word types
2018-01-21 19:31:17,924 : INFO : PROGRESS: at sentence #2200000, processed 18683888 words, keeping 162198 word types
2018-01-21 19:31:17,977 : INFO : PROGRESS: at sentence #2210000,

2018-01-21 19:31:21,759 : INFO : PROGRESS: at sentence #2840000, processed 24158500 words, keeping 192712 word types
2018-01-21 19:31:21,828 : INFO : PROGRESS: at sentence #2850000, processed 24243117 words, keeping 193228 word types
2018-01-21 19:31:21,884 : INFO : PROGRESS: at sentence #2860000, processed 24328944 words, keeping 193725 word types
2018-01-21 19:31:21,943 : INFO : PROGRESS: at sentence #2870000, processed 24414681 words, keeping 194189 word types
2018-01-21 19:31:22,005 : INFO : PROGRESS: at sentence #2880000, processed 24499709 words, keeping 194683 word types
2018-01-21 19:31:22,076 : INFO : PROGRESS: at sentence #2890000, processed 24584538 words, keeping 195148 word types
2018-01-21 19:31:22,148 : INFO : PROGRESS: at sentence #2900000, processed 24670374 words, keeping 195561 word types
2018-01-21 19:31:22,216 : INFO : PROGRESS: at sentence #2910000, processed 24754918 words, keeping 195995 word types
2018-01-21 19:31:22,287 : INFO : PROGRESS: at sentence #2920000,

2018-01-21 19:31:26,098 : INFO : PROGRESS: at sentence #3550000, processed 30224236 words, keeping 213830 word types
2018-01-21 19:31:26,151 : INFO : PROGRESS: at sentence #3560000, processed 30307811 words, keeping 213830 word types
2018-01-21 19:31:26,211 : INFO : PROGRESS: at sentence #3570000, processed 30392288 words, keeping 213830 word types
2018-01-21 19:31:26,262 : INFO : PROGRESS: at sentence #3580000, processed 30476806 words, keeping 213830 word types
2018-01-21 19:31:26,312 : INFO : PROGRESS: at sentence #3590000, processed 30561519 words, keeping 213830 word types
2018-01-21 19:31:26,362 : INFO : PROGRESS: at sentence #3600000, processed 30646247 words, keeping 213830 word types
2018-01-21 19:31:26,410 : INFO : PROGRESS: at sentence #3610000, processed 30730479 words, keeping 213830 word types
2018-01-21 19:31:26,472 : INFO : PROGRESS: at sentence #3620000, processed 30814630 words, keeping 213830 word types
2018-01-21 19:31:26,528 : INFO : PROGRESS: at sentence #3630000,

2018-01-21 19:31:30,070 : INFO : PROGRESS: at sentence #4260000, processed 36263046 words, keeping 213830 word types
2018-01-21 19:31:30,125 : INFO : PROGRESS: at sentence #4270000, processed 36348174 words, keeping 213830 word types
2018-01-21 19:31:30,175 : INFO : PROGRESS: at sentence #4280000, processed 36432178 words, keeping 213830 word types
2018-01-21 19:31:30,224 : INFO : PROGRESS: at sentence #4290000, processed 36517345 words, keeping 213830 word types
2018-01-21 19:31:30,279 : INFO : PROGRESS: at sentence #4300000, processed 36601491 words, keeping 213830 word types
2018-01-21 19:31:30,331 : INFO : PROGRESS: at sentence #4310000, processed 36685749 words, keeping 213830 word types
2018-01-21 19:31:30,377 : INFO : PROGRESS: at sentence #4320000, processed 36770312 words, keeping 213830 word types
2018-01-21 19:31:30,438 : INFO : PROGRESS: at sentence #4330000, processed 36854142 words, keeping 213830 word types
2018-01-21 19:31:30,493 : INFO : PROGRESS: at sentence #4340000,

2018-01-21 19:31:33,721 : INFO : PROGRESS: at sentence #4970000, processed 42224598 words, keeping 213830 word types
2018-01-21 19:31:33,770 : INFO : PROGRESS: at sentence #4980000, processed 42308406 words, keeping 213830 word types
2018-01-21 19:31:33,826 : INFO : PROGRESS: at sentence #4990000, processed 42392760 words, keeping 213830 word types
2018-01-21 19:31:33,873 : INFO : PROGRESS: at sentence #5000000, processed 42477875 words, keeping 213830 word types
2018-01-21 19:31:33,928 : INFO : PROGRESS: at sentence #5010000, processed 42562960 words, keeping 213830 word types
2018-01-21 19:31:33,980 : INFO : PROGRESS: at sentence #5020000, processed 42647763 words, keeping 213830 word types
2018-01-21 19:31:34,031 : INFO : PROGRESS: at sentence #5030000, processed 42731705 words, keeping 213830 word types
2018-01-21 19:31:34,088 : INFO : PROGRESS: at sentence #5040000, processed 42816214 words, keeping 213830 word types
2018-01-21 19:31:34,144 : INFO : PROGRESS: at sentence #5050000,

2018-01-21 19:31:37,925 : INFO : PROGRESS: at sentence #5680000, processed 48288767 words, keeping 213830 word types
2018-01-21 19:31:37,986 : INFO : PROGRESS: at sentence #5690000, processed 48374585 words, keeping 213830 word types
2018-01-21 19:31:38,038 : INFO : PROGRESS: at sentence #5700000, processed 48460548 words, keeping 213830 word types
2018-01-21 19:31:38,093 : INFO : PROGRESS: at sentence #5710000, processed 48546797 words, keeping 213830 word types
2018-01-21 19:31:38,143 : INFO : PROGRESS: at sentence #5720000, processed 48632019 words, keeping 213830 word types
2018-01-21 19:31:38,193 : INFO : PROGRESS: at sentence #5730000, processed 48717477 words, keeping 213830 word types
2018-01-21 19:31:38,245 : INFO : PROGRESS: at sentence #5740000, processed 48803140 words, keeping 213830 word types
2018-01-21 19:31:38,313 : INFO : PROGRESS: at sentence #5750000, processed 48887203 words, keeping 213830 word types
2018-01-21 19:31:38,384 : INFO : PROGRESS: at sentence #5760000,

2018-01-21 19:31:42,208 : INFO : PROGRESS: at sentence #6390000, processed 54341337 words, keeping 213830 word types
2018-01-21 19:31:42,273 : INFO : PROGRESS: at sentence #6400000, processed 54425178 words, keeping 213830 word types
2018-01-21 19:31:42,334 : INFO : PROGRESS: at sentence #6410000, processed 54509664 words, keeping 213830 word types
2018-01-21 19:31:42,396 : INFO : PROGRESS: at sentence #6420000, processed 54594298 words, keeping 213830 word types
2018-01-21 19:31:42,457 : INFO : PROGRESS: at sentence #6430000, processed 54678323 words, keeping 213830 word types
2018-01-21 19:31:42,526 : INFO : PROGRESS: at sentence #6440000, processed 54763432 words, keeping 213830 word types
2018-01-21 19:31:42,584 : INFO : PROGRESS: at sentence #6450000, processed 54848398 words, keeping 213830 word types
2018-01-21 19:31:42,647 : INFO : PROGRESS: at sentence #6460000, processed 54932994 words, keeping 213830 word types
2018-01-21 19:31:42,714 : INFO : PROGRESS: at sentence #6470000,

2018-01-21 19:31:46,654 : INFO : PROGRESS: at sentence #7100000, processed 60360943 words, keeping 236221 word types
2018-01-21 19:31:46,717 : INFO : PROGRESS: at sentence #7110000, processed 60446982 words, keeping 236710 word types
2018-01-21 19:31:46,781 : INFO : PROGRESS: at sentence #7120000, processed 60531800 words, keeping 237152 word types
2018-01-21 19:31:46,849 : INFO : PROGRESS: at sentence #7130000, processed 60616045 words, keeping 237577 word types
2018-01-21 19:31:46,932 : INFO : PROGRESS: at sentence #7140000, processed 60700686 words, keeping 238027 word types
2018-01-21 19:31:47,000 : INFO : PROGRESS: at sentence #7150000, processed 60787138 words, keeping 238489 word types
2018-01-21 19:31:47,075 : INFO : PROGRESS: at sentence #7160000, processed 60871234 words, keeping 238849 word types
2018-01-21 19:31:47,136 : INFO : PROGRESS: at sentence #7170000, processed 60956077 words, keeping 239318 word types
2018-01-21 19:31:47,198 : INFO : PROGRESS: at sentence #7180000,

2018-01-21 19:31:51,099 : INFO : PROGRESS: at sentence #7810000, processed 66393369 words, keeping 266604 word types
2018-01-21 19:31:51,154 : INFO : PROGRESS: at sentence #7820000, processed 66478370 words, keeping 267027 word types
2018-01-21 19:31:51,227 : INFO : PROGRESS: at sentence #7830000, processed 66562919 words, keeping 267462 word types
2018-01-21 19:31:51,280 : INFO : PROGRESS: at sentence #7840000, processed 66646796 words, keeping 267853 word types
2018-01-21 19:31:51,330 : INFO : PROGRESS: at sentence #7850000, processed 66732168 words, keeping 268288 word types
2018-01-21 19:31:51,389 : INFO : PROGRESS: at sentence #7860000, processed 66819316 words, keeping 268685 word types
2018-01-21 19:31:51,443 : INFO : PROGRESS: at sentence #7870000, processed 66906399 words, keeping 269146 word types
2018-01-21 19:31:51,495 : INFO : PROGRESS: at sentence #7880000, processed 66992310 words, keeping 269597 word types
2018-01-21 19:31:51,553 : INFO : PROGRESS: at sentence #7890000,

2018-01-21 19:31:55,649 : INFO : PROGRESS: at sentence #8520000, processed 72485408 words, keeping 295728 word types
2018-01-21 19:31:55,716 : INFO : PROGRESS: at sentence #8530000, processed 72572066 words, keeping 296134 word types
2018-01-21 19:31:55,786 : INFO : PROGRESS: at sentence #8540000, processed 72657369 words, keeping 296559 word types
2018-01-21 19:31:55,855 : INFO : PROGRESS: at sentence #8550000, processed 72742327 words, keeping 296948 word types
2018-01-21 19:31:55,922 : INFO : PROGRESS: at sentence #8560000, processed 72828384 words, keeping 297407 word types
2018-01-21 19:31:55,990 : INFO : PROGRESS: at sentence #8570000, processed 72914920 words, keeping 297796 word types
2018-01-21 19:31:56,050 : INFO : PROGRESS: at sentence #8580000, processed 73000332 words, keeping 298202 word types
2018-01-21 19:31:56,118 : INFO : PROGRESS: at sentence #8590000, processed 73086749 words, keeping 298657 word types
2018-01-21 19:31:56,180 : INFO : PROGRESS: at sentence #8600000,

2018-01-21 19:32:00,799 : INFO : PROGRESS: at sentence #9230000, processed 78593188 words, keeping 324760 word types
2018-01-21 19:32:00,857 : INFO : PROGRESS: at sentence #9240000, processed 78679014 words, keeping 325139 word types
2018-01-21 19:32:00,926 : INFO : PROGRESS: at sentence #9250000, processed 78764841 words, keeping 325558 word types
2018-01-21 19:32:01,000 : INFO : PROGRESS: at sentence #9260000, processed 78851348 words, keeping 325905 word types
2018-01-21 19:32:01,067 : INFO : PROGRESS: at sentence #9270000, processed 78938524 words, keeping 326315 word types
2018-01-21 19:32:01,131 : INFO : PROGRESS: at sentence #9280000, processed 79024383 words, keeping 326711 word types
2018-01-21 19:32:01,226 : INFO : PROGRESS: at sentence #9290000, processed 79110382 words, keeping 327107 word types
2018-01-21 19:32:01,295 : INFO : PROGRESS: at sentence #9300000, processed 79196535 words, keeping 327482 word types
2018-01-21 19:32:01,353 : INFO : PROGRESS: at sentence #9310000,

2018-01-21 19:32:05,104 : INFO : PROGRESS: at sentence #9940000, processed 84712758 words, keeping 352054 word types
2018-01-21 19:32:05,191 : INFO : PROGRESS: at sentence #9950000, processed 84799873 words, keeping 352438 word types
2018-01-21 19:32:05,250 : INFO : PROGRESS: at sentence #9960000, processed 84884837 words, keeping 352793 word types
2018-01-21 19:32:05,310 : INFO : PROGRESS: at sentence #9970000, processed 84969911 words, keeping 353213 word types
2018-01-21 19:32:05,362 : INFO : PROGRESS: at sentence #9980000, processed 85055741 words, keeping 353587 word types
2018-01-21 19:32:05,419 : INFO : PROGRESS: at sentence #9990000, processed 85142305 words, keeping 353974 word types
2018-01-21 19:32:05,469 : INFO : PROGRESS: at sentence #10000000, processed 85228942 words, keeping 354354 word types
2018-01-21 19:32:05,524 : INFO : PROGRESS: at sentence #10010000, processed 85313648 words, keeping 354726 word types
2018-01-21 19:32:05,575 : INFO : PROGRESS: at sentence #100200

2018-01-21 19:32:09,036 : INFO : PROGRESS: at sentence #10640000, processed 90760271 words, keeping 371099 word types
2018-01-21 19:32:09,087 : INFO : PROGRESS: at sentence #10650000, processed 90845285 words, keeping 371099 word types
2018-01-21 19:32:09,137 : INFO : PROGRESS: at sentence #10660000, processed 90930504 words, keeping 371099 word types
2018-01-21 19:32:09,188 : INFO : PROGRESS: at sentence #10670000, processed 91015328 words, keeping 371099 word types
2018-01-21 19:32:09,242 : INFO : PROGRESS: at sentence #10680000, processed 91100569 words, keeping 371099 word types
2018-01-21 19:32:09,295 : INFO : PROGRESS: at sentence #10690000, processed 91183820 words, keeping 371099 word types
2018-01-21 19:32:09,345 : INFO : PROGRESS: at sentence #10700000, processed 91268326 words, keeping 371099 word types
2018-01-21 19:32:09,397 : INFO : PROGRESS: at sentence #10710000, processed 91352766 words, keeping 371099 word types
2018-01-21 19:32:09,448 : INFO : PROGRESS: at sentence #

2018-01-21 19:32:12,831 : INFO : PROGRESS: at sentence #11340000, processed 96713260 words, keeping 371099 word types
2018-01-21 19:32:12,915 : INFO : PROGRESS: at sentence #11350000, processed 96798025 words, keeping 371099 word types
2018-01-21 19:32:13,000 : INFO : PROGRESS: at sentence #11360000, processed 96882943 words, keeping 371099 word types
2018-01-21 19:32:13,059 : INFO : PROGRESS: at sentence #11370000, processed 96968088 words, keeping 371099 word types
2018-01-21 19:32:13,118 : INFO : PROGRESS: at sentence #11380000, processed 97052860 words, keeping 371099 word types
2018-01-21 19:32:13,179 : INFO : PROGRESS: at sentence #11390000, processed 97138907 words, keeping 371099 word types
2018-01-21 19:32:13,238 : INFO : PROGRESS: at sentence #11400000, processed 97223794 words, keeping 371099 word types
2018-01-21 19:32:13,311 : INFO : PROGRESS: at sentence #11410000, processed 97308227 words, keeping 371099 word types
2018-01-21 19:32:13,372 : INFO : PROGRESS: at sentence #

2018-01-21 19:32:16,940 : INFO : PROGRESS: at sentence #12040000, processed 102594249 words, keeping 371099 word types
2018-01-21 19:32:17,001 : INFO : PROGRESS: at sentence #12050000, processed 102678092 words, keeping 371099 word types
2018-01-21 19:32:17,057 : INFO : PROGRESS: at sentence #12060000, processed 102762708 words, keeping 371099 word types
2018-01-21 19:32:17,116 : INFO : PROGRESS: at sentence #12070000, processed 102846785 words, keeping 371099 word types
2018-01-21 19:32:17,175 : INFO : PROGRESS: at sentence #12080000, processed 102931472 words, keeping 371099 word types
2018-01-21 19:32:17,237 : INFO : PROGRESS: at sentence #12090000, processed 103015647 words, keeping 371099 word types
2018-01-21 19:32:17,300 : INFO : PROGRESS: at sentence #12100000, processed 103100601 words, keeping 371099 word types
2018-01-21 19:32:17,357 : INFO : PROGRESS: at sentence #12110000, processed 103184742 words, keeping 371099 word types
2018-01-21 19:32:17,416 : INFO : PROGRESS: at se

2018-01-21 19:32:21,302 : INFO : PROGRESS: at sentence #12730000, processed 108475543 words, keeping 371099 word types
2018-01-21 19:32:21,366 : INFO : PROGRESS: at sentence #12740000, processed 108561307 words, keeping 371099 word types
2018-01-21 19:32:21,433 : INFO : PROGRESS: at sentence #12750000, processed 108646520 words, keeping 371099 word types
2018-01-21 19:32:21,516 : INFO : PROGRESS: at sentence #12760000, processed 108733560 words, keeping 371099 word types
2018-01-21 19:32:21,581 : INFO : PROGRESS: at sentence #12770000, processed 108818406 words, keeping 371099 word types
2018-01-21 19:32:21,658 : INFO : PROGRESS: at sentence #12780000, processed 108904930 words, keeping 371099 word types
2018-01-21 19:32:21,773 : INFO : PROGRESS: at sentence #12790000, processed 108991479 words, keeping 371099 word types
2018-01-21 19:32:21,868 : INFO : PROGRESS: at sentence #12800000, processed 109077323 words, keeping 371099 word types
2018-01-21 19:32:21,973 : INFO : PROGRESS: at se

2018-01-21 19:32:26,038 : INFO : PROGRESS: at sentence #13420000, processed 114366744 words, keeping 371099 word types
2018-01-21 19:32:26,109 : INFO : PROGRESS: at sentence #13430000, processed 114451761 words, keeping 371099 word types
2018-01-21 19:32:26,164 : INFO : PROGRESS: at sentence #13440000, processed 114536539 words, keeping 371099 word types
2018-01-21 19:32:26,254 : INFO : PROGRESS: at sentence #13450000, processed 114621977 words, keeping 371099 word types
2018-01-21 19:32:26,311 : INFO : PROGRESS: at sentence #13460000, processed 114706458 words, keeping 371099 word types
2018-01-21 19:32:26,374 : INFO : PROGRESS: at sentence #13470000, processed 114791040 words, keeping 371099 word types
2018-01-21 19:32:26,429 : INFO : PROGRESS: at sentence #13480000, processed 114876340 words, keeping 371099 word types
2018-01-21 19:32:26,487 : INFO : PROGRESS: at sentence #13490000, processed 114961869 words, keeping 371099 word types
2018-01-21 19:32:26,547 : INFO : PROGRESS: at se

2018-01-21 19:32:30,393 : INFO : PROGRESS: at sentence #14110000, processed 120222529 words, keeping 371099 word types
2018-01-21 19:32:30,461 : INFO : PROGRESS: at sentence #14120000, processed 120306361 words, keeping 371099 word types
2018-01-21 19:32:30,520 : INFO : PROGRESS: at sentence #14130000, processed 120391479 words, keeping 371099 word types
2018-01-21 19:32:30,582 : INFO : PROGRESS: at sentence #14140000, processed 120476501 words, keeping 371099 word types
2018-01-21 19:32:30,643 : INFO : PROGRESS: at sentence #14150000, processed 120560896 words, keeping 371099 word types
2018-01-21 19:32:30,700 : INFO : PROGRESS: at sentence #14160000, processed 120645018 words, keeping 371099 word types
2018-01-21 19:32:30,793 : INFO : PROGRESS: at sentence #14170000, processed 120729626 words, keeping 371099 word types
2018-01-21 19:32:30,858 : INFO : PROGRESS: at sentence #14180000, processed 120814949 words, keeping 371099 word types
2018-01-21 19:32:30,918 : INFO : PROGRESS: at se

2018-01-21 19:32:34,552 : INFO : PROGRESS: at sentence #14800000, processed 126080861 words, keeping 371099 word types
2018-01-21 19:32:34,606 : INFO : PROGRESS: at sentence #14810000, processed 126165528 words, keeping 371099 word types
2018-01-21 19:32:34,687 : INFO : PROGRESS: at sentence #14820000, processed 126250794 words, keeping 371099 word types
2018-01-21 19:32:34,748 : INFO : PROGRESS: at sentence #14830000, processed 126336330 words, keeping 371099 word types
2018-01-21 19:32:34,826 : INFO : PROGRESS: at sentence #14840000, processed 126421328 words, keeping 371099 word types
2018-01-21 19:32:34,895 : INFO : PROGRESS: at sentence #14850000, processed 126505585 words, keeping 371099 word types
2018-01-21 19:32:34,954 : INFO : PROGRESS: at sentence #14860000, processed 126590870 words, keeping 371099 word types
2018-01-21 19:32:35,018 : INFO : PROGRESS: at sentence #14870000, processed 126675640 words, keeping 371099 word types
2018-01-21 19:32:35,086 : INFO : PROGRESS: at se

2018-01-21 19:32:38,809 : INFO : PROGRESS: at sentence #15490000, processed 131986624 words, keeping 371099 word types
2018-01-21 19:32:38,871 : INFO : PROGRESS: at sentence #15500000, processed 132072845 words, keeping 371099 word types
2018-01-21 19:32:38,929 : INFO : PROGRESS: at sentence #15510000, processed 132157895 words, keeping 371099 word types
2018-01-21 19:32:38,985 : INFO : PROGRESS: at sentence #15520000, processed 132243066 words, keeping 371099 word types
2018-01-21 19:32:39,045 : INFO : PROGRESS: at sentence #15530000, processed 132329350 words, keeping 371099 word types
2018-01-21 19:32:39,107 : INFO : PROGRESS: at sentence #15540000, processed 132415365 words, keeping 371099 word types
2018-01-21 19:32:39,162 : INFO : PROGRESS: at sentence #15550000, processed 132500795 words, keeping 371099 word types
2018-01-21 19:32:39,218 : INFO : PROGRESS: at sentence #15560000, processed 132586278 words, keeping 371099 word types
2018-01-21 19:32:39,272 : INFO : PROGRESS: at se

2018-01-21 19:32:43,231 : INFO : PROGRESS: at sentence #16180000, processed 137920115 words, keeping 371099 word types
2018-01-21 19:32:43,338 : INFO : PROGRESS: at sentence #16190000, processed 138006408 words, keeping 371099 word types
2018-01-21 19:32:43,443 : INFO : PROGRESS: at sentence #16200000, processed 138092273 words, keeping 371099 word types
2018-01-21 19:32:43,541 : INFO : PROGRESS: at sentence #16210000, processed 138178103 words, keeping 371099 word types
2018-01-21 19:32:43,620 : INFO : PROGRESS: at sentence #16220000, processed 138264726 words, keeping 371099 word types
2018-01-21 19:32:43,688 : INFO : PROGRESS: at sentence #16230000, processed 138351422 words, keeping 371099 word types
2018-01-21 19:32:43,760 : INFO : PROGRESS: at sentence #16240000, processed 138437802 words, keeping 371099 word types
2018-01-21 19:32:43,830 : INFO : PROGRESS: at sentence #16250000, processed 138522636 words, keeping 371099 word types
2018-01-21 19:32:43,900 : INFO : PROGRESS: at se

2018-01-21 19:32:48,711 : INFO : PROGRESS: at sentence #16870000, processed 143865887 words, keeping 371099 word types
2018-01-21 19:32:48,791 : INFO : PROGRESS: at sentence #16880000, processed 143952815 words, keeping 371099 word types
2018-01-21 19:32:48,849 : INFO : PROGRESS: at sentence #16890000, processed 144038733 words, keeping 371099 word types
2018-01-21 19:32:48,908 : INFO : PROGRESS: at sentence #16900000, processed 144124181 words, keeping 371099 word types
2018-01-21 19:32:48,974 : INFO : PROGRESS: at sentence #16910000, processed 144210325 words, keeping 371099 word types
2018-01-21 19:32:49,028 : INFO : PROGRESS: at sentence #16920000, processed 144297825 words, keeping 371099 word types
2018-01-21 19:32:49,113 : INFO : PROGRESS: at sentence #16930000, processed 144384643 words, keeping 371099 word types
2018-01-21 19:32:49,175 : INFO : PROGRESS: at sentence #16940000, processed 144470736 words, keeping 371099 word types
2018-01-21 19:32:49,235 : INFO : PROGRESS: at se

2018-01-21 19:32:53,003 : INFO : PROGRESS: at sentence #17560000, processed 149810434 words, keeping 371099 word types
2018-01-21 19:32:53,036 : INFO : collected 371099 word types from a corpus of 149855462 raw words and 17565207 sentences
2018-01-21 19:32:53,043 : INFO : Loading a fresh vocabulary
2018-01-21 19:32:55,403 : INFO : min_count=3 retains 244261 unique words (65% of original 371099, drops 126838)
2018-01-21 19:32:55,405 : INFO : min_count=3 leaves 149601786 word corpus (99% of original 149855462, drops 253676)
2018-01-21 19:32:56,417 : INFO : deleting the raw counts dictionary of 371099 items
2018-01-21 19:32:56,493 : INFO : sample=0.001 downsamples 45 most-common words
2018-01-21 19:32:56,498 : INFO : downsampling leaves estimated 120858757 word corpus (80.8% of prior 149601786)
2018-01-21 19:32:56,500 : INFO : estimated required memory for 244261 words and 250 dimensions: 659504700 bytes
2018-01-21 19:32:56,974 : INFO : constructing a huffman tree from 244261 words
2018-0

2018-01-21 19:34:19,494 : INFO : PROGRESS: at 6.42% examples, 554188 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:34:20,500 : INFO : PROGRESS: at 6.50% examples, 553615 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:34:21,519 : INFO : PROGRESS: at 6.59% examples, 553635 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:34:22,523 : INFO : PROGRESS: at 6.68% examples, 553541 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:34:23,526 : INFO : PROGRESS: at 6.78% examples, 553470 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:34:24,532 : INFO : PROGRESS: at 6.87% examples, 553372 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:34:25,540 : INFO : PROGRESS: at 6.96% examples, 553470 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:34:26,543 : INFO : PROGRESS: at 7.05% examples, 553298 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:34:27,551 : INFO : PROGRESS: at 7.14% examples, 552890 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:34:28,561 : INFO : PROGRESS: at 7.22% examples, 552370 words/s, in_q

2018-01-21 19:35:40,631 : INFO : PROGRESS: at 12.18% examples, 488116 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:35:41,638 : INFO : PROGRESS: at 12.26% examples, 487795 words/s, in_qsize 6, out_qsize 1
2018-01-21 19:35:42,652 : INFO : PROGRESS: at 12.35% examples, 488034 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:35:43,658 : INFO : PROGRESS: at 12.43% examples, 488079 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:35:44,661 : INFO : PROGRESS: at 12.51% examples, 488023 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:35:45,679 : INFO : PROGRESS: at 12.61% examples, 488546 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:35:46,709 : INFO : PROGRESS: at 12.69% examples, 488464 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:35:47,721 : INFO : PROGRESS: at 12.77% examples, 488341 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:35:48,728 : INFO : PROGRESS: at 12.85% examples, 488282 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:35:49,734 : INFO : PROGRESS: at 12.93% examples, 488178 wor

2018-01-21 19:37:01,625 : INFO : PROGRESS: at 18.55% examples, 483213 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:37:02,634 : INFO : PROGRESS: at 18.63% examples, 483300 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:37:03,657 : INFO : PROGRESS: at 18.71% examples, 483323 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:37:04,673 : INFO : PROGRESS: at 18.78% examples, 483054 words/s, in_qsize 5, out_qsize 1
2018-01-21 19:37:05,677 : INFO : PROGRESS: at 18.87% examples, 483257 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:37:06,688 : INFO : PROGRESS: at 18.95% examples, 483270 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:37:07,689 : INFO : PROGRESS: at 19.03% examples, 483337 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:37:08,699 : INFO : PROGRESS: at 19.12% examples, 483452 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:37:09,706 : INFO : PROGRESS: at 19.20% examples, 483637 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:37:10,728 : INFO : PROGRESS: at 19.27% examples, 483290 wor

2018-01-21 19:38:22,899 : INFO : PROGRESS: at 24.57% examples, 474056 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:38:23,900 : INFO : PROGRESS: at 24.66% examples, 474159 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:38:24,924 : INFO : PROGRESS: at 24.75% examples, 474404 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:38:25,935 : INFO : PROGRESS: at 24.84% examples, 474670 words/s, in_qsize 5, out_qsize 2
2018-01-21 19:38:26,952 : INFO : PROGRESS: at 24.94% examples, 475026 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:38:27,957 : INFO : PROGRESS: at 25.04% examples, 475294 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:38:28,959 : INFO : PROGRESS: at 25.13% examples, 475523 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:38:29,969 : INFO : PROGRESS: at 25.21% examples, 475585 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:38:31,000 : INFO : PROGRESS: at 25.30% examples, 475642 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:38:32,001 : INFO : PROGRESS: at 25.36% examples, 475216 wor

2018-01-21 19:39:43,919 : INFO : PROGRESS: at 30.97% examples, 474690 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:39:44,924 : INFO : PROGRESS: at 31.05% examples, 474747 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:39:45,931 : INFO : PROGRESS: at 31.13% examples, 474801 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:39:46,932 : INFO : PROGRESS: at 31.22% examples, 474902 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:39:47,933 : INFO : PROGRESS: at 31.31% examples, 475064 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:39:48,947 : INFO : PROGRESS: at 31.39% examples, 475109 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:39:49,953 : INFO : PROGRESS: at 31.48% examples, 475243 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:39:50,958 : INFO : PROGRESS: at 31.56% examples, 475299 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:39:51,981 : INFO : PROGRESS: at 31.64% examples, 475373 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:39:52,989 : INFO : PROGRESS: at 31.73% examples, 475446 wor

2018-01-21 19:41:05,123 : INFO : PROGRESS: at 37.57% examples, 477407 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:06,148 : INFO : PROGRESS: at 37.66% examples, 477548 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:07,150 : INFO : PROGRESS: at 37.75% examples, 477644 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:41:08,156 : INFO : PROGRESS: at 37.84% examples, 477821 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:09,173 : INFO : PROGRESS: at 37.93% examples, 477985 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:10,173 : INFO : PROGRESS: at 38.02% examples, 478032 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:11,186 : INFO : PROGRESS: at 38.10% examples, 478149 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:12,188 : INFO : PROGRESS: at 38.19% examples, 478209 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:13,195 : INFO : PROGRESS: at 38.28% examples, 478330 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:41:14,208 : INFO : PROGRESS: at 38.36% examples, 478362 wor

2018-01-21 19:42:26,567 : INFO : PROGRESS: at 44.14% examples, 479001 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:42:27,570 : INFO : PROGRESS: at 44.23% examples, 479107 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:42:28,577 : INFO : PROGRESS: at 44.32% examples, 479149 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:42:29,578 : INFO : PROGRESS: at 44.38% examples, 478937 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:42:30,594 : INFO : PROGRESS: at 44.47% examples, 479043 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:42:31,598 : INFO : PROGRESS: at 44.55% examples, 479046 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:42:32,640 : INFO : PROGRESS: at 44.63% examples, 479032 words/s, in_qsize 5, out_qsize 1
2018-01-21 19:42:33,657 : INFO : PROGRESS: at 44.69% examples, 478866 words/s, in_qsize 5, out_qsize 1
2018-01-21 19:42:34,665 : INFO : PROGRESS: at 44.76% examples, 478738 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:42:35,683 : INFO : PROGRESS: at 44.82% examples, 478489 wor

2018-01-21 19:43:47,772 : INFO : PROGRESS: at 50.01% examples, 473480 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:43:48,778 : INFO : PROGRESS: at 50.09% examples, 473542 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:43:49,778 : INFO : PROGRESS: at 50.18% examples, 473670 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:43:50,784 : INFO : PROGRESS: at 50.27% examples, 473758 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:43:51,815 : INFO : PROGRESS: at 50.34% examples, 473649 words/s, in_qsize 5, out_qsize 2
2018-01-21 19:43:52,829 : INFO : PROGRESS: at 50.41% examples, 473553 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:43:53,831 : INFO : PROGRESS: at 50.49% examples, 473542 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:43:54,842 : INFO : PROGRESS: at 50.58% examples, 473675 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:43:55,857 : INFO : PROGRESS: at 50.66% examples, 473693 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:43:56,881 : INFO : PROGRESS: at 50.74% examples, 473679 wor

2018-01-21 19:45:08,672 : INFO : PROGRESS: at 56.94% examples, 478452 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:09,683 : INFO : PROGRESS: at 57.04% examples, 478588 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:10,697 : INFO : PROGRESS: at 57.11% examples, 478575 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:45:11,708 : INFO : PROGRESS: at 57.20% examples, 478641 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:12,736 : INFO : PROGRESS: at 57.30% examples, 478787 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:13,752 : INFO : PROGRESS: at 57.39% examples, 478905 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:14,754 : INFO : PROGRESS: at 57.48% examples, 479000 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:15,760 : INFO : PROGRESS: at 57.58% examples, 479125 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:16,761 : INFO : PROGRESS: at 57.67% examples, 479198 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:45:17,775 : INFO : PROGRESS: at 57.75% examples, 479228 wor

2018-01-21 19:46:29,543 : INFO : PROGRESS: at 63.94% examples, 483057 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:46:30,560 : INFO : PROGRESS: at 64.03% examples, 483164 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:46:31,580 : INFO : PROGRESS: at 64.12% examples, 483191 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:46:32,688 : INFO : PROGRESS: at 64.16% examples, 482825 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:46:33,702 : INFO : PROGRESS: at 64.23% examples, 482697 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:46:34,723 : INFO : PROGRESS: at 64.31% examples, 482723 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:46:35,735 : INFO : PROGRESS: at 64.40% examples, 482824 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:46:36,736 : INFO : PROGRESS: at 64.50% examples, 482933 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:46:37,738 : INFO : PROGRESS: at 64.59% examples, 483040 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:46:38,758 : INFO : PROGRESS: at 64.69% examples, 483158 wor

2018-01-21 19:47:50,440 : INFO : PROGRESS: at 71.19% examples, 488458 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:47:51,457 : INFO : PROGRESS: at 71.29% examples, 488543 words/s, in_qsize 6, out_qsize 1
2018-01-21 19:47:52,458 : INFO : PROGRESS: at 71.38% examples, 488655 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:47:53,479 : INFO : PROGRESS: at 71.47% examples, 488701 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:47:54,495 : INFO : PROGRESS: at 71.56% examples, 488761 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:47:55,497 : INFO : PROGRESS: at 71.65% examples, 488818 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:47:56,497 : INFO : PROGRESS: at 71.74% examples, 488877 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:47:57,508 : INFO : PROGRESS: at 71.82% examples, 488874 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:47:58,512 : INFO : PROGRESS: at 71.91% examples, 488937 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:47:59,520 : INFO : PROGRESS: at 72.00% examples, 489029 wor

2018-01-21 19:49:11,258 : INFO : PROGRESS: at 78.50% examples, 493338 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:49:12,267 : INFO : PROGRESS: at 78.59% examples, 493390 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:49:13,284 : INFO : PROGRESS: at 78.68% examples, 493413 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:49:14,297 : INFO : PROGRESS: at 78.77% examples, 493472 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:49:15,311 : INFO : PROGRESS: at 78.86% examples, 493539 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:49:16,315 : INFO : PROGRESS: at 78.96% examples, 493627 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:49:17,318 : INFO : PROGRESS: at 79.05% examples, 493682 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:49:18,323 : INFO : PROGRESS: at 79.14% examples, 493735 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:49:19,339 : INFO : PROGRESS: at 79.23% examples, 493799 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:49:20,361 : INFO : PROGRESS: at 79.33% examples, 493902 wor

2018-01-21 19:50:31,996 : INFO : PROGRESS: at 85.66% examples, 496547 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:33,000 : INFO : PROGRESS: at 85.75% examples, 496603 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:34,005 : INFO : PROGRESS: at 85.84% examples, 496642 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:35,006 : INFO : PROGRESS: at 85.94% examples, 496699 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:36,008 : INFO : PROGRESS: at 86.03% examples, 496755 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:37,032 : INFO : PROGRESS: at 86.12% examples, 496801 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:38,044 : INFO : PROGRESS: at 86.22% examples, 496875 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:39,048 : INFO : PROGRESS: at 86.31% examples, 496952 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:40,050 : INFO : PROGRESS: at 86.40% examples, 496977 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:50:41,053 : INFO : PROGRESS: at 86.47% examples, 496902 wor

2018-01-21 19:51:52,883 : INFO : PROGRESS: at 92.79% examples, 499240 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:51:53,898 : INFO : PROGRESS: at 92.88% examples, 499298 words/s, in_qsize 4, out_qsize 1
2018-01-21 19:51:54,922 : INFO : PROGRESS: at 92.97% examples, 499322 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:51:55,925 : INFO : PROGRESS: at 93.07% examples, 499392 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:51:56,936 : INFO : PROGRESS: at 93.17% examples, 499452 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:51:57,942 : INFO : PROGRESS: at 93.26% examples, 499515 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:51:58,947 : INFO : PROGRESS: at 93.36% examples, 499563 words/s, in_qsize 4, out_qsize 0
2018-01-21 19:51:59,950 : INFO : PROGRESS: at 93.45% examples, 499626 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:52:00,954 : INFO : PROGRESS: at 93.55% examples, 499689 words/s, in_qsize 5, out_qsize 0
2018-01-21 19:52:01,965 : INFO : PROGRESS: at 93.65% examples, 499762 wor

2018-01-21 19:53:13,711 : INFO : PROGRESS: at 99.90% examples, 501476 words/s, in_qsize 6, out_qsize 0
2018-01-21 19:53:14,719 : INFO : PROGRESS: at 100.00% examples, 501531 words/s, in_qsize 2, out_qsize 1
2018-01-21 19:53:14,724 : INFO : worker thread finished; awaiting finish of 2 more threads
2018-01-21 19:53:14,730 : INFO : worker thread finished; awaiting finish of 1 more threads
2018-01-21 19:53:14,735 : INFO : worker thread finished; awaiting finish of 0 more threads
2018-01-21 19:53:14,736 : INFO : training on 749277310 raw words (604299908 effective words) took 1204.9s, 501537 effective words/s


####  some considerations:
 - revisit tokenization  / spell checking / entity recognition
 - stop words?
 - things to adjust: vector size, negative sampling, min_count
 - review more notes on logs

## Save model

In [10]:
path = "/Users/stevenfelix/Documents/DataScience_local/Insight/"
file = 'model_full_{}_sg{}_sz{}_win{}_min{}_hs{}_neg{}'.format(num_doc,sg,size,window,min_count,hs,negative)
model_full.save(path+file)
model_full.wv.save_word2vec_format(path+file+'_kv')

2018-01-21 20:04:24,483 : INFO : saving Word2Vec object under /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_50M_sg0_sz250_win5_min3_hs1_neg0, separately None
2018-01-21 20:04:24,487 : INFO : storing np array 'syn0' to /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_50M_sg0_sz250_win5_min3_hs1_neg0.wv.syn0.npy
2018-01-21 20:04:24,718 : INFO : not storing attribute syn0norm
2018-01-21 20:04:24,719 : INFO : storing np array 'syn1' to /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_50M_sg0_sz250_win5_min3_hs1_neg0.syn1.npy
2018-01-21 20:04:25,185 : INFO : not storing attribute cum_table
2018-01-21 20:04:30,793 : INFO : saved /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_50M_sg0_sz250_win5_min3_hs1_neg0
2018-01-21 20:04:30,795 : INFO : storing 244261x250 projection weights into /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_50M_sg0_sz250_win5_min3_hs1_neg0_kv


## brief tests

In [10]:
model_nostop.most_similar(['iterate'])

NameError: name 'model_nostop' is not defined

In [11]:
model_full.most_similar(['iterate'])

2018-01-18 11:05:27,640 : INFO : precomputing L2-norms of word weight vectors


[('iterating', 0.7748338580131531),
 ('looping', 0.6759294867515564),
 ('interate', 0.6694145202636719),
 ('traverse', 0.6611429452896118),
 ('itterate', 0.6592997312545776),
 ('itereate', 0.6149123311042786),
 ('enumerate', 0.5855768322944641),
 ('travese', 0.5848667025566101),
 ('iterates', 0.5784468054771423),
 ('iteration', 0.5774005651473999)]

**synonyms work pretty well!!!**

In [12]:
model_nostop.predict_output_word(['iterate','dataframe','rows'])

NameError: name 'model_nostop' is not defined

In [13]:
x = model_full.predict_output_word(['iterate','dataframe','rows'])

In [14]:
x

[('panda', 0.0070507326),
 ('dataframe', 0.0058084298),
 ('dataframes', 0.0043647736),
 ('pairwise', 0.0010982126),
 ('over', 0.0010611849),
 ('multiindex', 0.0010397913),
 ('subset', 0.00093480962),
 ('subsetting', 0.00092987594),
 ('tuples', 0.00075485575),
 ('numpy', 0.00072131585)]

In [None]:
q="How to find all questions that were duplicates of another question".split()
model_full.predict_output_word(q)