# Model Training

In this file I train a word2vec model on 17.5 million stack overflow question/post titles.

In [1]:
import gensim
import logging

from nltk.tokenize import RegexpTokenizer # tokenizing
from nltk.corpus import stopwords  # list of stop words
import re

# Logging code taken from http://rare-technologies.com/word2vec-tutorial/
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

# Loading & Pre-processing Data

#### functions

In [3]:
# functions to clean text
# https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/deepir.ipynb

contractions = re.compile(r"'|-|\"")
# all non alphanumeric
symbols = re.compile(r'(\W+)', re.U)
# single character removal
singles = re.compile(r'(\s\S\s)', re.I|re.U)
# separators (any whitespace)
seps = re.compile(r'\s+')
# tokenizer
tokenizer = RegexpTokenizer(r'\w+') # tokens separated by white spice
# stop words
stops = set(stopwords.words('english')) # list of english stop words

# cleaner (order matters)
def clean(text, return_tokens, rmv_stop_words=True): 
    text = text.lower()
    text = contractions.sub('', text)
    text = symbols.sub(r' \1 ', text)
    text = singles.sub(' ', text)
    text = seps.sub(' ', text)
    tokens = tokenizer.tokenize(text)     # tokenize
    if rmv_stop_words:
        tokens = [i for i in tokens if not i in stops] # remove stop words
    if return_tokens:
        return tokens
    return ' '.join(tokens)

### Pre-process corpus and save to file..
 - avoid having to pre-process again if training model again

In [4]:
# count how many titles in corpus

path = "/Users/stevenfelix/Documents/DataScience_local/Insight/"
file_src = 'posts_titles_50M_processed.txt'
i = 0
with open(path+file_src, 'r') as f:
    for line in f:
        i+=1
print(i)

17565207


In [5]:
path = "/Users/stevenfelix/Documents/DataScience_local/Insight/"
file_src = 'posts_titles_50M.txt'
file_dest = 'posts_titles_50M_processed.txt'

# compile and pre-process corpus serially
with open(path+file_src, 'r') as src, open(path+file_dest,'a+') as dest:
    for line in src:
        tmp = clean(line, rmv_stop_words=True, return_tokens=False)
        dest.write(tmp+'\n')

# Model Training

## Phraser (optional)

In [7]:
# generator to stream sentences into phraser
class sentence_stream:
    """Class that streams corpus into gensim models as a generator"""
    def __init__(self, file):
        self.file = file
    def __iter__(self):
        with open(self.file, 'r') as f:
            for line in f:
                yield line.split()

In [8]:
path = "/Users/stevenfelix/Documents/DataScience_local/Insight/"
file = 'posts_titles_50M_processed.txt'
corpus = sentence_stream(path+file)
phrases = gensim.models.Phrases(corpus)
bigram_transformer = gensim.models.phrases.Phraser(phrases)

2018-01-29 09:58:38,344 : INFO : collecting all words and their counts
2018-01-29 09:58:38,347 : INFO : PROGRESS: at sentence #0, processed 0 words and 0 word types
2018-01-29 09:58:38,470 : INFO : PROGRESS: at sentence #10000, processed 53499 words and 43473 word types
2018-01-29 09:58:38,571 : INFO : PROGRESS: at sentence #20000, processed 108631 words and 80136 word types
2018-01-29 09:58:38,696 : INFO : PROGRESS: at sentence #30000, processed 164079 words and 113613 word types
2018-01-29 09:58:38,802 : INFO : PROGRESS: at sentence #40000, processed 219488 words and 144925 word types
2018-01-29 09:58:38,945 : INFO : PROGRESS: at sentence #50000, processed 274392 words and 174780 word types
2018-01-29 09:58:39,062 : INFO : PROGRESS: at sentence #60000, processed 328944 words and 203284 word types
2018-01-29 09:58:39,175 : INFO : PROGRESS: at sentence #70000, processed 383988 words and 230715 word types
2018-01-29 09:58:39,284 : INFO : PROGRESS: at sentence #80000, processed 438691 wo

2018-01-29 09:58:47,917 : INFO : PROGRESS: at sentence #740000, processed 4057675 words and 1505743 word types
2018-01-29 09:58:48,032 : INFO : PROGRESS: at sentence #750000, processed 4112925 words and 1521124 word types
2018-01-29 09:58:48,159 : INFO : PROGRESS: at sentence #760000, processed 4167845 words and 1536377 word types
2018-01-29 09:58:48,286 : INFO : PROGRESS: at sentence #770000, processed 4222332 words and 1551537 word types
2018-01-29 09:58:48,413 : INFO : PROGRESS: at sentence #780000, processed 4277218 words and 1566771 word types
2018-01-29 09:58:48,541 : INFO : PROGRESS: at sentence #790000, processed 4332093 words and 1582115 word types
2018-01-29 09:58:48,671 : INFO : PROGRESS: at sentence #800000, processed 4386943 words and 1597150 word types
2018-01-29 09:58:48,793 : INFO : PROGRESS: at sentence #810000, processed 4441908 words and 1612077 word types
2018-01-29 09:58:48,916 : INFO : PROGRESS: at sentence #820000, processed 4496609 words and 1627071 word types
2

2018-01-29 09:58:57,106 : INFO : PROGRESS: at sentence #1480000, processed 8116935 words and 2529837 word types
2018-01-29 09:58:57,218 : INFO : PROGRESS: at sentence #1490000, processed 8171416 words and 2542157 word types
2018-01-29 09:58:57,345 : INFO : PROGRESS: at sentence #1500000, processed 8225782 words and 2554550 word types
2018-01-29 09:58:57,459 : INFO : PROGRESS: at sentence #1510000, processed 8280878 words and 2567057 word types
2018-01-29 09:58:57,577 : INFO : PROGRESS: at sentence #1520000, processed 8334984 words and 2579500 word types
2018-01-29 09:58:57,702 : INFO : PROGRESS: at sentence #1530000, processed 8390096 words and 2592123 word types
2018-01-29 09:58:57,817 : INFO : PROGRESS: at sentence #1540000, processed 8445017 words and 2604619 word types
2018-01-29 09:58:57,939 : INFO : PROGRESS: at sentence #1550000, processed 8500331 words and 2617340 word types
2018-01-29 09:58:58,058 : INFO : PROGRESS: at sentence #1560000, processed 8555583 words and 2629909 wor

2018-01-29 09:59:06,299 : INFO : PROGRESS: at sentence #2210000, processed 12176810 words and 3410143 word types
2018-01-29 09:59:06,442 : INFO : PROGRESS: at sentence #2220000, processed 12232586 words and 3421548 word types
2018-01-29 09:59:06,574 : INFO : PROGRESS: at sentence #2230000, processed 12288861 words and 3433112 word types
2018-01-29 09:59:06,692 : INFO : PROGRESS: at sentence #2240000, processed 12344552 words and 3444280 word types
2018-01-29 09:59:06,823 : INFO : PROGRESS: at sentence #2250000, processed 12400881 words and 3455956 word types
2018-01-29 09:59:06,954 : INFO : PROGRESS: at sentence #2260000, processed 12457487 words and 3467645 word types
2018-01-29 09:59:07,079 : INFO : PROGRESS: at sentence #2270000, processed 12513310 words and 3479147 word types
2018-01-29 09:59:07,201 : INFO : PROGRESS: at sentence #2280000, processed 12569849 words and 3490805 word types
2018-01-29 09:59:07,328 : INFO : PROGRESS: at sentence #2290000, processed 12625553 words and 35

2018-01-29 09:59:16,153 : INFO : PROGRESS: at sentence #2940000, processed 16280067 words and 4219231 word types
2018-01-29 09:59:16,279 : INFO : PROGRESS: at sentence #2950000, processed 16335967 words and 4229719 word types
2018-01-29 09:59:16,401 : INFO : PROGRESS: at sentence #2960000, processed 16392376 words and 4240494 word types
2018-01-29 09:59:16,517 : INFO : PROGRESS: at sentence #2970000, processed 16448128 words and 4251007 word types
2018-01-29 09:59:16,648 : INFO : PROGRESS: at sentence #2980000, processed 16504446 words and 4261665 word types
2018-01-29 09:59:16,775 : INFO : PROGRESS: at sentence #2990000, processed 16561157 words and 4272297 word types
2018-01-29 09:59:16,902 : INFO : PROGRESS: at sentence #3000000, processed 16617333 words and 4282824 word types
2018-01-29 09:59:17,065 : INFO : PROGRESS: at sentence #3010000, processed 16673636 words and 4293296 word types
2018-01-29 09:59:17,257 : INFO : PROGRESS: at sentence #3020000, processed 16730460 words and 43

2018-01-29 09:59:25,708 : INFO : PROGRESS: at sentence #3670000, processed 20321524 words and 4594829 word types
2018-01-29 09:59:25,835 : INFO : PROGRESS: at sentence #3680000, processed 20375779 words and 4594829 word types
2018-01-29 09:59:25,955 : INFO : PROGRESS: at sentence #3690000, processed 20430060 words and 4594829 word types
2018-01-29 09:59:26,070 : INFO : PROGRESS: at sentence #3700000, processed 20483814 words and 4594829 word types
2018-01-29 09:59:26,187 : INFO : PROGRESS: at sentence #3710000, processed 20537689 words and 4594829 word types
2018-01-29 09:59:26,304 : INFO : PROGRESS: at sentence #3720000, processed 20592532 words and 4594829 word types
2018-01-29 09:59:26,415 : INFO : PROGRESS: at sentence #3730000, processed 20646707 words and 4594829 word types
2018-01-29 09:59:26,538 : INFO : PROGRESS: at sentence #3740000, processed 20701800 words and 4594829 word types
2018-01-29 09:59:26,659 : INFO : PROGRESS: at sentence #3750000, processed 20758013 words and 45

2018-01-29 09:59:34,751 : INFO : PROGRESS: at sentence #4400000, processed 24340224 words and 4594829 word types
2018-01-29 09:59:34,873 : INFO : PROGRESS: at sentence #4410000, processed 24394778 words and 4594829 word types
2018-01-29 09:59:35,002 : INFO : PROGRESS: at sentence #4420000, processed 24450162 words and 4594829 word types
2018-01-29 09:59:35,121 : INFO : PROGRESS: at sentence #4430000, processed 24505542 words and 4594829 word types
2018-01-29 09:59:35,233 : INFO : PROGRESS: at sentence #4440000, processed 24560391 words and 4594829 word types
2018-01-29 09:59:35,356 : INFO : PROGRESS: at sentence #4450000, processed 24615532 words and 4594829 word types
2018-01-29 09:59:35,466 : INFO : PROGRESS: at sentence #4460000, processed 24670807 words and 4594829 word types
2018-01-29 09:59:35,589 : INFO : PROGRESS: at sentence #4470000, processed 24726446 words and 4594829 word types
2018-01-29 09:59:35,705 : INFO : PROGRESS: at sentence #4480000, processed 24781684 words and 45

2018-01-29 09:59:43,500 : INFO : PROGRESS: at sentence #5130000, processed 28356701 words and 4594829 word types
2018-01-29 09:59:43,629 : INFO : PROGRESS: at sentence #5140000, processed 28412754 words and 4594829 word types
2018-01-29 09:59:43,744 : INFO : PROGRESS: at sentence #5150000, processed 28468305 words and 4594829 word types
2018-01-29 09:59:43,873 : INFO : PROGRESS: at sentence #5160000, processed 28524082 words and 4594829 word types
2018-01-29 09:59:43,989 : INFO : PROGRESS: at sentence #5170000, processed 28579951 words and 4594829 word types
2018-01-29 09:59:44,117 : INFO : PROGRESS: at sentence #5180000, processed 28635386 words and 4594829 word types
2018-01-29 09:59:44,232 : INFO : PROGRESS: at sentence #5190000, processed 28691913 words and 4594829 word types
2018-01-29 09:59:44,358 : INFO : PROGRESS: at sentence #5200000, processed 28747192 words and 4594829 word types
2018-01-29 09:59:44,473 : INFO : PROGRESS: at sentence #5210000, processed 28803210 words and 45

2018-01-29 09:59:52,939 : INFO : PROGRESS: at sentence #5860000, processed 32445612 words and 4594829 word types
2018-01-29 09:59:53,070 : INFO : PROGRESS: at sentence #5870000, processed 32501395 words and 4594829 word types
2018-01-29 09:59:53,192 : INFO : PROGRESS: at sentence #5880000, processed 32558024 words and 4594829 word types
2018-01-29 09:59:53,317 : INFO : PROGRESS: at sentence #5890000, processed 32614277 words and 4594829 word types
2018-01-29 09:59:53,445 : INFO : PROGRESS: at sentence #5900000, processed 32669997 words and 4594829 word types
2018-01-29 09:59:53,566 : INFO : PROGRESS: at sentence #5910000, processed 32726499 words and 4594829 word types
2018-01-29 09:59:53,711 : INFO : PROGRESS: at sentence #5920000, processed 32782790 words and 4594829 word types
2018-01-29 09:59:53,830 : INFO : PROGRESS: at sentence #5930000, processed 32838559 words and 4594829 word types
2018-01-29 09:59:53,961 : INFO : PROGRESS: at sentence #5940000, processed 32894911 words and 45

2018-01-29 10:00:02,999 : INFO : PROGRESS: at sentence #6590000, processed 36552909 words and 4594829 word types
2018-01-29 10:00:03,132 : INFO : PROGRESS: at sentence #6600000, processed 36609039 words and 4594829 word types
2018-01-29 10:00:03,270 : INFO : PROGRESS: at sentence #6610000, processed 36665478 words and 4603155 word types
2018-01-29 10:00:03,402 : INFO : PROGRESS: at sentence #6620000, processed 36721554 words and 4613422 word types
2018-01-29 10:00:03,527 : INFO : PROGRESS: at sentence #6630000, processed 36778011 words and 4623602 word types
2018-01-29 10:00:03,653 : INFO : PROGRESS: at sentence #6640000, processed 36834482 words and 4633852 word types
2018-01-29 10:00:03,804 : INFO : PROGRESS: at sentence #6650000, processed 36891018 words and 4644346 word types
2018-01-29 10:00:03,940 : INFO : PROGRESS: at sentence #6660000, processed 36947003 words and 4654659 word types
2018-01-29 10:00:04,062 : INFO : PROGRESS: at sentence #6670000, processed 37003338 words and 46

2018-01-29 10:00:13,281 : INFO : PROGRESS: at sentence #7320000, processed 40666932 words and 5313310 word types
2018-01-29 10:00:13,426 : INFO : PROGRESS: at sentence #7330000, processed 40723699 words and 5323044 word types
2018-01-29 10:00:13,570 : INFO : PROGRESS: at sentence #7340000, processed 40780509 words and 5332825 word types
2018-01-29 10:00:13,717 : INFO : PROGRESS: at sentence #7350000, processed 40836795 words and 5342291 word types
2018-01-29 10:00:13,867 : INFO : PROGRESS: at sentence #7360000, processed 40892808 words and 5351934 word types
2018-01-29 10:00:14,001 : INFO : PROGRESS: at sentence #7370000, processed 40949621 words and 5361831 word types
2018-01-29 10:00:14,146 : INFO : PROGRESS: at sentence #7380000, processed 41006541 words and 5371539 word types
2018-01-29 10:00:14,276 : INFO : PROGRESS: at sentence #7390000, processed 41062430 words and 5381363 word types
2018-01-29 10:00:14,419 : INFO : PROGRESS: at sentence #7400000, processed 41118862 words and 53

2018-01-29 10:00:24,053 : INFO : PROGRESS: at sentence #8050000, processed 44802322 words and 6000120 word types
2018-01-29 10:00:24,211 : INFO : PROGRESS: at sentence #8060000, processed 44859347 words and 6009248 word types
2018-01-29 10:00:24,345 : INFO : PROGRESS: at sentence #8070000, processed 44915705 words and 6018361 word types
2018-01-29 10:00:24,489 : INFO : PROGRESS: at sentence #8080000, processed 44972617 words and 6027347 word types
2018-01-29 10:00:24,637 : INFO : PROGRESS: at sentence #8090000, processed 45029377 words and 6036575 word types
2018-01-29 10:00:24,792 : INFO : PROGRESS: at sentence #8100000, processed 45086227 words and 6045545 word types
2018-01-29 10:00:24,951 : INFO : PROGRESS: at sentence #8110000, processed 45143294 words and 6054570 word types
2018-01-29 10:00:25,078 : INFO : PROGRESS: at sentence #8120000, processed 45200628 words and 6063719 word types
2018-01-29 10:00:25,204 : INFO : PROGRESS: at sentence #8130000, processed 45257799 words and 60

2018-01-29 10:00:34,328 : INFO : PROGRESS: at sentence #8780000, processed 48966588 words and 6655035 word types
2018-01-29 10:00:34,479 : INFO : PROGRESS: at sentence #8790000, processed 49023242 words and 6664065 word types
2018-01-29 10:00:34,620 : INFO : PROGRESS: at sentence #8800000, processed 49080500 words and 6672982 word types
2018-01-29 10:00:34,758 : INFO : PROGRESS: at sentence #8810000, processed 49138006 words and 6681755 word types
2018-01-29 10:00:34,898 : INFO : PROGRESS: at sentence #8820000, processed 49194163 words and 6690392 word types
2018-01-29 10:00:35,053 : INFO : PROGRESS: at sentence #8830000, processed 49251173 words and 6699067 word types
2018-01-29 10:00:35,180 : INFO : PROGRESS: at sentence #8840000, processed 49308481 words and 6707794 word types
2018-01-29 10:00:35,303 : INFO : PROGRESS: at sentence #8850000, processed 49365581 words and 6716486 word types
2018-01-29 10:00:35,438 : INFO : PROGRESS: at sentence #8860000, processed 49422070 words and 67

2018-01-29 10:00:44,676 : INFO : PROGRESS: at sentence #9510000, processed 53138282 words and 7287106 word types
2018-01-29 10:00:44,857 : INFO : PROGRESS: at sentence #9520000, processed 53194641 words and 7295335 word types
2018-01-29 10:00:44,991 : INFO : PROGRESS: at sentence #9530000, processed 53251826 words and 7303795 word types
2018-01-29 10:00:45,123 : INFO : PROGRESS: at sentence #9540000, processed 53308898 words and 7312392 word types
2018-01-29 10:00:45,275 : INFO : PROGRESS: at sentence #9550000, processed 53365940 words and 7320710 word types
2018-01-29 10:00:45,445 : INFO : PROGRESS: at sentence #9560000, processed 53423035 words and 7329120 word types
2018-01-29 10:00:45,589 : INFO : PROGRESS: at sentence #9570000, processed 53480012 words and 7337456 word types
2018-01-29 10:00:45,737 : INFO : PROGRESS: at sentence #9580000, processed 53537160 words and 7345953 word types
2018-01-29 10:00:45,872 : INFO : PROGRESS: at sentence #9590000, processed 53594318 words and 73

2018-01-29 10:00:55,076 : INFO : PROGRESS: at sentence #10240000, processed 57310263 words and 7894267 word types
2018-01-29 10:00:55,216 : INFO : PROGRESS: at sentence #10250000, processed 57368006 words and 7902646 word types
2018-01-29 10:00:55,347 : INFO : PROGRESS: at sentence #10260000, processed 57424603 words and 7910728 word types
2018-01-29 10:00:55,473 : INFO : PROGRESS: at sentence #10270000, processed 57481782 words and 7918891 word types
2018-01-29 10:00:55,609 : INFO : PROGRESS: at sentence #10280000, processed 57539381 words and 7927412 word types
2018-01-29 10:00:55,743 : INFO : PROGRESS: at sentence #10290000, processed 57597242 words and 7935774 word types
2018-01-29 10:00:55,876 : INFO : PROGRESS: at sentence #10300000, processed 57654584 words and 7944088 word types
2018-01-29 10:00:56,016 : INFO : PROGRESS: at sentence #10310000, processed 57712020 words and 7952459 word types
2018-01-29 10:00:56,145 : INFO : PROGRESS: at sentence #10320000, processed 57769947 wor

2018-01-29 10:01:04,598 : INFO : PROGRESS: at sentence #10960000, processed 61303372 words and 8055842 word types
2018-01-29 10:01:04,724 : INFO : PROGRESS: at sentence #10970000, processed 61358834 words and 8055842 word types
2018-01-29 10:01:04,860 : INFO : PROGRESS: at sentence #10980000, processed 61414394 words and 8055842 word types
2018-01-29 10:01:04,982 : INFO : PROGRESS: at sentence #10990000, processed 61469709 words and 8055842 word types
2018-01-29 10:01:05,112 : INFO : PROGRESS: at sentence #11000000, processed 61525205 words and 8055842 word types
2018-01-29 10:01:05,247 : INFO : PROGRESS: at sentence #11010000, processed 61580618 words and 8055842 word types
2018-01-29 10:01:05,379 : INFO : PROGRESS: at sentence #11020000, processed 61636297 words and 8055842 word types
2018-01-29 10:01:05,521 : INFO : PROGRESS: at sentence #11030000, processed 61691830 words and 8055842 word types
2018-01-29 10:01:05,656 : INFO : PROGRESS: at sentence #11040000, processed 61746958 wor

2018-01-29 10:01:14,263 : INFO : PROGRESS: at sentence #11680000, processed 65267037 words and 8055842 word types
2018-01-29 10:01:14,412 : INFO : PROGRESS: at sentence #11690000, processed 65322182 words and 8055842 word types
2018-01-29 10:01:14,560 : INFO : PROGRESS: at sentence #11700000, processed 65376182 words and 8055842 word types
2018-01-29 10:01:14,711 : INFO : PROGRESS: at sentence #11710000, processed 65431576 words and 8055842 word types
2018-01-29 10:01:14,859 : INFO : PROGRESS: at sentence #11720000, processed 65485433 words and 8055842 word types
2018-01-29 10:01:15,013 : INFO : PROGRESS: at sentence #11730000, processed 65540515 words and 8055842 word types
2018-01-29 10:01:15,170 : INFO : PROGRESS: at sentence #11740000, processed 65595311 words and 8055842 word types
2018-01-29 10:01:15,340 : INFO : PROGRESS: at sentence #11750000, processed 65649792 words and 8055842 word types
2018-01-29 10:01:15,463 : INFO : PROGRESS: at sentence #11760000, processed 65704770 wor

2018-01-29 10:01:24,135 : INFO : PROGRESS: at sentence #12400000, processed 69237933 words and 8055842 word types
2018-01-29 10:01:24,278 : INFO : PROGRESS: at sentence #12410000, processed 69293725 words and 8055842 word types
2018-01-29 10:01:24,417 : INFO : PROGRESS: at sentence #12420000, processed 69349611 words and 8055842 word types
2018-01-29 10:01:24,551 : INFO : PROGRESS: at sentence #12430000, processed 69405213 words and 8055842 word types
2018-01-29 10:01:24,676 : INFO : PROGRESS: at sentence #12440000, processed 69461298 words and 8055842 word types
2018-01-29 10:01:24,800 : INFO : PROGRESS: at sentence #12450000, processed 69516914 words and 8055842 word types
2018-01-29 10:01:24,932 : INFO : PROGRESS: at sentence #12460000, processed 69573386 words and 8055842 word types
2018-01-29 10:01:25,056 : INFO : PROGRESS: at sentence #12470000, processed 69629302 words and 8055842 word types
2018-01-29 10:01:25,182 : INFO : PROGRESS: at sentence #12480000, processed 69685701 wor

2018-01-29 10:01:33,593 : INFO : PROGRESS: at sentence #13120000, processed 73277061 words and 8055842 word types
2018-01-29 10:01:33,711 : INFO : PROGRESS: at sentence #13130000, processed 73332937 words and 8055842 word types
2018-01-29 10:01:33,861 : INFO : PROGRESS: at sentence #13140000, processed 73389418 words and 8055842 word types
2018-01-29 10:01:33,985 : INFO : PROGRESS: at sentence #13150000, processed 73445874 words and 8055842 word types
2018-01-29 10:01:34,109 : INFO : PROGRESS: at sentence #13160000, processed 73501772 words and 8055842 word types
2018-01-29 10:01:34,243 : INFO : PROGRESS: at sentence #13170000, processed 73558398 words and 8055842 word types
2018-01-29 10:01:34,378 : INFO : PROGRESS: at sentence #13180000, processed 73614986 words and 8055842 word types
2018-01-29 10:01:34,494 : INFO : PROGRESS: at sentence #13190000, processed 73671046 words and 8055842 word types
2018-01-29 10:01:34,621 : INFO : PROGRESS: at sentence #13200000, processed 73727832 wor

2018-01-29 10:01:42,932 : INFO : PROGRESS: at sentence #13840000, processed 77329021 words and 8055842 word types
2018-01-29 10:01:43,048 : INFO : PROGRESS: at sentence #13850000, processed 77384786 words and 8055842 word types
2018-01-29 10:01:43,178 : INFO : PROGRESS: at sentence #13860000, processed 77441197 words and 8055842 word types
2018-01-29 10:01:43,324 : INFO : PROGRESS: at sentence #13870000, processed 77498196 words and 8055842 word types
2018-01-29 10:01:43,444 : INFO : PROGRESS: at sentence #13880000, processed 77554102 words and 8055842 word types
2018-01-29 10:01:43,569 : INFO : PROGRESS: at sentence #13890000, processed 77610088 words and 8055842 word types
2018-01-29 10:01:43,687 : INFO : PROGRESS: at sentence #13900000, processed 77666342 words and 8055842 word types
2018-01-29 10:01:43,829 : INFO : PROGRESS: at sentence #13910000, processed 77722789 words and 8055842 word types
2018-01-29 10:01:43,949 : INFO : PROGRESS: at sentence #13920000, processed 77778891 wor

2018-01-29 10:01:52,153 : INFO : PROGRESS: at sentence #14560000, processed 81388859 words and 8055842 word types
2018-01-29 10:01:52,299 : INFO : PROGRESS: at sentence #14570000, processed 81445349 words and 8055842 word types
2018-01-29 10:01:52,429 : INFO : PROGRESS: at sentence #14580000, processed 81502158 words and 8055842 word types
2018-01-29 10:01:52,548 : INFO : PROGRESS: at sentence #14590000, processed 81559030 words and 8055842 word types
2018-01-29 10:01:52,674 : INFO : PROGRESS: at sentence #14600000, processed 81614864 words and 8055842 word types
2018-01-29 10:01:52,813 : INFO : PROGRESS: at sentence #14610000, processed 81671365 words and 8055842 word types
2018-01-29 10:01:52,943 : INFO : PROGRESS: at sentence #14620000, processed 81728120 words and 8055842 word types
2018-01-29 10:01:53,067 : INFO : PROGRESS: at sentence #14630000, processed 81784923 words and 8055842 word types
2018-01-29 10:01:53,184 : INFO : PROGRESS: at sentence #14640000, processed 81840916 wor

2018-01-29 10:02:01,497 : INFO : PROGRESS: at sentence #15280000, processed 85470979 words and 8055842 word types
2018-01-29 10:02:01,631 : INFO : PROGRESS: at sentence #15290000, processed 85528039 words and 8055842 word types
2018-01-29 10:02:01,784 : INFO : PROGRESS: at sentence #15300000, processed 85585119 words and 8055842 word types
2018-01-29 10:02:01,919 : INFO : PROGRESS: at sentence #15310000, processed 85642017 words and 8055842 word types
2018-01-29 10:02:02,044 : INFO : PROGRESS: at sentence #15320000, processed 85698808 words and 8055842 word types
2018-01-29 10:02:02,166 : INFO : PROGRESS: at sentence #15330000, processed 85756173 words and 8055842 word types
2018-01-29 10:02:02,312 : INFO : PROGRESS: at sentence #15340000, processed 85813713 words and 8055842 word types
2018-01-29 10:02:02,440 : INFO : PROGRESS: at sentence #15350000, processed 85870392 words and 8055842 word types
2018-01-29 10:02:02,559 : INFO : PROGRESS: at sentence #15360000, processed 85927176 wor

2018-01-29 10:02:11,641 : INFO : PROGRESS: at sentence #16000000, processed 89578929 words and 8055842 word types
2018-01-29 10:02:11,779 : INFO : PROGRESS: at sentence #16010000, processed 89636005 words and 8055842 word types
2018-01-29 10:02:11,912 : INFO : PROGRESS: at sentence #16020000, processed 89693539 words and 8055842 word types
2018-01-29 10:02:12,049 : INFO : PROGRESS: at sentence #16030000, processed 89750108 words and 8055842 word types
2018-01-29 10:02:12,194 : INFO : PROGRESS: at sentence #16040000, processed 89807402 words and 8055842 word types
2018-01-29 10:02:12,325 : INFO : PROGRESS: at sentence #16050000, processed 89864378 words and 8055842 word types
2018-01-29 10:02:12,466 : INFO : PROGRESS: at sentence #16060000, processed 89921890 words and 8055842 word types
2018-01-29 10:02:12,600 : INFO : PROGRESS: at sentence #16070000, processed 89979180 words and 8055842 word types
2018-01-29 10:02:12,734 : INFO : PROGRESS: at sentence #16080000, processed 90035655 wor

2018-01-29 10:02:21,118 : INFO : PROGRESS: at sentence #16720000, processed 93694356 words and 8055842 word types
2018-01-29 10:02:21,249 : INFO : PROGRESS: at sentence #16730000, processed 93751762 words and 8055842 word types
2018-01-29 10:02:21,378 : INFO : PROGRESS: at sentence #16740000, processed 93809074 words and 8055842 word types
2018-01-29 10:02:21,505 : INFO : PROGRESS: at sentence #16750000, processed 93865474 words and 8055842 word types
2018-01-29 10:02:21,638 : INFO : PROGRESS: at sentence #16760000, processed 93922381 words and 8055842 word types
2018-01-29 10:02:21,767 : INFO : PROGRESS: at sentence #16770000, processed 93979932 words and 8055842 word types
2018-01-29 10:02:21,894 : INFO : PROGRESS: at sentence #16780000, processed 94037525 words and 8055842 word types
2018-01-29 10:02:22,025 : INFO : PROGRESS: at sentence #16790000, processed 94094240 words and 8055842 word types
2018-01-29 10:02:22,148 : INFO : PROGRESS: at sentence #16800000, processed 94150868 wor

2018-01-29 10:02:30,589 : INFO : PROGRESS: at sentence #17440000, processed 97811920 words and 8055842 word types
2018-01-29 10:02:30,714 : INFO : PROGRESS: at sentence #17450000, processed 97869714 words and 8055842 word types
2018-01-29 10:02:30,848 : INFO : PROGRESS: at sentence #17460000, processed 97927696 words and 8055842 word types
2018-01-29 10:02:30,969 : INFO : PROGRESS: at sentence #17470000, processed 97985010 words and 8055842 word types
2018-01-29 10:02:31,096 : INFO : PROGRESS: at sentence #17480000, processed 98042101 words and 8055842 word types
2018-01-29 10:02:31,218 : INFO : PROGRESS: at sentence #17490000, processed 98100090 words and 8055842 word types
2018-01-29 10:02:31,347 : INFO : PROGRESS: at sentence #17500000, processed 98158218 words and 8055842 word types
2018-01-29 10:02:31,467 : INFO : PROGRESS: at sentence #17510000, processed 98215157 words and 8055842 word types
2018-01-29 10:02:31,595 : INFO : PROGRESS: at sentence #17520000, processed 98272827 wor

2018-01-29 10:02:39,627 : INFO : PROGRESS: at sentence #18160000, processed 101792178 words and 8055842 word types
2018-01-29 10:02:39,774 : INFO : PROGRESS: at sentence #18170000, processed 101847238 words and 8055842 word types
2018-01-29 10:02:39,902 : INFO : PROGRESS: at sentence #18180000, processed 101902510 words and 8055842 word types
2018-01-29 10:02:40,030 : INFO : PROGRESS: at sentence #18190000, processed 101958151 words and 8055842 word types
2018-01-29 10:02:40,159 : INFO : PROGRESS: at sentence #18200000, processed 102013757 words and 8055842 word types
2018-01-29 10:02:40,295 : INFO : PROGRESS: at sentence #18210000, processed 102068661 words and 8055842 word types
2018-01-29 10:02:40,421 : INFO : PROGRESS: at sentence #18220000, processed 102123152 words and 8055842 word types
2018-01-29 10:02:40,548 : INFO : PROGRESS: at sentence #18230000, processed 102178338 words and 8055842 word types
2018-01-29 10:02:40,674 : INFO : PROGRESS: at sentence #18240000, processed 1022

2018-01-29 10:02:49,174 : INFO : PROGRESS: at sentence #18880000, processed 105750321 words and 8055842 word types
2018-01-29 10:02:49,306 : INFO : PROGRESS: at sentence #18890000, processed 105805296 words and 8055842 word types
2018-01-29 10:02:49,466 : INFO : PROGRESS: at sentence #18900000, processed 105859563 words and 8055842 word types
2018-01-29 10:02:49,633 : INFO : PROGRESS: at sentence #18910000, processed 105914157 words and 8055842 word types
2018-01-29 10:02:49,782 : INFO : PROGRESS: at sentence #18920000, processed 105968706 words and 8055842 word types
2018-01-29 10:02:49,917 : INFO : PROGRESS: at sentence #18930000, processed 106023597 words and 8055842 word types
2018-01-29 10:02:50,046 : INFO : PROGRESS: at sentence #18940000, processed 106077831 words and 8055842 word types
2018-01-29 10:02:50,189 : INFO : PROGRESS: at sentence #18950000, processed 106132615 words and 8055842 word types
2018-01-29 10:02:50,313 : INFO : PROGRESS: at sentence #18960000, processed 1061

2018-01-29 10:02:58,741 : INFO : PROGRESS: at sentence #19600000, processed 109729398 words and 8055842 word types
2018-01-29 10:02:58,877 : INFO : PROGRESS: at sentence #19610000, processed 109785847 words and 8055842 word types
2018-01-29 10:02:59,010 : INFO : PROGRESS: at sentence #19620000, processed 109842483 words and 8055842 word types
2018-01-29 10:02:59,143 : INFO : PROGRESS: at sentence #19630000, processed 109898098 words and 8055842 word types
2018-01-29 10:02:59,282 : INFO : PROGRESS: at sentence #19640000, processed 109953995 words and 8055842 word types
2018-01-29 10:02:59,422 : INFO : PROGRESS: at sentence #19650000, processed 110009750 words and 8055842 word types
2018-01-29 10:02:59,548 : INFO : PROGRESS: at sentence #19660000, processed 110065591 words and 8055842 word types
2018-01-29 10:02:59,687 : INFO : PROGRESS: at sentence #19670000, processed 110122203 words and 8055842 word types
2018-01-29 10:02:59,811 : INFO : PROGRESS: at sentence #19680000, processed 1101

2018-01-29 10:03:08,936 : INFO : PROGRESS: at sentence #20320000, processed 113771426 words and 8055842 word types
2018-01-29 10:03:09,058 : INFO : PROGRESS: at sentence #20330000, processed 113827970 words and 8055842 word types
2018-01-29 10:03:09,195 : INFO : PROGRESS: at sentence #20340000, processed 113885255 words and 8055842 word types
2018-01-29 10:03:09,324 : INFO : PROGRESS: at sentence #20350000, processed 113941234 words and 8055842 word types
2018-01-29 10:03:09,445 : INFO : PROGRESS: at sentence #20360000, processed 113997810 words and 8055842 word types
2018-01-29 10:03:09,577 : INFO : PROGRESS: at sentence #20370000, processed 114054604 words and 8055842 word types
2018-01-29 10:03:09,697 : INFO : PROGRESS: at sentence #20380000, processed 114110202 words and 8055842 word types
2018-01-29 10:03:09,825 : INFO : PROGRESS: at sentence #20390000, processed 114166763 words and 8055842 word types
2018-01-29 10:03:09,961 : INFO : PROGRESS: at sentence #20400000, processed 1142

2018-01-29 10:03:18,581 : INFO : PROGRESS: at sentence #21040000, processed 117823106 words and 8055842 word types
2018-01-29 10:03:18,717 : INFO : PROGRESS: at sentence #21050000, processed 117878928 words and 8055842 word types
2018-01-29 10:03:18,852 : INFO : PROGRESS: at sentence #21060000, processed 117935226 words and 8055842 word types
2018-01-29 10:03:18,982 : INFO : PROGRESS: at sentence #21070000, processed 117992268 words and 8055842 word types
2018-01-29 10:03:19,111 : INFO : PROGRESS: at sentence #21080000, processed 118048153 words and 8055842 word types
2018-01-29 10:03:19,242 : INFO : PROGRESS: at sentence #21090000, processed 118104690 words and 8055842 word types
2018-01-29 10:03:19,369 : INFO : PROGRESS: at sentence #21100000, processed 118161524 words and 8055842 word types
2018-01-29 10:03:19,494 : INFO : PROGRESS: at sentence #21110000, processed 118217617 words and 8055842 word types
2018-01-29 10:03:19,631 : INFO : PROGRESS: at sentence #21120000, processed 1182

2018-01-29 10:03:28,195 : INFO : PROGRESS: at sentence #21760000, processed 121884965 words and 8055842 word types
2018-01-29 10:03:28,353 : INFO : PROGRESS: at sentence #21770000, processed 121941103 words and 8055842 word types
2018-01-29 10:03:28,484 : INFO : PROGRESS: at sentence #21780000, processed 121997494 words and 8055842 word types
2018-01-29 10:03:28,615 : INFO : PROGRESS: at sentence #21790000, processed 122054211 words and 8055842 word types
2018-01-29 10:03:28,736 : INFO : PROGRESS: at sentence #21800000, processed 122110726 words and 8055842 word types
2018-01-29 10:03:28,875 : INFO : PROGRESS: at sentence #21810000, processed 122166883 words and 8055842 word types
2018-01-29 10:03:29,016 : INFO : PROGRESS: at sentence #21820000, processed 122223074 words and 8055842 word types
2018-01-29 10:03:29,150 : INFO : PROGRESS: at sentence #21830000, processed 122279636 words and 8055842 word types
2018-01-29 10:03:29,294 : INFO : PROGRESS: at sentence #21840000, processed 1223

2018-01-29 10:03:39,420 : INFO : PROGRESS: at sentence #22480000, processed 125970546 words and 8055842 word types
2018-01-29 10:03:39,551 : INFO : PROGRESS: at sentence #22490000, processed 126027165 words and 8055842 word types
2018-01-29 10:03:39,689 : INFO : PROGRESS: at sentence #22500000, processed 126084452 words and 8055842 word types
2018-01-29 10:03:39,822 : INFO : PROGRESS: at sentence #22510000, processed 126141987 words and 8055842 word types
2018-01-29 10:03:39,969 : INFO : PROGRESS: at sentence #22520000, processed 126199083 words and 8055842 word types
2018-01-29 10:03:40,132 : INFO : PROGRESS: at sentence #22530000, processed 126255508 words and 8055842 word types
2018-01-29 10:03:40,268 : INFO : PROGRESS: at sentence #22540000, processed 126312284 words and 8055842 word types
2018-01-29 10:03:40,432 : INFO : PROGRESS: at sentence #22550000, processed 126369866 words and 8055842 word types
2018-01-29 10:03:40,563 : INFO : PROGRESS: at sentence #22560000, processed 1264

2018-01-29 10:03:49,773 : INFO : PROGRESS: at sentence #23200000, processed 130079403 words and 8055842 word types
2018-01-29 10:03:49,911 : INFO : PROGRESS: at sentence #23210000, processed 130135752 words and 8055842 word types
2018-01-29 10:03:50,042 : INFO : PROGRESS: at sentence #23220000, processed 130192620 words and 8055842 word types
2018-01-29 10:03:50,220 : INFO : PROGRESS: at sentence #23230000, processed 130249515 words and 8055842 word types
2018-01-29 10:03:50,364 : INFO : PROGRESS: at sentence #23240000, processed 130306961 words and 8055842 word types
2018-01-29 10:03:50,496 : INFO : PROGRESS: at sentence #23250000, processed 130362845 words and 8055842 word types
2018-01-29 10:03:50,680 : INFO : PROGRESS: at sentence #23260000, processed 130419790 words and 8055842 word types
2018-01-29 10:03:50,813 : INFO : PROGRESS: at sentence #23270000, processed 130476758 words and 8055842 word types
2018-01-29 10:03:50,964 : INFO : PROGRESS: at sentence #23280000, processed 1305

2018-01-29 10:04:01,242 : INFO : PROGRESS: at sentence #23920000, processed 134194596 words and 8055842 word types
2018-01-29 10:04:01,439 : INFO : PROGRESS: at sentence #23930000, processed 134250834 words and 8055842 word types
2018-01-29 10:04:01,576 : INFO : PROGRESS: at sentence #23940000, processed 134308103 words and 8055842 word types
2018-01-29 10:04:01,706 : INFO : PROGRESS: at sentence #23950000, processed 134365275 words and 8055842 word types
2018-01-29 10:04:01,831 : INFO : PROGRESS: at sentence #23960000, processed 134422405 words and 8055842 word types
2018-01-29 10:04:02,009 : INFO : PROGRESS: at sentence #23970000, processed 134479788 words and 8055842 word types
2018-01-29 10:04:02,179 : INFO : PROGRESS: at sentence #23980000, processed 134536521 words and 8055842 word types
2018-01-29 10:04:02,346 : INFO : PROGRESS: at sentence #23990000, processed 134593429 words and 8055842 word types
2018-01-29 10:04:02,517 : INFO : PROGRESS: at sentence #24000000, processed 1346

2018-01-29 10:04:13,293 : INFO : PROGRESS: at sentence #24640000, processed 138315390 words and 8055842 word types
2018-01-29 10:04:13,469 : INFO : PROGRESS: at sentence #24650000, processed 138372671 words and 8055842 word types
2018-01-29 10:04:13,610 : INFO : PROGRESS: at sentence #24660000, processed 138430607 words and 8055842 word types
2018-01-29 10:04:13,750 : INFO : PROGRESS: at sentence #24670000, processed 138488607 words and 8055842 word types
2018-01-29 10:04:13,886 : INFO : PROGRESS: at sentence #24680000, processed 138546310 words and 8055842 word types
2018-01-29 10:04:14,023 : INFO : PROGRESS: at sentence #24690000, processed 138603909 words and 8055842 word types
2018-01-29 10:04:14,172 : INFO : PROGRESS: at sentence #24700000, processed 138661858 words and 8058143 word types
2018-01-29 10:04:14,327 : INFO : PROGRESS: at sentence #24710000, processed 138719892 words and 8066811 word types
2018-01-29 10:04:14,474 : INFO : PROGRESS: at sentence #24720000, processed 1387

2018-01-29 10:04:23,111 : INFO : PROGRESS: at sentence #25360000, processed 142492849 words and 8609908 word types
2018-01-29 10:04:23,233 : INFO : PROGRESS: at sentence #25370000, processed 142550873 words and 8618408 word types
2018-01-29 10:04:23,361 : INFO : PROGRESS: at sentence #25380000, processed 142608862 words and 8626803 word types
2018-01-29 10:04:23,486 : INFO : PROGRESS: at sentence #25390000, processed 142666133 words and 8634935 word types
2018-01-29 10:04:23,614 : INFO : PROGRESS: at sentence #25400000, processed 142724290 words and 8643322 word types
2018-01-29 10:04:23,735 : INFO : PROGRESS: at sentence #25410000, processed 142782165 words and 8651655 word types
2018-01-29 10:04:23,889 : INFO : PROGRESS: at sentence #25420000, processed 142840632 words and 8660138 word types
2018-01-29 10:04:24,011 : INFO : PROGRESS: at sentence #25430000, processed 142897999 words and 8668393 word types
2018-01-29 10:04:24,143 : INFO : PROGRESS: at sentence #25440000, processed 1429

2018-01-29 10:04:32,940 : INFO : PROGRESS: at sentence #26080000, processed 146674828 words and 9199998 word types
2018-01-29 10:04:33,083 : INFO : PROGRESS: at sentence #26090000, processed 146733043 words and 9208210 word types
2018-01-29 10:04:33,214 : INFO : PROGRESS: at sentence #26100000, processed 146790289 words and 9216005 word types
2018-01-29 10:04:33,347 : INFO : PROGRESS: at sentence #26110000, processed 146848237 words and 9224183 word types
2018-01-29 10:04:33,519 : INFO : PROGRESS: at sentence #26120000, processed 146906746 words and 9232451 word types
2018-01-29 10:04:33,688 : INFO : PROGRESS: at sentence #26130000, processed 146965050 words and 9240709 word types
2018-01-29 10:04:33,816 : INFO : PROGRESS: at sentence #26140000, processed 147022578 words and 9248625 word types
2018-01-29 10:04:34,025 : INFO : PROGRESS: at sentence #26150000, processed 147080791 words and 9256740 word types
2018-01-29 10:04:34,207 : INFO : PROGRESS: at sentence #26160000, processed 1471

In [9]:
bigram_transformer.save(path+'bigram_Phraser')
phrases.save(path+'bigram_phrases')

2018-01-29 10:06:36,188 : INFO : saving Phraser object under /Users/stevenfelix/Documents/DataScience_local/Insight/bigram_Phraser, separately None
2018-01-29 10:06:36,624 : INFO : saved /Users/stevenfelix/Documents/DataScience_local/Insight/bigram_Phraser
2018-01-29 10:06:36,626 : INFO : saving Phrases object under /Users/stevenfelix/Documents/DataScience_local/Insight/bigram_phrases, separately None
2018-01-29 10:06:55,260 : INFO : saved /Users/stevenfelix/Documents/DataScience_local/Insight/bigram_phrases


In [13]:
phrases['create column pandas dataframe'.split()]



['create', 'column', 'pandas_dataframe']

### Train word2vec

**skip gram model **

In [14]:
## skip gram model with negative sampling, which can generate predictions from contexts
num_doc='50M'
sg=1
size=250
window=5
min_count=3
corpus = sentence_stream(path+file)

#model_full = gensim.models.word2vec.Word2Vec(corpus, sg=sg, size=size, window=window, min_count=min_count)

** CBOW model **

In [16]:
## CBOW model, which can generate phrase scores
## to save on memory, initialize a stentence_stream class and pass it to model

num_doc='50M'
sg=0
size=250
window=5
min_count=5
hs=1
negative=0

path = "/Users/stevenfelix/Documents/DataScience_local/Insight/"
file = 'posts_titles_75M_processed.txt'
corpus = sentence_stream(path+file)

model_full = gensim.models.word2vec.Word2Vec(corpus, sg=sg, size=size, window=window, min_count=min_count, hs=hs, negative=0)

2018-01-29 10:08:20,029 : INFO : collecting all words and their counts
2018-01-29 10:08:20,031 : INFO : PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2018-01-29 10:08:20,289 : INFO : PROGRESS: at sentence #10000, processed 49477 words, keeping 8890 word types
2018-01-29 10:08:20,512 : INFO : PROGRESS: at sentence #20000, processed 100629 words, keeping 13891 word types
2018-01-29 10:08:20,740 : INFO : PROGRESS: at sentence #30000, processed 152211 words, keeping 17694 word types
2018-01-29 10:08:20,960 : INFO : PROGRESS: at sentence #40000, processed 203669 words, keeping 21000 word types
2018-01-29 10:08:21,176 : INFO : PROGRESS: at sentence #50000, processed 254809 words, keeping 24067 word types
2018-01-29 10:08:21,377 : INFO : PROGRESS: at sentence #60000, processed 305606 words, keeping 26861 word types
2018-01-29 10:08:21,590 : INFO : PROGRESS: at sentence #70000, processed 356976 words, keeping 29350 word types
2018-01-29 10:08:21,791 : INFO : PROGRESS: at se

2018-01-29 10:08:36,658 : INFO : PROGRESS: at sentence #710000, processed 3651655 words, keeping 127275 word types
2018-01-29 10:08:36,898 : INFO : PROGRESS: at sentence #720000, processed 3703187 words, keeping 128403 word types
2018-01-29 10:08:37,139 : INFO : PROGRESS: at sentence #730000, processed 3754936 words, keeping 129496 word types
2018-01-29 10:08:37,380 : INFO : PROGRESS: at sentence #740000, processed 3806707 words, keeping 130652 word types
2018-01-29 10:08:37,609 : INFO : PROGRESS: at sentence #750000, processed 3858685 words, keeping 131785 word types
2018-01-29 10:08:37,855 : INFO : PROGRESS: at sentence #760000, processed 3910329 words, keeping 132907 word types
2018-01-29 10:08:38,075 : INFO : PROGRESS: at sentence #770000, processed 3961688 words, keeping 134009 word types
2018-01-29 10:08:38,301 : INFO : PROGRESS: at sentence #780000, processed 4013397 words, keeping 135117 word types
2018-01-29 10:08:38,522 : INFO : PROGRESS: at sentence #790000, processed 406501

2018-01-29 10:08:53,010 : INFO : PROGRESS: at sentence #1420000, processed 7323616 words, keeping 199434 word types
2018-01-29 10:08:53,235 : INFO : PROGRESS: at sentence #1430000, processed 7375087 words, keeping 200284 word types
2018-01-29 10:08:53,461 : INFO : PROGRESS: at sentence #1440000, processed 7426626 words, keeping 201180 word types
2018-01-29 10:08:53,689 : INFO : PROGRESS: at sentence #1450000, processed 7477940 words, keeping 202069 word types
2018-01-29 10:08:53,920 : INFO : PROGRESS: at sentence #1460000, processed 7529694 words, keeping 202915 word types
2018-01-29 10:08:54,152 : INFO : PROGRESS: at sentence #1470000, processed 7580341 words, keeping 203819 word types
2018-01-29 10:08:54,379 : INFO : PROGRESS: at sentence #1480000, processed 7631827 words, keeping 204724 word types
2018-01-29 10:08:54,608 : INFO : PROGRESS: at sentence #1490000, processed 7683257 words, keeping 205597 word types
2018-01-29 10:08:54,840 : INFO : PROGRESS: at sentence #1500000, process

2018-01-29 10:09:09,612 : INFO : PROGRESS: at sentence #2130000, processed 11041793 words, keeping 259104 word types
2018-01-29 10:09:09,836 : INFO : PROGRESS: at sentence #2140000, processed 11093945 words, keeping 259859 word types
2018-01-29 10:09:10,071 : INFO : PROGRESS: at sentence #2150000, processed 11147141 words, keeping 260683 word types
2018-01-29 10:09:10,294 : INFO : PROGRESS: at sentence #2160000, processed 11199836 words, keeping 261486 word types
2018-01-29 10:09:10,517 : INFO : PROGRESS: at sentence #2170000, processed 11252826 words, keeping 262288 word types
2018-01-29 10:09:10,746 : INFO : PROGRESS: at sentence #2180000, processed 11305738 words, keeping 263107 word types
2018-01-29 10:09:10,966 : INFO : PROGRESS: at sentence #2190000, processed 11358333 words, keeping 263889 word types
2018-01-29 10:09:11,282 : INFO : PROGRESS: at sentence #2200000, processed 11411069 words, keeping 264679 word types
2018-01-29 10:09:11,517 : INFO : PROGRESS: at sentence #2210000,

2018-01-29 10:09:26,200 : INFO : PROGRESS: at sentence #2840000, processed 14807427 words, keeping 312954 word types
2018-01-29 10:09:26,459 : INFO : PROGRESS: at sentence #2850000, processed 14860160 words, keeping 313734 word types
2018-01-29 10:09:26,707 : INFO : PROGRESS: at sentence #2860000, processed 14913800 words, keeping 314487 word types
2018-01-29 10:09:26,955 : INFO : PROGRESS: at sentence #2870000, processed 14967085 words, keeping 315198 word types
2018-01-29 10:09:27,216 : INFO : PROGRESS: at sentence #2880000, processed 15020133 words, keeping 315929 word types
2018-01-29 10:09:27,481 : INFO : PROGRESS: at sentence #2890000, processed 15073060 words, keeping 316646 word types
2018-01-29 10:09:27,723 : INFO : PROGRESS: at sentence #2900000, processed 15126607 words, keeping 317303 word types
2018-01-29 10:09:27,976 : INFO : PROGRESS: at sentence #2910000, processed 15179340 words, keeping 317988 word types
2018-01-29 10:09:28,216 : INFO : PROGRESS: at sentence #2920000,

2018-01-29 10:09:42,859 : INFO : PROGRESS: at sentence #3550000, processed 18524359 words, keeping 345330 word types
2018-01-29 10:09:43,082 : INFO : PROGRESS: at sentence #3560000, processed 18574965 words, keeping 345330 word types
2018-01-29 10:09:43,309 : INFO : PROGRESS: at sentence #3570000, processed 18626187 words, keeping 345330 word types
2018-01-29 10:09:43,523 : INFO : PROGRESS: at sentence #3580000, processed 18677320 words, keeping 345330 word types
2018-01-29 10:09:43,729 : INFO : PROGRESS: at sentence #3590000, processed 18728572 words, keeping 345330 word types
2018-01-29 10:09:43,952 : INFO : PROGRESS: at sentence #3600000, processed 18779763 words, keeping 345330 word types
2018-01-29 10:09:44,159 : INFO : PROGRESS: at sentence #3610000, processed 18830800 words, keeping 345330 word types
2018-01-29 10:09:44,368 : INFO : PROGRESS: at sentence #3620000, processed 18881717 words, keeping 345330 word types
2018-01-29 10:09:44,588 : INFO : PROGRESS: at sentence #3630000,

2018-01-29 10:09:58,614 : INFO : PROGRESS: at sentence #4260000, processed 22194350 words, keeping 345330 word types
2018-01-29 10:09:58,823 : INFO : PROGRESS: at sentence #4270000, processed 22246422 words, keeping 345330 word types
2018-01-29 10:09:59,044 : INFO : PROGRESS: at sentence #4280000, processed 22297755 words, keeping 345330 word types
2018-01-29 10:09:59,249 : INFO : PROGRESS: at sentence #4290000, processed 22349649 words, keeping 345330 word types
2018-01-29 10:09:59,459 : INFO : PROGRESS: at sentence #4300000, processed 22401107 words, keeping 345330 word types
2018-01-29 10:09:59,679 : INFO : PROGRESS: at sentence #4310000, processed 22452843 words, keeping 345330 word types
2018-01-29 10:09:59,902 : INFO : PROGRESS: at sentence #4320000, processed 22504571 words, keeping 345330 word types
2018-01-29 10:10:00,123 : INFO : PROGRESS: at sentence #4330000, processed 22555803 words, keeping 345330 word types
2018-01-29 10:10:00,386 : INFO : PROGRESS: at sentence #4340000,

2018-01-29 10:10:17,823 : INFO : PROGRESS: at sentence #4970000, processed 25868600 words, keeping 345330 word types
2018-01-29 10:10:18,072 : INFO : PROGRESS: at sentence #4980000, processed 25920625 words, keeping 345330 word types
2018-01-29 10:10:18,308 : INFO : PROGRESS: at sentence #4990000, processed 25972898 words, keeping 345330 word types
2018-01-29 10:10:18,540 : INFO : PROGRESS: at sentence #5000000, processed 26025607 words, keeping 345330 word types
2018-01-29 10:10:18,788 : INFO : PROGRESS: at sentence #5010000, processed 26078252 words, keeping 345330 word types
2018-01-29 10:10:19,023 : INFO : PROGRESS: at sentence #5020000, processed 26130849 words, keeping 345330 word types
2018-01-29 10:10:19,263 : INFO : PROGRESS: at sentence #5030000, processed 26183047 words, keeping 345330 word types
2018-01-29 10:10:19,502 : INFO : PROGRESS: at sentence #5040000, processed 26235654 words, keeping 345330 word types
2018-01-29 10:10:19,739 : INFO : PROGRESS: at sentence #5050000,

2018-01-29 10:10:35,321 : INFO : PROGRESS: at sentence #5680000, processed 29614893 words, keeping 345330 word types
2018-01-29 10:10:35,545 : INFO : PROGRESS: at sentence #5690000, processed 29667886 words, keeping 345330 word types
2018-01-29 10:10:35,767 : INFO : PROGRESS: at sentence #5700000, processed 29721366 words, keeping 345330 word types
2018-01-29 10:10:35,988 : INFO : PROGRESS: at sentence #5710000, processed 29774623 words, keeping 345330 word types
2018-01-29 10:10:36,207 : INFO : PROGRESS: at sentence #5720000, processed 29827509 words, keeping 345330 word types
2018-01-29 10:10:36,429 : INFO : PROGRESS: at sentence #5730000, processed 29880292 words, keeping 345330 word types
2018-01-29 10:10:36,680 : INFO : PROGRESS: at sentence #5740000, processed 29933405 words, keeping 345330 word types
2018-01-29 10:10:36,960 : INFO : PROGRESS: at sentence #5750000, processed 29985560 words, keeping 345330 word types
2018-01-29 10:10:37,174 : INFO : PROGRESS: at sentence #5760000,

2018-01-29 10:10:51,363 : INFO : PROGRESS: at sentence #6390000, processed 33383748 words, keeping 345330 word types
2018-01-29 10:10:51,578 : INFO : PROGRESS: at sentence #6400000, processed 33436417 words, keeping 345330 word types
2018-01-29 10:10:51,801 : INFO : PROGRESS: at sentence #6410000, processed 33489442 words, keeping 345330 word types
2018-01-29 10:10:52,038 : INFO : PROGRESS: at sentence #6420000, processed 33542577 words, keeping 345330 word types
2018-01-29 10:10:52,269 : INFO : PROGRESS: at sentence #6430000, processed 33595336 words, keeping 345330 word types
2018-01-29 10:10:52,497 : INFO : PROGRESS: at sentence #6440000, processed 33648651 words, keeping 345330 word types
2018-01-29 10:10:52,713 : INFO : PROGRESS: at sentence #6450000, processed 33702069 words, keeping 345330 word types
2018-01-29 10:10:52,942 : INFO : PROGRESS: at sentence #6460000, processed 33755020 words, keeping 345330 word types
2018-01-29 10:10:53,177 : INFO : PROGRESS: at sentence #6470000,

2018-01-29 10:11:07,628 : INFO : PROGRESS: at sentence #7100000, processed 37157497 words, keeping 377924 word types
2018-01-29 10:11:07,846 : INFO : PROGRESS: at sentence #7110000, processed 37211308 words, keeping 378619 word types
2018-01-29 10:11:08,061 : INFO : PROGRESS: at sentence #7120000, processed 37264793 words, keeping 379254 word types
2018-01-29 10:11:08,276 : INFO : PROGRESS: at sentence #7130000, processed 37317794 words, keeping 379869 word types
2018-01-29 10:11:08,496 : INFO : PROGRESS: at sentence #7140000, processed 37370961 words, keeping 380513 word types
2018-01-29 10:11:08,712 : INFO : PROGRESS: at sentence #7150000, processed 37424866 words, keeping 381190 word types
2018-01-29 10:11:08,926 : INFO : PROGRESS: at sentence #7160000, processed 37477636 words, keeping 381757 word types
2018-01-29 10:11:09,144 : INFO : PROGRESS: at sentence #7170000, processed 37530906 words, keeping 382409 word types
2018-01-29 10:11:09,368 : INFO : PROGRESS: at sentence #7180000,

2018-01-29 10:11:23,228 : INFO : PROGRESS: at sentence #7810000, processed 40949332 words, keeping 421356 word types
2018-01-29 10:11:23,447 : INFO : PROGRESS: at sentence #7820000, processed 41002653 words, keeping 421949 word types
2018-01-29 10:11:23,667 : INFO : PROGRESS: at sentence #7830000, processed 41055863 words, keeping 422590 word types
2018-01-29 10:11:23,880 : INFO : PROGRESS: at sentence #7840000, processed 41108865 words, keeping 423157 word types
2018-01-29 10:11:24,102 : INFO : PROGRESS: at sentence #7850000, processed 41162708 words, keeping 423766 word types
2018-01-29 10:11:24,321 : INFO : PROGRESS: at sentence #7860000, processed 41217042 words, keeping 424339 word types
2018-01-29 10:11:24,545 : INFO : PROGRESS: at sentence #7870000, processed 41271550 words, keeping 424951 word types
2018-01-29 10:11:24,762 : INFO : PROGRESS: at sentence #7880000, processed 41325518 words, keeping 425582 word types
2018-01-29 10:11:24,985 : INFO : PROGRESS: at sentence #7890000,

2018-01-29 10:11:41,609 : INFO : PROGRESS: at sentence #8520000, processed 44776641 words, keeping 462123 word types
2018-01-29 10:11:41,897 : INFO : PROGRESS: at sentence #8530000, processed 44830818 words, keeping 462681 word types
2018-01-29 10:11:42,142 : INFO : PROGRESS: at sentence #8540000, processed 44884164 words, keeping 463267 word types
2018-01-29 10:11:42,387 : INFO : PROGRESS: at sentence #8550000, processed 44937869 words, keeping 463797 word types
2018-01-29 10:11:42,646 : INFO : PROGRESS: at sentence #8560000, processed 44992200 words, keeping 464419 word types
2018-01-29 10:11:43,023 : INFO : PROGRESS: at sentence #8570000, processed 45046506 words, keeping 464984 word types
2018-01-29 10:11:43,336 : INFO : PROGRESS: at sentence #8580000, processed 45100142 words, keeping 465540 word types
2018-01-29 10:11:43,616 : INFO : PROGRESS: at sentence #8590000, processed 45154370 words, keeping 466136 word types
2018-01-29 10:11:43,913 : INFO : PROGRESS: at sentence #8600000,

2018-01-29 10:12:01,787 : INFO : PROGRESS: at sentence #9230000, processed 48608237 words, keeping 501486 word types
2018-01-29 10:12:02,023 : INFO : PROGRESS: at sentence #9240000, processed 48661907 words, keeping 501985 word types
2018-01-29 10:12:02,259 : INFO : PROGRESS: at sentence #9250000, processed 48716130 words, keeping 502539 word types
2018-01-29 10:12:02,494 : INFO : PROGRESS: at sentence #9260000, processed 48770510 words, keeping 503039 word types
2018-01-29 10:12:02,729 : INFO : PROGRESS: at sentence #9270000, processed 48825108 words, keeping 503599 word types
2018-01-29 10:12:02,958 : INFO : PROGRESS: at sentence #9280000, processed 48878945 words, keeping 504137 word types
2018-01-29 10:12:03,193 : INFO : PROGRESS: at sentence #9290000, processed 48932951 words, keeping 504681 word types
2018-01-29 10:12:03,429 : INFO : PROGRESS: at sentence #9300000, processed 48986950 words, keeping 505194 word types
2018-01-29 10:12:03,658 : INFO : PROGRESS: at sentence #9310000,

2018-01-29 10:12:18,636 : INFO : PROGRESS: at sentence #9940000, processed 52449786 words, keeping 538071 word types
2018-01-29 10:12:18,861 : INFO : PROGRESS: at sentence #9950000, processed 52504637 words, keeping 538586 word types
2018-01-29 10:12:19,080 : INFO : PROGRESS: at sentence #9960000, processed 52558007 words, keeping 539019 word types
2018-01-29 10:12:19,296 : INFO : PROGRESS: at sentence #9970000, processed 52611926 words, keeping 539546 word types
2018-01-29 10:12:19,518 : INFO : PROGRESS: at sentence #9980000, processed 52666034 words, keeping 540046 word types
2018-01-29 10:12:19,758 : INFO : PROGRESS: at sentence #9990000, processed 52720315 words, keeping 540564 word types
2018-01-29 10:12:20,015 : INFO : PROGRESS: at sentence #10000000, processed 52774393 words, keeping 541051 word types
2018-01-29 10:12:20,253 : INFO : PROGRESS: at sentence #10010000, processed 52827844 words, keeping 541528 word types
2018-01-29 10:12:20,478 : INFO : PROGRESS: at sentence #100200

2018-01-29 10:12:34,543 : INFO : PROGRESS: at sentence #10640000, processed 56181480 words, keeping 562724 word types
2018-01-29 10:12:34,757 : INFO : PROGRESS: at sentence #10650000, processed 56232649 words, keeping 562724 word types
2018-01-29 10:12:34,993 : INFO : PROGRESS: at sentence #10660000, processed 56283783 words, keeping 562724 word types
2018-01-29 10:12:35,211 : INFO : PROGRESS: at sentence #10670000, processed 56334886 words, keeping 562724 word types
2018-01-29 10:12:35,430 : INFO : PROGRESS: at sentence #10680000, processed 56386351 words, keeping 562724 word types
2018-01-29 10:12:35,642 : INFO : PROGRESS: at sentence #10690000, processed 56436792 words, keeping 562724 word types
2018-01-29 10:12:35,853 : INFO : PROGRESS: at sentence #10700000, processed 56487956 words, keeping 562724 word types
2018-01-29 10:12:36,073 : INFO : PROGRESS: at sentence #10710000, processed 56539127 words, keeping 562724 word types
2018-01-29 10:12:36,302 : INFO : PROGRESS: at sentence #

2018-01-29 10:12:50,214 : INFO : PROGRESS: at sentence #11340000, processed 59796384 words, keeping 562724 word types
2018-01-29 10:12:50,432 : INFO : PROGRESS: at sentence #11350000, processed 59848119 words, keeping 562724 word types
2018-01-29 10:12:50,651 : INFO : PROGRESS: at sentence #11360000, processed 59899940 words, keeping 562724 word types
2018-01-29 10:12:50,868 : INFO : PROGRESS: at sentence #11370000, processed 59951855 words, keeping 562724 word types
2018-01-29 10:12:51,083 : INFO : PROGRESS: at sentence #11380000, processed 60003605 words, keeping 562724 word types
2018-01-29 10:12:51,301 : INFO : PROGRESS: at sentence #11390000, processed 60056039 words, keeping 562724 word types
2018-01-29 10:12:51,521 : INFO : PROGRESS: at sentence #11400000, processed 60107999 words, keeping 562724 word types
2018-01-29 10:12:51,739 : INFO : PROGRESS: at sentence #11410000, processed 60159540 words, keeping 562724 word types
2018-01-29 10:12:51,977 : INFO : PROGRESS: at sentence #

2018-01-29 10:13:05,738 : INFO : PROGRESS: at sentence #12040000, processed 63417150 words, keeping 562724 word types
2018-01-29 10:13:05,954 : INFO : PROGRESS: at sentence #12050000, processed 63469222 words, keeping 562724 word types
2018-01-29 10:13:06,183 : INFO : PROGRESS: at sentence #12060000, processed 63521538 words, keeping 562724 word types
2018-01-29 10:13:06,397 : INFO : PROGRESS: at sentence #12070000, processed 63573541 words, keeping 562724 word types
2018-01-29 10:13:06,619 : INFO : PROGRESS: at sentence #12080000, processed 63625801 words, keeping 562724 word types
2018-01-29 10:13:06,832 : INFO : PROGRESS: at sentence #12090000, processed 63677782 words, keeping 562724 word types
2018-01-29 10:13:07,081 : INFO : PROGRESS: at sentence #12100000, processed 63730131 words, keeping 562724 word types
2018-01-29 10:13:07,310 : INFO : PROGRESS: at sentence #12110000, processed 63782337 words, keeping 562724 word types
2018-01-29 10:13:07,524 : INFO : PROGRESS: at sentence #

2018-01-29 10:13:21,953 : INFO : PROGRESS: at sentence #12740000, processed 67103481 words, keeping 562724 word types
2018-01-29 10:13:22,205 : INFO : PROGRESS: at sentence #12750000, processed 67156381 words, keeping 562724 word types
2018-01-29 10:13:22,450 : INFO : PROGRESS: at sentence #12760000, processed 67210126 words, keeping 562724 word types
2018-01-29 10:13:22,690 : INFO : PROGRESS: at sentence #12770000, processed 67262618 words, keeping 562724 word types
2018-01-29 10:13:22,946 : INFO : PROGRESS: at sentence #12780000, processed 67315932 words, keeping 562724 word types
2018-01-29 10:13:23,191 : INFO : PROGRESS: at sentence #12790000, processed 67369379 words, keeping 562724 word types
2018-01-29 10:13:23,491 : INFO : PROGRESS: at sentence #12800000, processed 67422419 words, keeping 562724 word types
2018-01-29 10:13:23,740 : INFO : PROGRESS: at sentence #12810000, processed 67476060 words, keeping 562724 word types
2018-01-29 10:13:23,965 : INFO : PROGRESS: at sentence #

2018-01-29 10:13:38,110 : INFO : PROGRESS: at sentence #13440000, processed 70819457 words, keeping 562724 word types
2018-01-29 10:13:38,334 : INFO : PROGRESS: at sentence #13450000, processed 70872900 words, keeping 562724 word types
2018-01-29 10:13:38,555 : INFO : PROGRESS: at sentence #13460000, processed 70925607 words, keeping 562724 word types
2018-01-29 10:13:38,779 : INFO : PROGRESS: at sentence #13470000, processed 70978512 words, keeping 562724 word types
2018-01-29 10:13:39,002 : INFO : PROGRESS: at sentence #13480000, processed 71031922 words, keeping 562724 word types
2018-01-29 10:13:39,230 : INFO : PROGRESS: at sentence #13490000, processed 71085519 words, keeping 562724 word types
2018-01-29 10:13:39,451 : INFO : PROGRESS: at sentence #13500000, processed 71138346 words, keeping 562724 word types
2018-01-29 10:13:39,679 : INFO : PROGRESS: at sentence #13510000, processed 71191993 words, keeping 562724 word types
2018-01-29 10:13:39,920 : INFO : PROGRESS: at sentence #

2018-01-29 10:13:53,834 : INFO : PROGRESS: at sentence #14140000, processed 74541850 words, keeping 562724 word types
2018-01-29 10:13:54,060 : INFO : PROGRESS: at sentence #14150000, processed 74594811 words, keeping 562724 word types
2018-01-29 10:13:54,281 : INFO : PROGRESS: at sentence #14160000, processed 74647673 words, keeping 562724 word types
2018-01-29 10:13:54,503 : INFO : PROGRESS: at sentence #14170000, processed 74700874 words, keeping 562724 word types
2018-01-29 10:13:54,735 : INFO : PROGRESS: at sentence #14180000, processed 74754320 words, keeping 562724 word types
2018-01-29 10:13:54,961 : INFO : PROGRESS: at sentence #14190000, processed 74806773 words, keeping 562724 word types
2018-01-29 10:13:55,182 : INFO : PROGRESS: at sentence #14200000, processed 74859765 words, keeping 562724 word types
2018-01-29 10:13:55,405 : INFO : PROGRESS: at sentence #14210000, processed 74913003 words, keeping 562724 word types
2018-01-29 10:13:55,628 : INFO : PROGRESS: at sentence #

2018-01-29 10:14:09,935 : INFO : PROGRESS: at sentence #14840000, processed 78276956 words, keeping 562724 word types
2018-01-29 10:14:10,166 : INFO : PROGRESS: at sentence #14850000, processed 78330212 words, keeping 562724 word types
2018-01-29 10:14:10,391 : INFO : PROGRESS: at sentence #14860000, processed 78383581 words, keeping 562724 word types
2018-01-29 10:14:10,611 : INFO : PROGRESS: at sentence #14870000, processed 78437071 words, keeping 562724 word types
2018-01-29 10:14:10,834 : INFO : PROGRESS: at sentence #14880000, processed 78490313 words, keeping 562724 word types
2018-01-29 10:14:11,052 : INFO : PROGRESS: at sentence #14890000, processed 78543352 words, keeping 562724 word types
2018-01-29 10:14:11,283 : INFO : PROGRESS: at sentence #14900000, processed 78597326 words, keeping 562724 word types
2018-01-29 10:14:11,509 : INFO : PROGRESS: at sentence #14910000, processed 78651241 words, keeping 562724 word types
2018-01-29 10:14:11,732 : INFO : PROGRESS: at sentence #

2018-01-29 10:14:25,741 : INFO : PROGRESS: at sentence #15540000, processed 82043522 words, keeping 562724 word types
2018-01-29 10:14:25,967 : INFO : PROGRESS: at sentence #15550000, processed 82097416 words, keeping 562724 word types
2018-01-29 10:14:26,196 : INFO : PROGRESS: at sentence #15560000, processed 82151441 words, keeping 562724 word types
2018-01-29 10:14:26,428 : INFO : PROGRESS: at sentence #15570000, processed 82205670 words, keeping 562724 word types
2018-01-29 10:14:26,654 : INFO : PROGRESS: at sentence #15580000, processed 82260265 words, keeping 562724 word types
2018-01-29 10:14:26,895 : INFO : PROGRESS: at sentence #15590000, processed 82314283 words, keeping 562724 word types
2018-01-29 10:14:27,135 : INFO : PROGRESS: at sentence #15600000, processed 82368027 words, keeping 562724 word types
2018-01-29 10:14:27,368 : INFO : PROGRESS: at sentence #15610000, processed 82422085 words, keeping 562724 word types
2018-01-29 10:14:27,593 : INFO : PROGRESS: at sentence #

2018-01-29 10:14:42,606 : INFO : PROGRESS: at sentence #16240000, processed 85822404 words, keeping 562724 word types
2018-01-29 10:14:42,844 : INFO : PROGRESS: at sentence #16250000, processed 85875877 words, keeping 562724 word types
2018-01-29 10:14:43,081 : INFO : PROGRESS: at sentence #16260000, processed 85929916 words, keeping 562724 word types
2018-01-29 10:14:43,310 : INFO : PROGRESS: at sentence #16270000, processed 85984227 words, keeping 562724 word types
2018-01-29 10:14:43,542 : INFO : PROGRESS: at sentence #16280000, processed 86038182 words, keeping 562724 word types
2018-01-29 10:14:43,767 : INFO : PROGRESS: at sentence #16290000, processed 86091674 words, keeping 562724 word types
2018-01-29 10:14:44,006 : INFO : PROGRESS: at sentence #16300000, processed 86145139 words, keeping 562724 word types
2018-01-29 10:14:44,252 : INFO : PROGRESS: at sentence #16310000, processed 86199151 words, keeping 562724 word types
2018-01-29 10:14:44,490 : INFO : PROGRESS: at sentence #

2018-01-29 10:14:59,255 : INFO : PROGRESS: at sentence #16940000, processed 89609126 words, keeping 562724 word types
2018-01-29 10:14:59,514 : INFO : PROGRESS: at sentence #16950000, processed 89662806 words, keeping 562724 word types
2018-01-29 10:14:59,778 : INFO : PROGRESS: at sentence #16960000, processed 89716895 words, keeping 562724 word types
2018-01-29 10:15:00,054 : INFO : PROGRESS: at sentence #16970000, processed 89771225 words, keeping 562724 word types
2018-01-29 10:15:00,321 : INFO : PROGRESS: at sentence #16980000, processed 89825612 words, keeping 562724 word types
2018-01-29 10:15:00,571 : INFO : PROGRESS: at sentence #16990000, processed 89879025 words, keeping 562724 word types
2018-01-29 10:15:00,821 : INFO : PROGRESS: at sentence #17000000, processed 89932418 words, keeping 562724 word types
2018-01-29 10:15:01,069 : INFO : PROGRESS: at sentence #17010000, processed 89986680 words, keeping 562724 word types
2018-01-29 10:15:01,321 : INFO : PROGRESS: at sentence #

2018-01-29 10:15:16,444 : INFO : PROGRESS: at sentence #17640000, processed 93381007 words, keeping 562724 word types
2018-01-29 10:15:16,667 : INFO : PROGRESS: at sentence #17650000, processed 93431882 words, keeping 562724 word types
2018-01-29 10:15:16,889 : INFO : PROGRESS: at sentence #17660000, processed 93482188 words, keeping 562724 word types
2018-01-29 10:15:17,122 : INFO : PROGRESS: at sentence #17670000, processed 93533063 words, keeping 562724 word types
2018-01-29 10:15:17,360 : INFO : PROGRESS: at sentence #17680000, processed 93583580 words, keeping 562724 word types
2018-01-29 10:15:17,600 : INFO : PROGRESS: at sentence #17690000, processed 93634259 words, keeping 562724 word types
2018-01-29 10:15:17,832 : INFO : PROGRESS: at sentence #17700000, processed 93685072 words, keeping 562724 word types
2018-01-29 10:15:18,076 : INFO : PROGRESS: at sentence #17710000, processed 93736470 words, keeping 562724 word types
2018-01-29 10:15:18,326 : INFO : PROGRESS: at sentence #

2018-01-29 10:15:32,362 : INFO : PROGRESS: at sentence #18340000, processed 96985848 words, keeping 562724 word types
2018-01-29 10:15:32,577 : INFO : PROGRESS: at sentence #18350000, processed 97037636 words, keeping 562724 word types
2018-01-29 10:15:32,824 : INFO : PROGRESS: at sentence #18360000, processed 97089428 words, keeping 562724 word types
2018-01-29 10:15:33,036 : INFO : PROGRESS: at sentence #18370000, processed 97140723 words, keeping 562724 word types
2018-01-29 10:15:33,264 : INFO : PROGRESS: at sentence #18380000, processed 97192586 words, keeping 562724 word types
2018-01-29 10:15:33,476 : INFO : PROGRESS: at sentence #18390000, processed 97244301 words, keeping 562724 word types
2018-01-29 10:15:33,709 : INFO : PROGRESS: at sentence #18400000, processed 97295857 words, keeping 562724 word types
2018-01-29 10:15:33,923 : INFO : PROGRESS: at sentence #18410000, processed 97346806 words, keeping 562724 word types
2018-01-29 10:15:34,149 : INFO : PROGRESS: at sentence #

2018-01-29 10:15:48,375 : INFO : PROGRESS: at sentence #19040000, processed 100604485 words, keeping 562724 word types
2018-01-29 10:15:48,609 : INFO : PROGRESS: at sentence #19050000, processed 100656025 words, keeping 562724 word types
2018-01-29 10:15:48,850 : INFO : PROGRESS: at sentence #19060000, processed 100707353 words, keeping 562724 word types
2018-01-29 10:15:49,068 : INFO : PROGRESS: at sentence #19070000, processed 100758878 words, keeping 562724 word types
2018-01-29 10:15:49,288 : INFO : PROGRESS: at sentence #19080000, processed 100810230 words, keeping 562724 word types
2018-01-29 10:15:49,520 : INFO : PROGRESS: at sentence #19090000, processed 100861832 words, keeping 562724 word types
2018-01-29 10:15:49,746 : INFO : PROGRESS: at sentence #19100000, processed 100914249 words, keeping 562724 word types
2018-01-29 10:15:49,981 : INFO : PROGRESS: at sentence #19110000, processed 100965725 words, keeping 562724 word types
2018-01-29 10:15:50,200 : INFO : PROGRESS: at se

2018-01-29 10:16:03,941 : INFO : PROGRESS: at sentence #19730000, processed 104224465 words, keeping 562724 word types
2018-01-29 10:16:04,166 : INFO : PROGRESS: at sentence #19740000, processed 104277646 words, keeping 562724 word types
2018-01-29 10:16:04,383 : INFO : PROGRESS: at sentence #19750000, processed 104330419 words, keeping 562724 word types
2018-01-29 10:16:04,602 : INFO : PROGRESS: at sentence #19760000, processed 104383098 words, keeping 562724 word types
2018-01-29 10:16:04,827 : INFO : PROGRESS: at sentence #19770000, processed 104435858 words, keeping 562724 word types
2018-01-29 10:16:05,066 : INFO : PROGRESS: at sentence #19780000, processed 104488346 words, keeping 562724 word types
2018-01-29 10:16:05,375 : INFO : PROGRESS: at sentence #19790000, processed 104541566 words, keeping 562724 word types
2018-01-29 10:16:05,599 : INFO : PROGRESS: at sentence #19800000, processed 104594524 words, keeping 562724 word types
2018-01-29 10:16:05,813 : INFO : PROGRESS: at se

2018-01-29 10:16:19,561 : INFO : PROGRESS: at sentence #20420000, processed 107885154 words, keeping 562724 word types
2018-01-29 10:16:19,786 : INFO : PROGRESS: at sentence #20430000, processed 107938656 words, keeping 562724 word types
2018-01-29 10:16:20,017 : INFO : PROGRESS: at sentence #20440000, processed 107991731 words, keeping 562724 word types
2018-01-29 10:16:20,240 : INFO : PROGRESS: at sentence #20450000, processed 108044932 words, keeping 562724 word types
2018-01-29 10:16:20,469 : INFO : PROGRESS: at sentence #20460000, processed 108098185 words, keeping 562724 word types
2018-01-29 10:16:20,689 : INFO : PROGRESS: at sentence #20470000, processed 108151158 words, keeping 562724 word types
2018-01-29 10:16:20,910 : INFO : PROGRESS: at sentence #20480000, processed 108204050 words, keeping 562724 word types
2018-01-29 10:16:21,135 : INFO : PROGRESS: at sentence #20490000, processed 108257100 words, keeping 562724 word types
2018-01-29 10:16:21,354 : INFO : PROGRESS: at se

2018-01-29 10:16:35,058 : INFO : PROGRESS: at sentence #21110000, processed 111551209 words, keeping 562724 word types
2018-01-29 10:16:35,304 : INFO : PROGRESS: at sentence #21120000, processed 111604047 words, keeping 562724 word types
2018-01-29 10:16:35,529 : INFO : PROGRESS: at sentence #21130000, processed 111657382 words, keeping 562724 word types
2018-01-29 10:16:35,749 : INFO : PROGRESS: at sentence #21140000, processed 111710896 words, keeping 562724 word types
2018-01-29 10:16:35,975 : INFO : PROGRESS: at sentence #21150000, processed 111763985 words, keeping 562724 word types
2018-01-29 10:16:36,201 : INFO : PROGRESS: at sentence #21160000, processed 111817463 words, keeping 562724 word types
2018-01-29 10:16:36,420 : INFO : PROGRESS: at sentence #21170000, processed 111871049 words, keeping 562724 word types
2018-01-29 10:16:36,641 : INFO : PROGRESS: at sentence #21180000, processed 111923937 words, keeping 562724 word types
2018-01-29 10:16:36,882 : INFO : PROGRESS: at se

2018-01-29 10:16:50,846 : INFO : PROGRESS: at sentence #21800000, processed 115230410 words, keeping 562724 word types
2018-01-29 10:16:51,074 : INFO : PROGRESS: at sentence #21810000, processed 115283563 words, keeping 562724 word types
2018-01-29 10:16:51,298 : INFO : PROGRESS: at sentence #21820000, processed 115336658 words, keeping 562724 word types
2018-01-29 10:16:51,519 : INFO : PROGRESS: at sentence #21830000, processed 115390089 words, keeping 562724 word types
2018-01-29 10:16:51,742 : INFO : PROGRESS: at sentence #21840000, processed 115443466 words, keeping 562724 word types
2018-01-29 10:16:51,993 : INFO : PROGRESS: at sentence #21850000, processed 115496027 words, keeping 562724 word types
2018-01-29 10:16:52,222 : INFO : PROGRESS: at sentence #21860000, processed 115549392 words, keeping 562724 word types
2018-01-29 10:16:52,446 : INFO : PROGRESS: at sentence #21870000, processed 115602947 words, keeping 562724 word types
2018-01-29 10:16:52,677 : INFO : PROGRESS: at se

2018-01-29 10:17:06,554 : INFO : PROGRESS: at sentence #22490000, processed 118933375 words, keeping 562724 word types
2018-01-29 10:17:06,783 : INFO : PROGRESS: at sentence #22500000, processed 118987572 words, keeping 562724 word types
2018-01-29 10:17:07,029 : INFO : PROGRESS: at sentence #22510000, processed 119041956 words, keeping 562724 word types
2018-01-29 10:17:07,255 : INFO : PROGRESS: at sentence #22520000, processed 119095991 words, keeping 562724 word types
2018-01-29 10:17:07,475 : INFO : PROGRESS: at sentence #22530000, processed 119149369 words, keeping 562724 word types
2018-01-29 10:17:07,696 : INFO : PROGRESS: at sentence #22540000, processed 119203015 words, keeping 562724 word types
2018-01-29 10:17:07,938 : INFO : PROGRESS: at sentence #22550000, processed 119257574 words, keeping 562724 word types
2018-01-29 10:17:08,165 : INFO : PROGRESS: at sentence #22560000, processed 119311345 words, keeping 562724 word types
2018-01-29 10:17:08,388 : INFO : PROGRESS: at se

2018-01-29 10:17:23,339 : INFO : PROGRESS: at sentence #23180000, processed 122658719 words, keeping 562724 word types
2018-01-29 10:17:23,573 : INFO : PROGRESS: at sentence #23190000, processed 122713105 words, keeping 562724 word types
2018-01-29 10:17:23,818 : INFO : PROGRESS: at sentence #23200000, processed 122767407 words, keeping 562724 word types
2018-01-29 10:17:24,070 : INFO : PROGRESS: at sentence #23210000, processed 122820673 words, keeping 562724 word types
2018-01-29 10:17:24,339 : INFO : PROGRESS: at sentence #23220000, processed 122874422 words, keeping 562724 word types
2018-01-29 10:17:24,596 : INFO : PROGRESS: at sentence #23230000, processed 122928269 words, keeping 562724 word types
2018-01-29 10:17:24,853 : INFO : PROGRESS: at sentence #23240000, processed 122982544 words, keeping 562724 word types
2018-01-29 10:17:25,099 : INFO : PROGRESS: at sentence #23250000, processed 123035398 words, keeping 562724 word types
2018-01-29 10:17:25,369 : INFO : PROGRESS: at se

2018-01-29 10:17:39,770 : INFO : PROGRESS: at sentence #23870000, processed 126389488 words, keeping 562724 word types
2018-01-29 10:17:39,997 : INFO : PROGRESS: at sentence #23880000, processed 126442882 words, keeping 562724 word types
2018-01-29 10:17:40,216 : INFO : PROGRESS: at sentence #23890000, processed 126496610 words, keeping 562724 word types
2018-01-29 10:17:40,448 : INFO : PROGRESS: at sentence #23900000, processed 126550858 words, keeping 562724 word types
2018-01-29 10:17:40,679 : INFO : PROGRESS: at sentence #23910000, processed 126605261 words, keeping 562724 word types
2018-01-29 10:17:40,918 : INFO : PROGRESS: at sentence #23920000, processed 126659444 words, keeping 562724 word types
2018-01-29 10:17:41,151 : INFO : PROGRESS: at sentence #23930000, processed 126712725 words, keeping 562724 word types
2018-01-29 10:17:41,376 : INFO : PROGRESS: at sentence #23940000, processed 126766882 words, keeping 562724 word types
2018-01-29 10:17:41,603 : INFO : PROGRESS: at se

2018-01-29 10:17:55,481 : INFO : PROGRESS: at sentence #24560000, processed 130122966 words, keeping 562724 word types
2018-01-29 10:17:55,708 : INFO : PROGRESS: at sentence #24570000, processed 130177208 words, keeping 562724 word types
2018-01-29 10:17:55,940 : INFO : PROGRESS: at sentence #24580000, processed 130231600 words, keeping 562724 word types
2018-01-29 10:17:56,168 : INFO : PROGRESS: at sentence #24590000, processed 130286327 words, keeping 562724 word types
2018-01-29 10:17:56,389 : INFO : PROGRESS: at sentence #24600000, processed 130341038 words, keeping 562724 word types
2018-01-29 10:17:56,620 : INFO : PROGRESS: at sentence #24610000, processed 130395004 words, keeping 562724 word types
2018-01-29 10:17:56,836 : INFO : PROGRESS: at sentence #24620000, processed 130449675 words, keeping 562724 word types
2018-01-29 10:17:57,096 : INFO : PROGRESS: at sentence #24630000, processed 130504741 words, keeping 562724 word types
2018-01-29 10:17:57,317 : INFO : PROGRESS: at se

2018-01-29 10:18:11,813 : INFO : PROGRESS: at sentence #25250000, processed 133917147 words, keeping 589783 word types
2018-01-29 10:18:12,043 : INFO : PROGRESS: at sentence #25260000, processed 133972262 words, keeping 590263 word types
2018-01-29 10:18:12,275 : INFO : PROGRESS: at sentence #25270000, processed 134026675 words, keeping 590769 word types
2018-01-29 10:18:12,499 : INFO : PROGRESS: at sentence #25280000, processed 134081577 words, keeping 591297 word types
2018-01-29 10:18:12,739 : INFO : PROGRESS: at sentence #25290000, processed 134137046 words, keeping 591784 word types
2018-01-29 10:18:12,989 : INFO : PROGRESS: at sentence #25300000, processed 134192344 words, keeping 592290 word types
2018-01-29 10:18:13,216 : INFO : PROGRESS: at sentence #25310000, processed 134246852 words, keeping 592806 word types
2018-01-29 10:18:13,446 : INFO : PROGRESS: at sentence #25320000, processed 134301758 words, keeping 593286 word types
2018-01-29 10:18:13,678 : INFO : PROGRESS: at se

2018-01-29 10:18:27,976 : INFO : PROGRESS: at sentence #25940000, processed 137716013 words, keeping 623021 word types
2018-01-29 10:18:28,212 : INFO : PROGRESS: at sentence #25950000, processed 137771850 words, keeping 623458 word types
2018-01-29 10:18:28,438 : INFO : PROGRESS: at sentence #25960000, processed 137827245 words, keeping 623913 word types
2018-01-29 10:18:28,675 : INFO : PROGRESS: at sentence #25970000, processed 137882191 words, keeping 624347 word types
2018-01-29 10:18:28,898 : INFO : PROGRESS: at sentence #25980000, processed 137937389 words, keeping 624793 word types
2018-01-29 10:18:29,142 : INFO : PROGRESS: at sentence #25990000, processed 137993078 words, keeping 625290 word types
2018-01-29 10:18:29,366 : INFO : PROGRESS: at sentence #26000000, processed 138048826 words, keeping 625765 word types
2018-01-29 10:18:29,603 : INFO : PROGRESS: at sentence #26010000, processed 138103145 words, keeping 626228 word types
2018-01-29 10:18:29,830 : INFO : PROGRESS: at se

2018-01-29 10:18:44,113 : INFO : PROGRESS: at sentence #26630000, processed 141536001 words, keeping 654671 word types
2018-01-29 10:18:44,354 : INFO : PROGRESS: at sentence #26640000, processed 141590830 words, keeping 655143 word types
2018-01-29 10:18:44,589 : INFO : PROGRESS: at sentence #26650000, processed 141646454 words, keeping 655635 word types
2018-01-29 10:18:44,833 : INFO : PROGRESS: at sentence #26660000, processed 141701892 words, keeping 656077 word types
2018-01-29 10:18:45,057 : INFO : PROGRESS: at sentence #26670000, processed 141756908 words, keeping 656510 word types
2018-01-29 10:18:45,297 : INFO : PROGRESS: at sentence #26680000, processed 141812307 words, keeping 656960 word types
2018-01-29 10:18:45,522 : INFO : PROGRESS: at sentence #26690000, processed 141867950 words, keeping 657386 word types
2018-01-29 10:18:45,764 : INFO : PROGRESS: at sentence #26700000, processed 141923937 words, keeping 657853 word types
2018-01-29 10:18:45,997 : INFO : PROGRESS: at se

2018-01-29 10:20:05,409 : INFO : PROGRESS: at 1.59% examples, 172516 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:06,446 : INFO : PROGRESS: at 1.62% examples, 172357 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:07,470 : INFO : PROGRESS: at 1.64% examples, 172087 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:08,547 : INFO : PROGRESS: at 1.66% examples, 171834 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:09,588 : INFO : PROGRESS: at 1.69% examples, 171681 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:10,792 : INFO : PROGRESS: at 1.72% examples, 171252 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:11,838 : INFO : PROGRESS: at 1.75% examples, 171387 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:12,901 : INFO : PROGRESS: at 1.77% examples, 171335 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:20:13,936 : INFO : PROGRESS: at 1.80% examples, 171494 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:20:14,940 : INFO : PROGRESS: at 1.83% examples, 171587 words/s, in_q

2018-01-29 10:21:29,760 : INFO : PROGRESS: at 3.82% examples, 173107 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:30,779 : INFO : PROGRESS: at 3.84% examples, 173121 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:31,785 : INFO : PROGRESS: at 3.87% examples, 173216 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:32,792 : INFO : PROGRESS: at 3.90% examples, 173245 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:33,822 : INFO : PROGRESS: at 3.93% examples, 173310 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:35,079 : INFO : PROGRESS: at 3.96% examples, 173111 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:36,129 : INFO : PROGRESS: at 3.98% examples, 173028 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:37,153 : INFO : PROGRESS: at 4.01% examples, 173161 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:38,187 : INFO : PROGRESS: at 4.04% examples, 173219 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:21:39,262 : INFO : PROGRESS: at 4.07% examples, 173292 words/s, in_q

2018-01-29 10:22:53,918 : INFO : PROGRESS: at 6.02% examples, 173867 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:22:55,163 : INFO : PROGRESS: at 6.05% examples, 173783 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:22:56,174 : INFO : PROGRESS: at 6.08% examples, 173794 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:22:57,197 : INFO : PROGRESS: at 6.10% examples, 173755 words/s, in_qsize 0, out_qsize 1
2018-01-29 10:22:58,205 : INFO : PROGRESS: at 6.13% examples, 173850 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:22:59,214 : INFO : PROGRESS: at 6.16% examples, 173902 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:23:00,224 : INFO : PROGRESS: at 6.19% examples, 173952 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:23:01,239 : INFO : PROGRESS: at 6.21% examples, 173800 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:23:02,261 : INFO : PROGRESS: at 6.24% examples, 173802 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:23:03,265 : INFO : PROGRESS: at 6.26% examples, 173858 words/s, in_q

2018-01-29 10:24:18,347 : INFO : PROGRESS: at 8.21% examples, 173981 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:19,362 : INFO : PROGRESS: at 8.24% examples, 174019 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:20,390 : INFO : PROGRESS: at 8.26% examples, 173901 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:21,409 : INFO : PROGRESS: at 8.29% examples, 173936 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:22,414 : INFO : PROGRESS: at 8.32% examples, 173950 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:23,429 : INFO : PROGRESS: at 8.35% examples, 173958 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:24,480 : INFO : PROGRESS: at 8.38% examples, 174035 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:25,482 : INFO : PROGRESS: at 8.40% examples, 173933 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:26,507 : INFO : PROGRESS: at 8.43% examples, 173964 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:24:27,546 : INFO : PROGRESS: at 8.46% examples, 173988 words/s, in_q

2018-01-29 10:25:41,980 : INFO : PROGRESS: at 10.42% examples, 174063 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:43,010 : INFO : PROGRESS: at 10.45% examples, 174085 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:44,035 : INFO : PROGRESS: at 10.48% examples, 174108 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:45,071 : INFO : PROGRESS: at 10.51% examples, 174127 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:46,086 : INFO : PROGRESS: at 10.53% examples, 174155 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:47,152 : INFO : PROGRESS: at 10.56% examples, 174066 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:48,172 : INFO : PROGRESS: at 10.58% examples, 174091 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:49,198 : INFO : PROGRESS: at 10.61% examples, 174114 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:50,227 : INFO : PROGRESS: at 10.64% examples, 174135 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:25:51,236 : INFO : PROGRESS: at 10.67% examples, 174165 wor

2018-01-29 10:27:05,427 : INFO : PROGRESS: at 12.53% examples, 173487 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:06,470 : INFO : PROGRESS: at 12.56% examples, 173442 words/s, in_qsize 0, out_qsize 2
2018-01-29 10:27:07,479 : INFO : PROGRESS: at 12.59% examples, 173507 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:08,513 : INFO : PROGRESS: at 12.62% examples, 173543 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:09,533 : INFO : PROGRESS: at 12.64% examples, 173487 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:10,543 : INFO : PROGRESS: at 12.67% examples, 173512 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:11,557 : INFO : PROGRESS: at 12.69% examples, 173535 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:12,661 : INFO : PROGRESS: at 12.72% examples, 173526 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:13,681 : INFO : PROGRESS: at 12.75% examples, 173586 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:27:14,702 : INFO : PROGRESS: at 12.78% examples, 173530 wor

2018-01-29 10:28:28,420 : INFO : PROGRESS: at 14.73% examples, 173654 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:29,466 : INFO : PROGRESS: at 14.75% examples, 173599 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:30,505 : INFO : PROGRESS: at 14.78% examples, 173629 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:31,521 : INFO : PROGRESS: at 14.81% examples, 173649 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:32,573 : INFO : PROGRESS: at 14.84% examples, 173659 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:33,585 : INFO : PROGRESS: at 14.87% examples, 173680 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:34,588 : INFO : PROGRESS: at 14.89% examples, 173704 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:35,591 : INFO : PROGRESS: at 14.92% examples, 173646 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:36,634 : INFO : PROGRESS: at 14.94% examples, 173675 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:28:37,665 : INFO : PROGRESS: at 14.97% examples, 173690 wor

2018-01-29 10:29:51,891 : INFO : PROGRESS: at 16.49% examples, 169402 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:29:52,944 : INFO : PROGRESS: at 16.51% examples, 169330 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:29:53,986 : INFO : PROGRESS: at 16.53% examples, 169245 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:29:55,180 : INFO : PROGRESS: at 16.54% examples, 169093 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:29:56,219 : INFO : PROGRESS: at 16.56% examples, 169011 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:29:57,250 : INFO : PROGRESS: at 16.58% examples, 168930 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:29:58,334 : INFO : PROGRESS: at 16.60% examples, 168851 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:29:59,388 : INFO : PROGRESS: at 16.61% examples, 168765 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:30:00,389 : INFO : PROGRESS: at 16.63% examples, 168679 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:30:01,393 : INFO : PROGRESS: at 16.65% examples, 168593 wor

2018-01-29 10:31:16,695 : INFO : PROGRESS: at 17.84% examples, 162236 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:17,695 : INFO : PROGRESS: at 17.85% examples, 162156 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:18,917 : INFO : PROGRESS: at 17.87% examples, 162026 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:20,013 : INFO : PROGRESS: at 17.88% examples, 161950 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:21,044 : INFO : PROGRESS: at 17.90% examples, 161876 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:22,101 : INFO : PROGRESS: at 17.92% examples, 161809 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:23,149 : INFO : PROGRESS: at 17.94% examples, 161744 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:24,235 : INFO : PROGRESS: at 17.95% examples, 161672 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:25,280 : INFO : PROGRESS: at 17.97% examples, 161608 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:31:26,294 : INFO : PROGRESS: at 17.99% examples, 161538 wor

2018-01-29 10:32:40,483 : INFO : PROGRESS: at 19.18% examples, 156822 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:41,581 : INFO : PROGRESS: at 19.20% examples, 156771 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:42,616 : INFO : PROGRESS: at 19.21% examples, 156676 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:43,696 : INFO : PROGRESS: at 19.23% examples, 156618 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:44,739 : INFO : PROGRESS: at 19.24% examples, 156555 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:32:45,761 : INFO : PROGRESS: at 19.26% examples, 156497 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:46,823 : INFO : PROGRESS: at 19.28% examples, 156431 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:47,839 : INFO : PROGRESS: at 19.29% examples, 156363 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:49,041 : INFO : PROGRESS: at 19.30% examples, 156214 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:32:50,083 : INFO : PROGRESS: at 19.31% examples, 156062 wor

2018-01-29 10:34:04,844 : INFO : PROGRESS: at 20.52% examples, 152208 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:34:05,854 : INFO : PROGRESS: at 20.53% examples, 152121 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:06,880 : INFO : PROGRESS: at 20.55% examples, 152063 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:07,949 : INFO : PROGRESS: at 20.57% examples, 152019 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:08,995 : INFO : PROGRESS: at 20.59% examples, 151979 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:10,093 : INFO : PROGRESS: at 20.61% examples, 151930 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:11,158 : INFO : PROGRESS: at 20.62% examples, 151876 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:12,191 : INFO : PROGRESS: at 20.64% examples, 151838 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:13,417 : INFO : PROGRESS: at 20.66% examples, 151747 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:34:14,467 : INFO : PROGRESS: at 20.68% examples, 151707 wor

2018-01-29 10:35:30,196 : INFO : PROGRESS: at 21.93% examples, 148398 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:31,224 : INFO : PROGRESS: at 21.95% examples, 148368 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:32,246 : INFO : PROGRESS: at 21.96% examples, 148338 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:33,377 : INFO : PROGRESS: at 21.98% examples, 148263 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:34,431 : INFO : PROGRESS: at 22.00% examples, 148238 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:35,447 : INFO : PROGRESS: at 22.02% examples, 148209 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:36,501 : INFO : PROGRESS: at 22.03% examples, 148174 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:37,615 : INFO : PROGRESS: at 22.05% examples, 148131 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:38,634 : INFO : PROGRESS: at 22.07% examples, 148102 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:35:39,663 : INFO : PROGRESS: at 22.09% examples, 148062 wor

2018-01-29 10:36:54,579 : INFO : PROGRESS: at 23.33% examples, 145314 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:36:55,629 : INFO : PROGRESS: at 23.35% examples, 145286 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:36:56,637 : INFO : PROGRESS: at 23.37% examples, 145255 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:36:57,740 : INFO : PROGRESS: at 23.38% examples, 145220 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:36:58,832 : INFO : PROGRESS: at 23.41% examples, 145194 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:37:00,023 : INFO : PROGRESS: at 23.42% examples, 145130 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:37:01,084 : INFO : PROGRESS: at 23.44% examples, 145100 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:37:02,098 : INFO : PROGRESS: at 23.46% examples, 145077 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:37:03,101 : INFO : PROGRESS: at 23.48% examples, 145056 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:37:04,187 : INFO : PROGRESS: at 23.50% examples, 145023 wor

2018-01-29 10:38:19,387 : INFO : PROGRESS: at 24.73% examples, 142666 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:20,389 : INFO : PROGRESS: at 24.74% examples, 142607 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:21,416 : INFO : PROGRESS: at 24.76% examples, 142578 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:22,473 : INFO : PROGRESS: at 24.78% examples, 142554 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:23,522 : INFO : PROGRESS: at 24.79% examples, 142505 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:24,549 : INFO : PROGRESS: at 24.81% examples, 142468 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:25,617 : INFO : PROGRESS: at 24.83% examples, 142434 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:26,663 : INFO : PROGRESS: at 24.84% examples, 142411 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:27,669 : INFO : PROGRESS: at 24.86% examples, 142384 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:38:28,770 : INFO : PROGRESS: at 24.87% examples, 142330 wor

2018-01-29 10:39:43,357 : INFO : PROGRESS: at 26.07% examples, 140207 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:44,399 : INFO : PROGRESS: at 26.08% examples, 140180 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:45,434 : INFO : PROGRESS: at 26.10% examples, 140153 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:46,519 : INFO : PROGRESS: at 26.12% examples, 140129 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:47,590 : INFO : PROGRESS: at 26.14% examples, 140107 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:48,595 : INFO : PROGRESS: at 26.15% examples, 140084 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:49,607 : INFO : PROGRESS: at 26.17% examples, 140068 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:50,649 : INFO : PROGRESS: at 26.19% examples, 140049 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:51,881 : INFO : PROGRESS: at 26.21% examples, 140000 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:39:52,918 : INFO : PROGRESS: at 26.22% examples, 139982 wor

2018-01-29 10:41:07,988 : INFO : PROGRESS: at 27.44% examples, 138282 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:09,004 : INFO : PROGRESS: at 27.46% examples, 138260 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:10,011 : INFO : PROGRESS: at 27.48% examples, 138240 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:11,080 : INFO : PROGRESS: at 27.49% examples, 138213 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:12,113 : INFO : PROGRESS: at 27.51% examples, 138190 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:13,169 : INFO : PROGRESS: at 27.53% examples, 138165 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:14,283 : INFO : PROGRESS: at 27.54% examples, 138120 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:15,294 : INFO : PROGRESS: at 27.56% examples, 138099 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:16,402 : INFO : PROGRESS: at 27.57% examples, 138076 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:41:17,427 : INFO : PROGRESS: at 27.59% examples, 138061 wor

2018-01-29 10:42:32,589 : INFO : PROGRESS: at 28.84% examples, 136521 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:33,957 : INFO : PROGRESS: at 28.86% examples, 136469 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:35,023 : INFO : PROGRESS: at 28.88% examples, 136452 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:36,114 : INFO : PROGRESS: at 28.90% examples, 136434 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:37,169 : INFO : PROGRESS: at 28.92% examples, 136418 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:38,268 : INFO : PROGRESS: at 28.94% examples, 136405 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:39,298 : INFO : PROGRESS: at 28.96% examples, 136392 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:40,316 : INFO : PROGRESS: at 28.97% examples, 136381 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:41,324 : INFO : PROGRESS: at 28.99% examples, 136343 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:42:42,366 : INFO : PROGRESS: at 29.01% examples, 136329 wor

2018-01-29 10:43:56,743 : INFO : PROGRESS: at 30.26% examples, 135137 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:43:57,758 : INFO : PROGRESS: at 30.29% examples, 135158 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:43:58,823 : INFO : PROGRESS: at 30.31% examples, 135156 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:43:59,837 : INFO : PROGRESS: at 30.33% examples, 135171 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:44:00,842 : INFO : PROGRESS: at 30.36% examples, 135187 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:44:01,904 : INFO : PROGRESS: at 30.39% examples, 135210 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:44:02,907 : INFO : PROGRESS: at 30.41% examples, 135251 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:44:03,980 : INFO : PROGRESS: at 30.44% examples, 135261 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:44:04,982 : INFO : PROGRESS: at 30.47% examples, 135290 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:44:06,017 : INFO : PROGRESS: at 30.49% examples, 135315 wor

2018-01-29 10:45:20,439 : INFO : PROGRESS: at 32.30% examples, 136651 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:21,444 : INFO : PROGRESS: at 32.33% examples, 136683 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:22,466 : INFO : PROGRESS: at 32.35% examples, 136713 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:23,498 : INFO : PROGRESS: at 32.38% examples, 136719 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:24,540 : INFO : PROGRESS: at 32.40% examples, 136748 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:25,547 : INFO : PROGRESS: at 32.43% examples, 136779 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:26,551 : INFO : PROGRESS: at 32.46% examples, 136811 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:27,582 : INFO : PROGRESS: at 32.49% examples, 136840 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:28,587 : INFO : PROGRESS: at 32.51% examples, 136842 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:45:29,592 : INFO : PROGRESS: at 32.54% examples, 136874 wor

2018-01-29 10:46:43,182 : INFO : PROGRESS: at 34.46% examples, 138476 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:44,220 : INFO : PROGRESS: at 34.49% examples, 138508 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:45,279 : INFO : PROGRESS: at 34.52% examples, 138510 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:46,281 : INFO : PROGRESS: at 34.55% examples, 138540 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:47,346 : INFO : PROGRESS: at 34.57% examples, 138564 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:48,398 : INFO : PROGRESS: at 34.60% examples, 138595 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:49,405 : INFO : PROGRESS: at 34.63% examples, 138625 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:50,492 : INFO : PROGRESS: at 34.66% examples, 138630 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:51,528 : INFO : PROGRESS: at 34.69% examples, 138662 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:46:52,570 : INFO : PROGRESS: at 34.71% examples, 138683 wor

2018-01-29 10:48:06,693 : INFO : PROGRESS: at 36.59% examples, 139998 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:07,739 : INFO : PROGRESS: at 36.62% examples, 140016 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:08,790 : INFO : PROGRESS: at 36.65% examples, 140045 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:09,875 : INFO : PROGRESS: at 36.68% examples, 140065 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:10,901 : INFO : PROGRESS: at 36.71% examples, 140096 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:12,065 : INFO : PROGRESS: at 36.73% examples, 140104 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:48:13,108 : INFO : PROGRESS: at 36.76% examples, 140128 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:14,125 : INFO : PROGRESS: at 36.79% examples, 140154 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:15,127 : INFO : PROGRESS: at 36.82% examples, 140181 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:48:16,162 : INFO : PROGRESS: at 36.84% examples, 140205 wor

2018-01-29 10:49:29,794 : INFO : PROGRESS: at 38.70% examples, 141447 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:30,813 : INFO : PROGRESS: at 38.72% examples, 141445 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:31,834 : INFO : PROGRESS: at 38.74% examples, 141463 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:32,895 : INFO : PROGRESS: at 38.77% examples, 141473 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:33,912 : INFO : PROGRESS: at 38.79% examples, 141477 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:34,938 : INFO : PROGRESS: at 38.81% examples, 141489 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:36,014 : INFO : PROGRESS: at 38.84% examples, 141503 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:37,067 : INFO : PROGRESS: at 38.86% examples, 141499 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:38,136 : INFO : PROGRESS: at 38.89% examples, 141519 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:49:39,163 : INFO : PROGRESS: at 38.91% examples, 141541 wor

2018-01-29 10:50:52,266 : INFO : PROGRESS: at 40.80% examples, 142792 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:50:53,271 : INFO : PROGRESS: at 40.82% examples, 142785 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:50:54,344 : INFO : PROGRESS: at 40.85% examples, 142809 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:50:55,358 : INFO : PROGRESS: at 40.88% examples, 142832 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:50:56,375 : INFO : PROGRESS: at 40.91% examples, 142854 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:50:57,406 : INFO : PROGRESS: at 40.94% examples, 142876 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:50:58,413 : INFO : PROGRESS: at 40.96% examples, 142874 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:50:59,434 : INFO : PROGRESS: at 40.99% examples, 142896 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:51:00,436 : INFO : PROGRESS: at 41.02% examples, 142920 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:51:01,440 : INFO : PROGRESS: at 41.05% examples, 142938 wor

2018-01-29 10:52:15,175 : INFO : PROGRESS: at 43.01% examples, 144135 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:16,190 : INFO : PROGRESS: at 43.04% examples, 144156 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:17,235 : INFO : PROGRESS: at 43.06% examples, 144151 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:18,250 : INFO : PROGRESS: at 43.09% examples, 144172 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:19,259 : INFO : PROGRESS: at 43.12% examples, 144194 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:20,306 : INFO : PROGRESS: at 43.14% examples, 144213 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:21,321 : INFO : PROGRESS: at 43.17% examples, 144234 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:22,384 : INFO : PROGRESS: at 43.20% examples, 144228 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:23,388 : INFO : PROGRESS: at 43.22% examples, 144250 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:52:24,407 : INFO : PROGRESS: at 43.25% examples, 144270 wor

2018-01-29 10:53:38,359 : INFO : PROGRESS: at 45.12% examples, 145085 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:39,384 : INFO : PROGRESS: at 45.14% examples, 145081 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:40,438 : INFO : PROGRESS: at 45.17% examples, 145093 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:41,485 : INFO : PROGRESS: at 45.19% examples, 145106 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:42,501 : INFO : PROGRESS: at 45.22% examples, 145121 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:43,538 : INFO : PROGRESS: at 45.25% examples, 145139 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:44,571 : INFO : PROGRESS: at 45.27% examples, 145135 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:45,583 : INFO : PROGRESS: at 45.30% examples, 145150 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:46,594 : INFO : PROGRESS: at 45.32% examples, 145165 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:53:47,650 : INFO : PROGRESS: at 45.35% examples, 145182 wor

2018-01-29 10:55:01,074 : INFO : PROGRESS: at 47.16% examples, 145906 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:55:02,087 : INFO : PROGRESS: at 47.19% examples, 145920 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:03,151 : INFO : PROGRESS: at 47.21% examples, 145913 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:04,182 : INFO : PROGRESS: at 47.24% examples, 145930 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:05,188 : INFO : PROGRESS: at 47.26% examples, 145949 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:06,213 : INFO : PROGRESS: at 47.29% examples, 145962 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:07,285 : INFO : PROGRESS: at 47.32% examples, 145972 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:08,330 : INFO : PROGRESS: at 47.34% examples, 145975 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:09,345 : INFO : PROGRESS: at 47.37% examples, 145985 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:55:10,379 : INFO : PROGRESS: at 47.39% examples, 145997 wor

2018-01-29 10:56:24,411 : INFO : PROGRESS: at 49.30% examples, 146787 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:25,450 : INFO : PROGRESS: at 49.33% examples, 146803 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:26,538 : INFO : PROGRESS: at 49.36% examples, 146820 words/s, in_qsize 0, out_qsize 1
2018-01-29 10:56:27,548 : INFO : PROGRESS: at 49.39% examples, 146833 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:28,618 : INFO : PROGRESS: at 49.41% examples, 146830 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:29,648 : INFO : PROGRESS: at 49.43% examples, 146838 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:30,671 : INFO : PROGRESS: at 49.46% examples, 146838 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:31,716 : INFO : PROGRESS: at 49.48% examples, 146841 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:32,747 : INFO : PROGRESS: at 49.50% examples, 146841 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:56:33,792 : INFO : PROGRESS: at 49.52% examples, 146827 wor

2018-01-29 10:57:47,192 : INFO : PROGRESS: at 51.33% examples, 147384 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:48,226 : INFO : PROGRESS: at 51.36% examples, 147399 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:49,238 : INFO : PROGRESS: at 51.38% examples, 147396 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:50,269 : INFO : PROGRESS: at 51.40% examples, 147403 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:51,325 : INFO : PROGRESS: at 51.43% examples, 147421 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:57:52,327 : INFO : PROGRESS: at 51.46% examples, 147430 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:53,328 : INFO : PROGRESS: at 51.48% examples, 147447 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:54,382 : INFO : PROGRESS: at 51.51% examples, 147445 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:55,405 : INFO : PROGRESS: at 51.53% examples, 147461 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:57:56,438 : INFO : PROGRESS: at 51.56% examples, 147476 wor

2018-01-29 10:59:10,201 : INFO : PROGRESS: at 53.45% examples, 148239 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:59:11,309 : INFO : PROGRESS: at 53.47% examples, 148233 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:59:12,317 : INFO : PROGRESS: at 53.50% examples, 148246 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:59:13,350 : INFO : PROGRESS: at 53.53% examples, 148260 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:59:14,373 : INFO : PROGRESS: at 53.56% examples, 148268 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:59:15,375 : INFO : PROGRESS: at 53.58% examples, 148277 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:59:16,494 : INFO : PROGRESS: at 53.61% examples, 148274 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:59:17,506 : INFO : PROGRESS: at 53.63% examples, 148286 words/s, in_qsize 0, out_qsize 0
2018-01-29 10:59:18,528 : INFO : PROGRESS: at 53.66% examples, 148294 words/s, in_qsize 1, out_qsize 0
2018-01-29 10:59:19,557 : INFO : PROGRESS: at 53.69% examples, 148309 wor

2018-01-29 11:00:33,221 : INFO : PROGRESS: at 55.57% examples, 148921 words/s, in_qsize 0, out_qsize 1
2018-01-29 11:00:34,258 : INFO : PROGRESS: at 55.60% examples, 148934 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:00:35,327 : INFO : PROGRESS: at 55.63% examples, 148946 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:00:36,378 : INFO : PROGRESS: at 55.66% examples, 148958 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:00:37,426 : INFO : PROGRESS: at 55.69% examples, 148971 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:00:38,431 : INFO : PROGRESS: at 55.71% examples, 148986 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:00:39,465 : INFO : PROGRESS: at 55.74% examples, 148981 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:00:40,604 : INFO : PROGRESS: at 55.76% examples, 148985 words/s, in_qsize 3, out_qsize 0
2018-01-29 11:00:41,684 : INFO : PROGRESS: at 55.80% examples, 149014 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:00:42,706 : INFO : PROGRESS: at 55.83% examples, 149028 wor

2018-01-29 11:01:55,890 : INFO : PROGRESS: at 57.66% examples, 149599 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:01:56,905 : INFO : PROGRESS: at 57.69% examples, 149606 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:01:57,922 : INFO : PROGRESS: at 57.71% examples, 149612 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:01:58,933 : INFO : PROGRESS: at 57.74% examples, 149623 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:01:59,941 : INFO : PROGRESS: at 57.76% examples, 149633 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:02:00,951 : INFO : PROGRESS: at 57.78% examples, 149625 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:02:01,965 : INFO : PROGRESS: at 57.81% examples, 149636 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:02:02,976 : INFO : PROGRESS: at 57.84% examples, 149646 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:02:03,998 : INFO : PROGRESS: at 57.86% examples, 149652 words/s, in_qsize 0, out_qsize 1
2018-01-29 11:02:05,006 : INFO : PROGRESS: at 57.89% examples, 149666 wor

2018-01-29 11:03:18,670 : INFO : PROGRESS: at 59.71% examples, 150225 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:19,708 : INFO : PROGRESS: at 59.74% examples, 150240 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:20,753 : INFO : PROGRESS: at 59.76% examples, 150237 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:21,755 : INFO : PROGRESS: at 59.79% examples, 150247 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:22,804 : INFO : PROGRESS: at 59.81% examples, 150258 words/s, in_qsize 0, out_qsize 1
2018-01-29 11:03:23,817 : INFO : PROGRESS: at 59.84% examples, 150268 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:24,829 : INFO : PROGRESS: at 59.87% examples, 150285 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:25,848 : INFO : PROGRESS: at 59.89% examples, 150287 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:26,868 : INFO : PROGRESS: at 59.92% examples, 150296 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:03:27,888 : INFO : PROGRESS: at 59.94% examples, 150309 wor

2018-01-29 11:04:41,351 : INFO : PROGRESS: at 61.93% examples, 151030 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:42,377 : INFO : PROGRESS: at 61.95% examples, 151042 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:43,399 : INFO : PROGRESS: at 61.98% examples, 151054 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:44,433 : INFO : PROGRESS: at 62.01% examples, 151065 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:45,481 : INFO : PROGRESS: at 62.03% examples, 151062 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:46,522 : INFO : PROGRESS: at 62.06% examples, 151077 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:47,542 : INFO : PROGRESS: at 62.09% examples, 151086 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:48,573 : INFO : PROGRESS: at 62.12% examples, 151097 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:49,608 : INFO : PROGRESS: at 62.15% examples, 151112 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:04:50,615 : INFO : PROGRESS: at 62.17% examples, 151108 wor

2018-01-29 11:06:03,718 : INFO : PROGRESS: at 64.10% examples, 151673 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:04,750 : INFO : PROGRESS: at 64.12% examples, 151684 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:05,757 : INFO : PROGRESS: at 64.15% examples, 151687 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:06,761 : INFO : PROGRESS: at 64.17% examples, 151686 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:07,831 : INFO : PROGRESS: at 64.20% examples, 151698 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:08,872 : INFO : PROGRESS: at 64.23% examples, 151709 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:09,917 : INFO : PROGRESS: at 64.26% examples, 151723 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:10,954 : INFO : PROGRESS: at 64.29% examples, 151734 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:11,984 : INFO : PROGRESS: at 64.31% examples, 151732 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:06:12,985 : INFO : PROGRESS: at 64.34% examples, 151741 wor

2018-01-29 11:07:26,091 : INFO : PROGRESS: at 66.28% examples, 152410 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:27,092 : INFO : PROGRESS: at 66.30% examples, 152410 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:28,128 : INFO : PROGRESS: at 66.33% examples, 152420 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:29,141 : INFO : PROGRESS: at 66.36% examples, 152431 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:30,203 : INFO : PROGRESS: at 66.39% examples, 152443 words/s, in_qsize 0, out_qsize 1
2018-01-29 11:07:31,204 : INFO : PROGRESS: at 66.42% examples, 152455 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:32,260 : INFO : PROGRESS: at 66.44% examples, 152452 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:33,285 : INFO : PROGRESS: at 66.47% examples, 152466 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:34,311 : INFO : PROGRESS: at 66.49% examples, 152476 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:07:35,356 : INFO : PROGRESS: at 66.52% examples, 152489 wor

2018-01-29 11:08:48,688 : INFO : PROGRESS: at 68.44% examples, 153028 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:49,692 : INFO : PROGRESS: at 68.46% examples, 153037 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:50,705 : INFO : PROGRESS: at 68.49% examples, 153045 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:51,724 : INFO : PROGRESS: at 68.51% examples, 153037 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:52,773 : INFO : PROGRESS: at 68.54% examples, 153046 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:53,774 : INFO : PROGRESS: at 68.57% examples, 153052 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:54,797 : INFO : PROGRESS: at 68.59% examples, 153059 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:55,833 : INFO : PROGRESS: at 68.62% examples, 153069 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:56,849 : INFO : PROGRESS: at 68.64% examples, 153064 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:08:57,852 : INFO : PROGRESS: at 68.67% examples, 153076 wor

2018-01-29 11:10:12,420 : INFO : PROGRESS: at 70.47% examples, 153245 words/s, in_qsize 1, out_qsize 0
2018-01-29 11:10:13,443 : INFO : PROGRESS: at 70.50% examples, 153252 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:10:14,467 : INFO : PROGRESS: at 70.52% examples, 153250 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:10:15,507 : INFO : PROGRESS: at 70.55% examples, 153257 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:10:16,546 : INFO : PROGRESS: at 70.57% examples, 153244 words/s, in_qsize 1, out_qsize 0
2018-01-29 11:10:17,600 : INFO : PROGRESS: at 70.59% examples, 153247 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:10:18,665 : INFO : PROGRESS: at 70.62% examples, 153258 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:10:19,675 : INFO : PROGRESS: at 70.64% examples, 153250 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:10:20,749 : INFO : PROGRESS: at 70.66% examples, 153233 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:10:21,772 : INFO : PROGRESS: at 70.68% examples, 153219 wor

2018-01-29 11:11:35,339 : INFO : PROGRESS: at 72.47% examples, 153488 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:36,360 : INFO : PROGRESS: at 72.49% examples, 153488 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:37,364 : INFO : PROGRESS: at 72.52% examples, 153496 words/s, in_qsize 1, out_qsize 0
2018-01-29 11:11:38,383 : INFO : PROGRESS: at 72.55% examples, 153506 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:39,424 : INFO : PROGRESS: at 72.58% examples, 153518 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:40,455 : INFO : PROGRESS: at 72.61% examples, 153527 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:41,699 : INFO : PROGRESS: at 72.63% examples, 153526 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:42,722 : INFO : PROGRESS: at 72.66% examples, 153532 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:43,738 : INFO : PROGRESS: at 72.69% examples, 153545 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:11:44,754 : INFO : PROGRESS: at 72.72% examples, 153555 wor

2018-01-29 11:12:58,038 : INFO : PROGRESS: at 74.62% examples, 153962 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:12:59,048 : INFO : PROGRESS: at 74.65% examples, 153972 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:00,067 : INFO : PROGRESS: at 74.68% examples, 153981 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:01,085 : INFO : PROGRESS: at 74.70% examples, 153979 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:02,088 : INFO : PROGRESS: at 74.73% examples, 153990 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:03,089 : INFO : PROGRESS: at 74.76% examples, 153997 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:04,105 : INFO : PROGRESS: at 74.79% examples, 154007 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:05,138 : INFO : PROGRESS: at 74.82% examples, 154019 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:06,144 : INFO : PROGRESS: at 74.84% examples, 154015 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:13:07,150 : INFO : PROGRESS: at 74.86% examples, 154016 wor

2018-01-29 11:14:20,834 : INFO : PROGRESS: at 76.75% examples, 154398 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:21,846 : INFO : PROGRESS: at 76.78% examples, 154408 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:22,864 : INFO : PROGRESS: at 76.80% examples, 154417 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:23,893 : INFO : PROGRESS: at 76.83% examples, 154414 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:24,935 : INFO : PROGRESS: at 76.85% examples, 154425 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:25,999 : INFO : PROGRESS: at 76.88% examples, 154435 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:27,037 : INFO : PROGRESS: at 76.91% examples, 154446 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:28,139 : INFO : PROGRESS: at 76.94% examples, 154443 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:29,147 : INFO : PROGRESS: at 76.96% examples, 154452 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:14:30,150 : INFO : PROGRESS: at 76.99% examples, 154462 wor

2018-01-29 11:15:43,496 : INFO : PROGRESS: at 78.85% examples, 154846 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:44,511 : INFO : PROGRESS: at 78.88% examples, 154858 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:45,540 : INFO : PROGRESS: at 78.90% examples, 154855 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:46,599 : INFO : PROGRESS: at 78.93% examples, 154864 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:47,663 : INFO : PROGRESS: at 78.96% examples, 154874 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:48,680 : INFO : PROGRESS: at 78.98% examples, 154882 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:49,687 : INFO : PROGRESS: at 79.01% examples, 154892 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:50,867 : INFO : PROGRESS: at 79.03% examples, 154882 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:51,889 : INFO : PROGRESS: at 79.06% examples, 154888 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:15:52,905 : INFO : PROGRESS: at 79.09% examples, 154896 wor

2018-01-29 11:17:06,149 : INFO : PROGRESS: at 81.03% examples, 155383 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:07,179 : INFO : PROGRESS: at 81.06% examples, 155391 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:08,224 : INFO : PROGRESS: at 81.09% examples, 155402 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:09,267 : INFO : PROGRESS: at 81.12% examples, 155412 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:10,312 : INFO : PROGRESS: at 81.15% examples, 155408 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:11,345 : INFO : PROGRESS: at 81.17% examples, 155416 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:12,414 : INFO : PROGRESS: at 81.20% examples, 155420 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:13,420 : INFO : PROGRESS: at 81.23% examples, 155423 words/s, in_qsize 1, out_qsize 0
2018-01-29 11:17:14,446 : INFO : PROGRESS: at 81.25% examples, 155429 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:17:15,510 : INFO : PROGRESS: at 81.28% examples, 155422 wor

2018-01-29 11:18:28,888 : INFO : PROGRESS: at 83.10% examples, 155598 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:29,912 : INFO : PROGRESS: at 83.12% examples, 155587 words/s, in_qsize 1, out_qsize 0
2018-01-29 11:18:30,979 : INFO : PROGRESS: at 83.14% examples, 155583 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:32,011 : INFO : PROGRESS: at 83.17% examples, 155591 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:33,059 : INFO : PROGRESS: at 83.19% examples, 155585 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:34,061 : INFO : PROGRESS: at 83.22% examples, 155594 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:35,083 : INFO : PROGRESS: at 83.25% examples, 155602 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:36,120 : INFO : PROGRESS: at 83.28% examples, 155610 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:37,125 : INFO : PROGRESS: at 83.31% examples, 155619 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:18:38,178 : INFO : PROGRESS: at 83.33% examples, 155618 wor

2018-01-29 11:19:51,720 : INFO : PROGRESS: at 85.25% examples, 155963 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:52,734 : INFO : PROGRESS: at 85.27% examples, 155964 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:53,783 : INFO : PROGRESS: at 85.30% examples, 155965 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:54,801 : INFO : PROGRESS: at 85.32% examples, 155971 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:55,811 : INFO : PROGRESS: at 85.34% examples, 155963 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:56,863 : INFO : PROGRESS: at 85.37% examples, 155965 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:57,904 : INFO : PROGRESS: at 85.39% examples, 155961 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:58,933 : INFO : PROGRESS: at 85.42% examples, 155964 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:19:59,947 : INFO : PROGRESS: at 85.44% examples, 155972 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:20:00,975 : INFO : PROGRESS: at 85.47% examples, 155974 wor

2018-01-29 11:21:14,116 : INFO : PROGRESS: at 87.29% examples, 156207 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:15,146 : INFO : PROGRESS: at 87.31% examples, 156214 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:16,158 : INFO : PROGRESS: at 87.34% examples, 156222 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:17,174 : INFO : PROGRESS: at 87.37% examples, 156229 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:18,208 : INFO : PROGRESS: at 87.40% examples, 156234 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:19,253 : INFO : PROGRESS: at 87.42% examples, 156230 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:20,286 : INFO : PROGRESS: at 87.45% examples, 156237 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:21,289 : INFO : PROGRESS: at 87.47% examples, 156243 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:22,308 : INFO : PROGRESS: at 87.50% examples, 156248 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:21:23,326 : INFO : PROGRESS: at 87.53% examples, 156255 wor

2018-01-29 11:22:37,771 : INFO : PROGRESS: at 89.27% examples, 156213 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:38,776 : INFO : PROGRESS: at 89.30% examples, 156219 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:39,878 : INFO : PROGRESS: at 89.32% examples, 156213 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:40,879 : INFO : PROGRESS: at 89.35% examples, 156219 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:41,893 : INFO : PROGRESS: at 89.38% examples, 156224 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:42,937 : INFO : PROGRESS: at 89.40% examples, 156228 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:43,939 : INFO : PROGRESS: at 89.43% examples, 156234 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:44,959 : INFO : PROGRESS: at 89.45% examples, 156232 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:45,978 : INFO : PROGRESS: at 89.48% examples, 156239 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:22:47,020 : INFO : PROGRESS: at 89.51% examples, 156246 wor

2018-01-29 11:24:00,784 : INFO : PROGRESS: at 91.36% examples, 156465 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:01,802 : INFO : PROGRESS: at 91.39% examples, 156473 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:02,807 : INFO : PROGRESS: at 91.42% examples, 156480 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:03,850 : INFO : PROGRESS: at 91.44% examples, 156475 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:04,879 : INFO : PROGRESS: at 91.46% examples, 156482 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:05,887 : INFO : PROGRESS: at 91.49% examples, 156489 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:06,890 : INFO : PROGRESS: at 91.52% examples, 156495 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:07,928 : INFO : PROGRESS: at 91.55% examples, 156501 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:08,938 : INFO : PROGRESS: at 91.57% examples, 156499 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:24:09,938 : INFO : PROGRESS: at 91.60% examples, 156507 wor

2018-01-29 11:25:23,246 : INFO : PROGRESS: at 93.43% examples, 156722 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:24,318 : INFO : PROGRESS: at 93.46% examples, 156727 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:25,364 : INFO : PROGRESS: at 93.48% examples, 156722 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:26,419 : INFO : PROGRESS: at 93.51% examples, 156725 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:27,503 : INFO : PROGRESS: at 93.54% examples, 156732 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:28,556 : INFO : PROGRESS: at 93.56% examples, 156736 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:29,556 : INFO : PROGRESS: at 93.59% examples, 156739 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:30,596 : INFO : PROGRESS: at 93.61% examples, 156734 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:31,692 : INFO : PROGRESS: at 93.64% examples, 156736 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:25:32,741 : INFO : PROGRESS: at 93.67% examples, 156742 wor

2018-01-29 11:26:46,326 : INFO : PROGRESS: at 95.58% examples, 157016 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:47,327 : INFO : PROGRESS: at 95.60% examples, 157007 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:48,405 : INFO : PROGRESS: at 95.62% examples, 156993 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:49,413 : INFO : PROGRESS: at 95.65% examples, 157000 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:50,497 : INFO : PROGRESS: at 95.67% examples, 156998 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:51,517 : INFO : PROGRESS: at 95.70% examples, 157002 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:52,552 : INFO : PROGRESS: at 95.72% examples, 157006 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:53,553 : INFO : PROGRESS: at 95.75% examples, 157009 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:54,555 : INFO : PROGRESS: at 95.77% examples, 157012 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:26:55,577 : INFO : PROGRESS: at 95.80% examples, 157016 wor

2018-01-29 11:28:08,940 : INFO : PROGRESS: at 97.61% examples, 157175 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:09,950 : INFO : PROGRESS: at 97.63% examples, 157168 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:11,002 : INFO : PROGRESS: at 97.65% examples, 157173 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:12,015 : INFO : PROGRESS: at 97.68% examples, 157180 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:13,058 : INFO : PROGRESS: at 97.71% examples, 157188 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:14,151 : INFO : PROGRESS: at 97.74% examples, 157192 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:15,168 : INFO : PROGRESS: at 97.76% examples, 157190 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:16,190 : INFO : PROGRESS: at 97.79% examples, 157196 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:17,195 : INFO : PROGRESS: at 97.81% examples, 157201 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:28:18,212 : INFO : PROGRESS: at 97.84% examples, 157205 wor

2018-01-29 11:29:31,311 : INFO : PROGRESS: at 99.69% examples, 157485 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:32,345 : INFO : PROGRESS: at 99.72% examples, 157491 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:33,388 : INFO : PROGRESS: at 99.74% examples, 157488 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:34,389 : INFO : PROGRESS: at 99.76% examples, 157495 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:35,398 : INFO : PROGRESS: at 99.79% examples, 157501 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:36,423 : INFO : PROGRESS: at 99.82% examples, 157507 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:37,461 : INFO : PROGRESS: at 99.84% examples, 157511 words/s, in_qsize 0, out_qsize 1
2018-01-29 11:29:38,484 : INFO : PROGRESS: at 99.87% examples, 157508 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:39,514 : INFO : PROGRESS: at 99.89% examples, 157516 words/s, in_qsize 0, out_qsize 0
2018-01-29 11:29:40,534 : INFO : PROGRESS: at 99.92% examples, 157522 wor

####  some considerations:
 - revisit tokenization  / spell checking / entity recognition
 - stop words?
 - things to adjust: vector size, negative sampling, min_count
 - review more notes on logs

## Save model

In [17]:
path = "/Users/stevenfelix/Documents/DataScience_local/Insight/"
file = 'model_full_{}_sg{}_sz{}_win{}_min{}_hs{}_neg{}'.format(num_doc,sg,size,window,min_count,hs,negative)
notes = ''
file = file+notes
model_full.save(path+file)

2018-01-29 11:34:06,274 : INFO : saving Word2Vec object under /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_75M_sg0_sz250_win5_min5_hs1_neg0_bigram, separately None
2018-01-29 11:34:06,276 : INFO : storing np array 'syn0' to /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_75M_sg0_sz250_win5_min5_hs1_neg0_bigram.wv.syn0.npy
2018-01-29 11:34:06,570 : INFO : not storing attribute syn0norm
2018-01-29 11:34:06,571 : INFO : storing np array 'syn1' to /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_75M_sg0_sz250_win5_min5_hs1_neg0_bigram.syn1.npy
2018-01-29 11:34:06,861 : INFO : not storing attribute cum_table
2018-01-29 11:34:11,232 : INFO : saved /Users/stevenfelix/Documents/DataScience_local/Insight/model_full_75M_sg0_sz250_win5_min5_hs1_neg0_bigram
