In [1]:
from common import *

In [2]:
# Collect the raw input documents. glob() returns paths in arbitrary,
# filesystem-dependent order, so sort them: the list is later cut into
# consecutive batches, and a stable order keeps that batching reproducible.
fnames = sorted(glob('../data/lingvo/raw/*'))

In [3]:
import gc, io

# prog = re.compile("[\W\d]", re.UNICODE)

def process(fnames, i):
    """Tokenise a batch of raw documents and dump them as JSON lines.

    Every file in ``fnames`` is read as UTF-8, split into sentences with
    ``sent_tokenize`` and each sentence into whitespace-separated tokens.
    Tokens that are in ``stop_list`` or shorter than two characters are
    dropped, and sentences left without any token are discarded. One line per
    document is written to ``../data/lingvo/<i>.txt`` holding the JSON pair
    ``[<file name up to its first dot>, <list of token lists>]``.

    :param fnames: iterable with the paths of the documents in this batch
    :param i: batch identifier, used as the output file name
    """
    out_path = '../data/lingvo/%s.txt'%i
    with io.open(out_path, 'w', encoding='utf8') as out_file:
        for path in fnames:
            with io.open(path, encoding='utf8') as in_file:
                raw_text = in_file.read()

            doc_sents = []
            for sentence in sent_tokenize(raw_text):
                tokens = [tok for tok in sentence.split()
                          if tok not in stop_list and len(tok)>1]
                if len(tokens):
                    doc_sents.append(tokens)

            doc_id = basename(path).split('.')[0]
            record = json.dumps((doc_id, doc_sents), ensure_ascii=False)
            out_file.write(record + u'\n')
    # Force a garbage-collection pass once the whole batch has been written.
    gc.collect()

In [None]:
# Tokenise the raw files in parallel: the file list is cut into batches and
# each batch is handed to process() together with its running index.
parallelizer = Parallel(n_jobs=cpu_count)

# With fewer than 500 input files len(fnames)//500 evaluates to 0, which is
# not a usable batch size; fall back to one file per batch in that case.
batch_size = max(1, len(fnames) // 500)

# NOTE(review): grouper comes from `common`; it is assumed to yield
# consecutive chunks of `batch_size` items from `fnames` — confirm.
tasks_iterator = ( delayed(process)(list_block, i) for 
                  i, list_block in enumerate(grouper(batch_size, fnames)) ) 
result = parallelizer( tasks_iterator )

In [None]:
# Remember the current working directory so it can be restored afterwards.
pwd = !pwd
%cd ../data/lingvo/
# Concatenate every *.txt file in this directory into corpus_json.txt, gzip it
# (producing corpus_json.txt.gz; -f overwrites an existing archive) and then
# delete the remaining *.txt files.
!cat *.txt > corpus_json.txt&&gzip -f corpus_json.txt&&rm *.txt
%cd {pwd[0]}

In [None]:
import ujson

def iter_docs(corpus_path):
    """Stream the documents of a JSON-lines corpus opened with ``GzipFile``.

    Each line is parsed with ``ujson`` and must hold exactly two items: the
    document id and that document's sentences.

    :param corpus_path: path of the corpus file
    :return: generator of ``(doc_id, sentences)`` tuples, in file order
    """
    with GzipFile(corpus_path, 'r') as corpus_file:
        for raw_line in corpus_file:
            doc_id, doc_sents = ujson.loads(raw_line)
            yield (doc_id, doc_sents)
    
def iter_sents(corpus_path):
    """Stream every sentence of the corpus, ignoring document boundaries.

    :param corpus_path: path handed on to ``iter_docs``
    :return: generator over the sentences of all documents, in file order
    """
    for _doc_id, doc_sents in iter_docs(corpus_path):
        for sentence in doc_sents:
            yield sentence
                
class Sentences(object):
    """Re-iterable view over the sentences stored at ``corpus_path``.

    Every call to ``iter()`` starts a fresh pass over the file through
    ``iter_sents``, so the corpus can be traversed more than once.
    """

    def __init__(self, corpus_path):
        self.corpus_path = corpus_path

    def __iter__(self):
        # iter_sents is itself a generator function, so nothing is read
        # until the first item is requested.
        return iter_sents(self.corpus_path)
            
            
def extract_bigrams(corpus_path, name, min_count=5, threshold=10):
    """Learn a phrase model on a corpus and write the phrase-merged corpus.

    Steps:
      1. train ``gensim.models.Phrases`` on the sentences of ``corpus_path``
         and save it to ``../data/lingvo/<name>``;
      2. build a ``Phraser`` from it and save it to
         ``../data/lingvo/<name>_ph_<min_count>_<threshold>``;
      3. apply the phraser to every sentence and write the result to
         ``../data/lingvo/<name>_corpus.txt.gz``.

    The output corpus keeps the layout of the input: one JSON
    ``[doc_id, sentences]`` pair per line. Writing plain space-joined text
    instead would make the file unreadable for ``iter_docs``/``iter_sents``,
    which parse every line as such a JSON pair, so the result could not be
    fed into a second pass of this function or wrapped in ``Sentences``.

    :param corpus_path: corpus file readable by ``iter_docs``
    :param name: stem used for the saved models and the output corpus
    :param min_count: ``min_count`` setting of the ``Phrases`` model
    :param threshold: ``threshold`` setting of the ``Phrases`` model
    """
    bigram_path = '../data/lingvo/%s' % name
    # Hand the settings to the constructor instead of patching the attributes
    # on the trained model afterwards.
    bigram = gensim.models.Phrases(iter_sents(corpus_path),
                                   min_count=min_count,
                                   threshold=threshold,
                                   progress_per=100000)

    bigram.save(bigram_path)
    logging.info('vocab size %s' % len(bigram.vocab))

    bigram_ph = gensim.models.phrases.Phraser(bigram)
    bigram_ph_path = bigram_path + '_ph_%s_%s' % (min_count, threshold)
    bigram_ph.save(bigram_ph_path)

    bigram_corpus_path = '../data/lingvo/%s_corpus.txt.gz' % name
    logging.info('saving %s' % bigram_corpus_path)
    with GzipFile(bigram_corpus_path, 'w') as f:
        for _id, sents in iter_docs(corpus_path):
            # Indexing the phraser with a single sentence returns that
            # sentence with detected phrases merged into one token.
            merged = [bigram_ph[sent] for sent in sents]
            line = json.dumps((_id, merged), ensure_ascii=False)
            # The gzip stream is binary: encode explicitly and use a bytes
            # newline so the write behaves the same on Python 2 and 3.
            f.write(line.encode('utf8') + b'\n')

# Bigrams

In [None]:
# Configuration of the bigram stage.
name = 'bigram'
# NOTE(review): the concatenation cell earlier in this notebook writes
# corpus_json.txt.gz — confirm that corpus.txt.gz is the intended input.
corpus_path = '../data/lingvo/corpus.txt.gz'
# Path of the saved Phrases model. Later cells read it as `bigram_path`;
# the previous spelling `igram_path` left that name undefined.
bigram_path = '../data/lingvo/%s' % name
min_count, threshold = 20, 30

In [None]:
# Train and save the bigram Phrases/Phraser models and write the
# phrase-merged corpus for the settings chosen above.
extract_bigrams(corpus_path, name, min_count=min_count, threshold=threshold)

In [15]:
# Reload the saved Phrases model and print the phrases (with their scores)
# that it reports for the first 500 sentences of the corpus.
bigram = gensim.models.phrases.Phrases.load(bigram_path)
sample_sents = islice(iter_sents(corpus_path), 500)
for phrase, score in bigram.export_phrases(sample_sents):
    print('{0}   {1}'.format(phrase, score))

торговый наименование   46.0968005278
ti zr   54.2023204961
et al   40.2187126168
gas turbine   1327.44429617
advanced materials   222.224706823
цирконий гафний   40.7752414915
цирконий гафний   40.7752414915
american society   297.733331399
test materials   106.495165158
американский общество   160.174541596
беговой дорожка   82.4931135071
беговой дорожка   82.4931135071
беговой дорожка   82.4931135071
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхронный резонанс   64.1262090751
подсинхр

In [None]:
# Load the Phraser stored for the current (min_count, threshold) setting;
# the file name follows the pattern used when extract_bigrams saves it.
phraser_suffix = '_ph_%s_%s' % (min_count, threshold)
bigram_ph_path = bigram_path + phraser_suffix
bigram_ph = gensim.models.phrases.Phraser.load(bigram_ph_path)

# Trigrams

In [None]:
# Configuration of the trigram stage: the phrase model is trained on the
# corpus written by the bigram stage.
name = 'trigram'
corpus_path = '../data/lingvo/bigram_corpus.txt.gz'
trigram_path = '../data/lingvo/%s' % name
min_count = 20
threshold = 30

In [None]:
# Second phrase-detection pass: running extract_bigrams on the bigram-merged
# corpus lets already merged pairs combine with a further token.
extract_bigrams(corpus_path, name, min_count=min_count, threshold=threshold)

In [None]:
# Load the Phraser saved by the trigram run. Its path must be derived from
# trigram_path; building it from bigram_path would silently reload the
# bigram phraser under the name trigram_ph.
trigram_ph_path = trigram_path + '_ph_%s_%s' % (min_count, threshold)
trigram_ph = gensim.models.phrases.Phraser.load(trigram_ph_path)

# Word2Vec

In [10]:
# Train word vectors on the trigram-merged corpus. Sentences re-opens the
# file on every pass, so the model can iterate over the corpus repeatedly.
corpus_path = '../data/lingvo/trigram_corpus.txt.gz'
w2v_params = dict(size=300, sg=1, min_count=5, window=10, workers=cpu_count)
model = Word2Vec(Sentences(corpus_path), **w2v_params)

2017-10-25 07:34:42,716 [MainThread  ] [INFO ]  collecting all words and their counts
2017-10-25 07:34:42,717 [MainThread  ] [INFO ]  PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
2017-10-25 07:34:43,721 [MainThread  ] [INFO ]  PROGRESS: at sentence #10000, processed 113858 words, keeping 13569 word types
2017-10-25 07:34:44,690 [MainThread  ] [INFO ]  PROGRESS: at sentence #20000, processed 225483 words, keeping 22380 word types
2017-10-25 07:34:45,590 [MainThread  ] [INFO ]  PROGRESS: at sentence #30000, processed 326531 words, keeping 30608 word types
2017-10-25 07:34:46,508 [MainThread  ] [INFO ]  PROGRESS: at sentence #40000, processed 430515 words, keeping 39716 word types
2017-10-25 07:34:47,375 [MainThread  ] [INFO ]  PROGRESS: at sentence #50000, processed 527838 words, keeping 45677 word types
2017-10-25 07:34:48,360 [MainThread  ] [INFO ]  PROGRESS: at sentence #60000, processed 639470 words, keeping 53030 word types
2017-10-25 07:34:49,383 [MainThread  ]

2017-10-25 07:35:41,835 [MainThread  ] [INFO ]  PROGRESS: at sentence #630000, processed 6626137 words, keeping 323056 word types
2017-10-25 07:35:42,755 [MainThread  ] [INFO ]  PROGRESS: at sentence #640000, processed 6728370 words, keeping 328670 word types
2017-10-25 07:35:43,689 [MainThread  ] [INFO ]  PROGRESS: at sentence #650000, processed 6833990 words, keeping 332649 word types
2017-10-25 07:35:44,602 [MainThread  ] [INFO ]  PROGRESS: at sentence #660000, processed 6935887 words, keeping 336545 word types
2017-10-25 07:35:45,576 [MainThread  ] [INFO ]  PROGRESS: at sentence #670000, processed 7045297 words, keeping 339651 word types
2017-10-25 07:35:46,559 [MainThread  ] [INFO ]  PROGRESS: at sentence #680000, processed 7156996 words, keeping 343200 word types
2017-10-25 07:35:47,560 [MainThread  ] [INFO ]  PROGRESS: at sentence #690000, processed 7270858 words, keeping 346544 word types
2017-10-25 07:35:48,541 [MainThread  ] [INFO ]  PROGRESS: at sentence #700000, processed 7

2017-10-25 07:36:41,004 [MainThread  ] [INFO ]  PROGRESS: at sentence #1260000, processed 13207020 words, keeping 557293 word types
2017-10-25 07:36:41,988 [MainThread  ] [INFO ]  PROGRESS: at sentence #1270000, processed 13316572 words, keeping 560491 word types
2017-10-25 07:36:42,946 [MainThread  ] [INFO ]  PROGRESS: at sentence #1280000, processed 13424982 words, keeping 563859 word types
2017-10-25 07:36:43,919 [MainThread  ] [INFO ]  PROGRESS: at sentence #1290000, processed 13534258 words, keeping 566722 word types
2017-10-25 07:36:44,902 [MainThread  ] [INFO ]  PROGRESS: at sentence #1300000, processed 13641596 words, keeping 569733 word types
2017-10-25 07:36:45,833 [MainThread  ] [INFO ]  PROGRESS: at sentence #1310000, processed 13744300 words, keeping 573588 word types
2017-10-25 07:36:46,764 [MainThread  ] [INFO ]  PROGRESS: at sentence #1320000, processed 13844157 words, keeping 576992 word types
2017-10-25 07:36:47,736 [MainThread  ] [INFO ]  PROGRESS: at sentence #13300

2017-10-25 07:37:40,335 [MainThread  ] [INFO ]  PROGRESS: at sentence #1890000, processed 19795754 words, keeping 763983 word types
2017-10-25 07:37:41,215 [MainThread  ] [INFO ]  PROGRESS: at sentence #1900000, processed 19884586 words, keeping 766590 word types
2017-10-25 07:37:42,154 [MainThread  ] [INFO ]  PROGRESS: at sentence #1910000, processed 19986496 words, keeping 769985 word types
2017-10-25 07:37:43,117 [MainThread  ] [INFO ]  PROGRESS: at sentence #1920000, processed 20094067 words, keeping 772206 word types
2017-10-25 07:37:44,155 [MainThread  ] [INFO ]  PROGRESS: at sentence #1930000, processed 20210156 words, keeping 774963 word types
2017-10-25 07:37:45,052 [MainThread  ] [INFO ]  PROGRESS: at sentence #1940000, processed 20308237 words, keeping 777165 word types
2017-10-25 07:37:45,981 [MainThread  ] [INFO ]  PROGRESS: at sentence #1950000, processed 20410772 words, keeping 779801 word types
2017-10-25 07:37:46,906 [MainThread  ] [INFO ]  PROGRESS: at sentence #19600

2017-10-25 07:38:39,834 [MainThread  ] [INFO ]  PROGRESS: at sentence #2520000, processed 26364114 words, keeping 945315 word types
2017-10-25 07:38:40,784 [MainThread  ] [INFO ]  PROGRESS: at sentence #2530000, processed 26469294 words, keeping 947759 word types
2017-10-25 07:38:41,753 [MainThread  ] [INFO ]  PROGRESS: at sentence #2540000, processed 26578408 words, keeping 949771 word types
2017-10-25 07:38:42,738 [MainThread  ] [INFO ]  PROGRESS: at sentence #2550000, processed 26688664 words, keeping 953212 word types
2017-10-25 07:38:43,684 [MainThread  ] [INFO ]  PROGRESS: at sentence #2560000, processed 26792689 words, keeping 955616 word types
2017-10-25 07:38:44,658 [MainThread  ] [INFO ]  PROGRESS: at sentence #2570000, processed 26902682 words, keeping 957889 word types
2017-10-25 07:38:45,593 [MainThread  ] [INFO ]  PROGRESS: at sentence #2580000, processed 27006267 words, keeping 960122 word types
2017-10-25 07:38:46,510 [MainThread  ] [INFO ]  PROGRESS: at sentence #25900

2017-10-25 07:39:39,255 [MainThread  ] [INFO ]  PROGRESS: at sentence #3140000, processed 32907721 words, keeping 1112279 word types
2017-10-25 07:39:40,214 [MainThread  ] [INFO ]  PROGRESS: at sentence #3150000, processed 33013662 words, keeping 1114561 word types
2017-10-25 07:39:41,169 [MainThread  ] [INFO ]  PROGRESS: at sentence #3160000, processed 33120619 words, keeping 1116694 word types
2017-10-25 07:39:42,107 [MainThread  ] [INFO ]  PROGRESS: at sentence #3170000, processed 33225402 words, keeping 1118501 word types
2017-10-25 07:39:43,052 [MainThread  ] [INFO ]  PROGRESS: at sentence #3180000, processed 33329905 words, keeping 1120771 word types
2017-10-25 07:39:44,031 [MainThread  ] [INFO ]  PROGRESS: at sentence #3190000, processed 33438073 words, keeping 1123880 word types
2017-10-25 07:39:45,008 [MainThread  ] [INFO ]  PROGRESS: at sentence #3200000, processed 33547079 words, keeping 1126262 word types
2017-10-25 07:39:45,993 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:40:38,323 [MainThread  ] [INFO ]  PROGRESS: at sentence #3760000, processed 39412393 words, keeping 1266254 word types
2017-10-25 07:40:39,292 [MainThread  ] [INFO ]  PROGRESS: at sentence #3770000, processed 39519740 words, keeping 1268349 word types
2017-10-25 07:40:40,264 [MainThread  ] [INFO ]  PROGRESS: at sentence #3780000, processed 39629201 words, keeping 1270606 word types
2017-10-25 07:40:41,201 [MainThread  ] [INFO ]  PROGRESS: at sentence #3790000, processed 39732312 words, keeping 1273054 word types
2017-10-25 07:40:42,171 [MainThread  ] [INFO ]  PROGRESS: at sentence #3800000, processed 39839844 words, keeping 1274953 word types
2017-10-25 07:40:43,106 [MainThread  ] [INFO ]  PROGRESS: at sentence #3810000, processed 39943191 words, keeping 1276842 word types
2017-10-25 07:40:44,646 [MainThread  ] [INFO ]  PROGRESS: at sentence #3820000, processed 40042959 words, keeping 1280448 word types
2017-10-25 07:40:45,668 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:41:38,102 [MainThread  ] [INFO ]  PROGRESS: at sentence #4380000, processed 45908397 words, keeping 1415416 word types
2017-10-25 07:41:39,040 [MainThread  ] [INFO ]  PROGRESS: at sentence #4390000, processed 46009561 words, keeping 1417597 word types
2017-10-25 07:41:39,982 [MainThread  ] [INFO ]  PROGRESS: at sentence #4400000, processed 46109581 words, keeping 1419741 word types
2017-10-25 07:41:40,947 [MainThread  ] [INFO ]  PROGRESS: at sentence #4410000, processed 46216118 words, keeping 1421729 word types
2017-10-25 07:41:41,921 [MainThread  ] [INFO ]  PROGRESS: at sentence #4420000, processed 46321353 words, keeping 1424103 word types
2017-10-25 07:41:42,918 [MainThread  ] [INFO ]  PROGRESS: at sentence #4430000, processed 46430474 words, keeping 1426858 word types
2017-10-25 07:41:43,894 [MainThread  ] [INFO ]  PROGRESS: at sentence #4440000, processed 46534411 words, keeping 1428335 word types
2017-10-25 07:41:44,816 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:42:37,856 [MainThread  ] [INFO ]  PROGRESS: at sentence #5000000, processed 52387287 words, keeping 1568461 word types
2017-10-25 07:42:38,811 [MainThread  ] [INFO ]  PROGRESS: at sentence #5010000, processed 52492544 words, keeping 1570236 word types
2017-10-25 07:42:39,826 [MainThread  ] [INFO ]  PROGRESS: at sentence #5020000, processed 52604198 words, keeping 1572138 word types
2017-10-25 07:42:40,779 [MainThread  ] [INFO ]  PROGRESS: at sentence #5030000, processed 52707823 words, keeping 1573919 word types
2017-10-25 07:42:41,768 [MainThread  ] [INFO ]  PROGRESS: at sentence #5040000, processed 52816544 words, keeping 1576245 word types
2017-10-25 07:42:42,679 [MainThread  ] [INFO ]  PROGRESS: at sentence #5050000, processed 52913277 words, keeping 1577781 word types
2017-10-25 07:42:43,646 [MainThread  ] [INFO ]  PROGRESS: at sentence #5060000, processed 53018979 words, keeping 1579520 word types
2017-10-25 07:42:44,564 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:43:37,840 [MainThread  ] [INFO ]  PROGRESS: at sentence #5620000, processed 58886267 words, keeping 1705853 word types
2017-10-25 07:43:38,801 [MainThread  ] [INFO ]  PROGRESS: at sentence #5630000, processed 58990727 words, keeping 1707774 word types
2017-10-25 07:43:39,775 [MainThread  ] [INFO ]  PROGRESS: at sentence #5640000, processed 59097197 words, keeping 1709285 word types
2017-10-25 07:43:40,714 [MainThread  ] [INFO ]  PROGRESS: at sentence #5650000, processed 59198761 words, keeping 1711630 word types
2017-10-25 07:43:41,626 [MainThread  ] [INFO ]  PROGRESS: at sentence #5660000, processed 59297335 words, keeping 1713804 word types
2017-10-25 07:43:42,718 [MainThread  ] [INFO ]  PROGRESS: at sentence #5670000, processed 59420014 words, keeping 1715967 word types
2017-10-25 07:43:43,716 [MainThread  ] [INFO ]  PROGRESS: at sentence #5680000, processed 59529797 words, keeping 1717439 word types
2017-10-25 07:43:44,731 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:44:37,802 [MainThread  ] [INFO ]  PROGRESS: at sentence #6240000, processed 65357629 words, keeping 1837651 word types
2017-10-25 07:44:38,746 [MainThread  ] [INFO ]  PROGRESS: at sentence #6250000, processed 65458837 words, keeping 1839675 word types
2017-10-25 07:44:39,744 [MainThread  ] [INFO ]  PROGRESS: at sentence #6260000, processed 65567953 words, keeping 1841955 word types
2017-10-25 07:44:40,683 [MainThread  ] [INFO ]  PROGRESS: at sentence #6270000, processed 65669500 words, keeping 1843925 word types
2017-10-25 07:44:41,686 [MainThread  ] [INFO ]  PROGRESS: at sentence #6280000, processed 65779736 words, keeping 1846011 word types
2017-10-25 07:44:42,721 [MainThread  ] [INFO ]  PROGRESS: at sentence #6290000, processed 65891599 words, keeping 1848602 word types
2017-10-25 07:44:43,701 [MainThread  ] [INFO ]  PROGRESS: at sentence #6300000, processed 65999417 words, keeping 1850162 word types
2017-10-25 07:44:44,652 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:45:38,469 [MainThread  ] [INFO ]  PROGRESS: at sentence #6860000, processed 71925801 words, keeping 1963986 word types
2017-10-25 07:45:39,489 [MainThread  ] [INFO ]  PROGRESS: at sentence #6870000, processed 72037301 words, keeping 1968868 word types
2017-10-25 07:45:40,455 [MainThread  ] [INFO ]  PROGRESS: at sentence #6880000, processed 72140743 words, keeping 1971241 word types
2017-10-25 07:45:41,313 [MainThread  ] [INFO ]  PROGRESS: at sentence #6890000, processed 72230334 words, keeping 1973048 word types
2017-10-25 07:45:42,283 [MainThread  ] [INFO ]  PROGRESS: at sentence #6900000, processed 72335288 words, keeping 1975745 word types
2017-10-25 07:45:43,281 [MainThread  ] [INFO ]  PROGRESS: at sentence #6910000, processed 72444444 words, keeping 1977944 word types
2017-10-25 07:45:44,253 [MainThread  ] [INFO ]  PROGRESS: at sentence #6920000, processed 72550318 words, keeping 1979760 word types
2017-10-25 07:45:45,208 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:46:38,800 [MainThread  ] [INFO ]  PROGRESS: at sentence #7480000, processed 78429156 words, keeping 2098663 word types
2017-10-25 07:46:39,784 [MainThread  ] [INFO ]  PROGRESS: at sentence #7490000, processed 78536537 words, keeping 2100342 word types
2017-10-25 07:46:40,712 [MainThread  ] [INFO ]  PROGRESS: at sentence #7500000, processed 78635485 words, keeping 2102946 word types
2017-10-25 07:46:41,691 [MainThread  ] [INFO ]  PROGRESS: at sentence #7510000, processed 78738211 words, keeping 2104747 word types
2017-10-25 07:46:42,646 [MainThread  ] [INFO ]  PROGRESS: at sentence #7520000, processed 78840691 words, keeping 2106222 word types
2017-10-25 07:46:43,633 [MainThread  ] [INFO ]  PROGRESS: at sentence #7530000, processed 78946067 words, keeping 2108232 word types
2017-10-25 07:46:44,611 [MainThread  ] [INFO ]  PROGRESS: at sentence #7540000, processed 79052748 words, keeping 2110562 word types
2017-10-25 07:46:45,586 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:47:39,351 [MainThread  ] [INFO ]  PROGRESS: at sentence #8100000, processed 84962256 words, keeping 2223118 word types
2017-10-25 07:47:40,311 [MainThread  ] [INFO ]  PROGRESS: at sentence #8110000, processed 85067360 words, keeping 2224504 word types
2017-10-25 07:47:41,285 [MainThread  ] [INFO ]  PROGRESS: at sentence #8120000, processed 85172623 words, keeping 2225632 word types
2017-10-25 07:47:42,213 [MainThread  ] [INFO ]  PROGRESS: at sentence #8130000, processed 85270101 words, keeping 2227708 word types
2017-10-25 07:47:43,201 [MainThread  ] [INFO ]  PROGRESS: at sentence #8140000, processed 85377073 words, keeping 2229547 word types
2017-10-25 07:47:44,170 [MainThread  ] [INFO ]  PROGRESS: at sentence #8150000, processed 85481482 words, keeping 2231366 word types
2017-10-25 07:47:45,158 [MainThread  ] [INFO ]  PROGRESS: at sentence #8160000, processed 85586125 words, keeping 2233388 word types
2017-10-25 07:47:46,129 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:48:40,227 [MainThread  ] [INFO ]  PROGRESS: at sentence #8720000, processed 91398012 words, keeping 2341024 word types
2017-10-25 07:48:41,185 [MainThread  ] [INFO ]  PROGRESS: at sentence #8730000, processed 91501934 words, keeping 2342847 word types
2017-10-25 07:48:42,181 [MainThread  ] [INFO ]  PROGRESS: at sentence #8740000, processed 91608204 words, keeping 2344600 word types
2017-10-25 07:48:43,189 [MainThread  ] [INFO ]  PROGRESS: at sentence #8750000, processed 91716967 words, keeping 2346097 word types
2017-10-25 07:48:44,128 [MainThread  ] [INFO ]  PROGRESS: at sentence #8760000, processed 91818079 words, keeping 2347833 word types
2017-10-25 07:48:45,054 [MainThread  ] [INFO ]  PROGRESS: at sentence #8770000, processed 91917540 words, keeping 2350262 word types
2017-10-25 07:48:46,041 [MainThread  ] [INFO ]  PROGRESS: at sentence #8780000, processed 92025185 words, keeping 2351799 word types
2017-10-25 07:48:47,036 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:49:40,501 [MainThread  ] [INFO ]  PROGRESS: at sentence #9340000, processed 97904744 words, keeping 2455897 word types
2017-10-25 07:49:41,472 [MainThread  ] [INFO ]  PROGRESS: at sentence #9350000, processed 98010608 words, keeping 2457831 word types
2017-10-25 07:49:42,433 [MainThread  ] [INFO ]  PROGRESS: at sentence #9360000, processed 98116314 words, keeping 2459374 word types
2017-10-25 07:49:43,428 [MainThread  ] [INFO ]  PROGRESS: at sentence #9370000, processed 98224656 words, keeping 2461422 word types
2017-10-25 07:49:44,360 [MainThread  ] [INFO ]  PROGRESS: at sentence #9380000, processed 98325495 words, keeping 2463414 word types
2017-10-25 07:49:45,345 [MainThread  ] [INFO ]  PROGRESS: at sentence #9390000, processed 98431979 words, keeping 2465337 word types
2017-10-25 07:49:46,292 [MainThread  ] [INFO ]  PROGRESS: at sentence #9400000, processed 98534300 words, keeping 2467166 word types
2017-10-25 07:49:47,272 [MainThread  ] [INFO ]  PROGRESS: at sentence

2017-10-25 07:50:41,342 [MainThread  ] [INFO ]  PROGRESS: at sentence #9960000, processed 104524525 words, keeping 2566625 word types
2017-10-25 07:50:42,327 [MainThread  ] [INFO ]  PROGRESS: at sentence #9970000, processed 104633028 words, keeping 2568627 word types
2017-10-25 07:50:43,284 [MainThread  ] [INFO ]  PROGRESS: at sentence #9980000, processed 104735735 words, keeping 2570872 word types
2017-10-25 07:50:44,245 [MainThread  ] [INFO ]  PROGRESS: at sentence #9990000, processed 104839610 words, keeping 2572411 word types
2017-10-25 07:50:45,237 [MainThread  ] [INFO ]  PROGRESS: at sentence #10000000, processed 104949230 words, keeping 2573785 word types
2017-10-25 07:50:46,194 [MainThread  ] [INFO ]  PROGRESS: at sentence #10010000, processed 105053836 words, keeping 2575204 word types
2017-10-25 07:50:47,188 [MainThread  ] [INFO ]  PROGRESS: at sentence #10020000, processed 105163024 words, keeping 2576866 word types
2017-10-25 07:50:48,215 [MainThread  ] [INFO ]  PROGRESS: a

2017-10-25 07:51:41,466 [MainThread  ] [INFO ]  PROGRESS: at sentence #10570000, processed 111053724 words, keeping 2670145 word types
2017-10-25 07:51:42,425 [MainThread  ] [INFO ]  PROGRESS: at sentence #10580000, processed 111159020 words, keeping 2671873 word types
2017-10-25 07:51:43,371 [MainThread  ] [INFO ]  PROGRESS: at sentence #10590000, processed 111261750 words, keeping 2673566 word types
2017-10-25 07:51:44,327 [MainThread  ] [INFO ]  PROGRESS: at sentence #10600000, processed 111366217 words, keeping 2674913 word types
2017-10-25 07:51:45,314 [MainThread  ] [INFO ]  PROGRESS: at sentence #10610000, processed 111471710 words, keeping 2676858 word types
2017-10-25 07:51:46,303 [MainThread  ] [INFO ]  PROGRESS: at sentence #10620000, processed 111577877 words, keeping 2678818 word types
2017-10-25 07:51:47,311 [MainThread  ] [INFO ]  PROGRESS: at sentence #10630000, processed 111688426 words, keeping 2679952 word types
2017-10-25 07:51:48,268 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:52:40,322 [MainThread  ] [INFO ]  PROGRESS: at sentence #11180000, processed 117407369 words, keeping 2771439 word types
2017-10-25 07:52:41,307 [MainThread  ] [INFO ]  PROGRESS: at sentence #11190000, processed 117512114 words, keeping 2773126 word types
2017-10-25 07:52:42,369 [MainThread  ] [INFO ]  PROGRESS: at sentence #11200000, processed 117626413 words, keeping 2776215 word types
2017-10-25 07:52:43,327 [MainThread  ] [INFO ]  PROGRESS: at sentence #11210000, processed 117728377 words, keeping 2777734 word types
2017-10-25 07:52:44,227 [MainThread  ] [INFO ]  PROGRESS: at sentence #11220000, processed 117825159 words, keeping 2778689 word types
2017-10-25 07:52:45,182 [MainThread  ] [INFO ]  PROGRESS: at sentence #11230000, processed 117929717 words, keeping 2780239 word types
2017-10-25 07:52:46,133 [MainThread  ] [INFO ]  PROGRESS: at sentence #11240000, processed 118032806 words, keeping 2782366 word types
2017-10-25 07:52:47,027 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:53:40,084 [MainThread  ] [INFO ]  PROGRESS: at sentence #11790000, processed 123740223 words, keeping 2876904 word types
2017-10-25 07:53:41,067 [MainThread  ] [INFO ]  PROGRESS: at sentence #11800000, processed 123842174 words, keeping 2878464 word types
2017-10-25 07:53:42,057 [MainThread  ] [INFO ]  PROGRESS: at sentence #11810000, processed 123946320 words, keeping 2879993 word types
2017-10-25 07:53:43,044 [MainThread  ] [INFO ]  PROGRESS: at sentence #11820000, processed 124051063 words, keeping 2881649 word types
2017-10-25 07:53:44,071 [MainThread  ] [INFO ]  PROGRESS: at sentence #11830000, processed 124159076 words, keeping 2883788 word types
2017-10-25 07:53:45,076 [MainThread  ] [INFO ]  PROGRESS: at sentence #11840000, processed 124266778 words, keeping 2885806 word types
2017-10-25 07:53:46,045 [MainThread  ] [INFO ]  PROGRESS: at sentence #11850000, processed 124368040 words, keeping 2887338 word types
2017-10-25 07:53:47,082 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:54:40,879 [MainThread  ] [INFO ]  PROGRESS: at sentence #12400000, processed 130174493 words, keeping 2976149 word types
2017-10-25 07:54:41,856 [MainThread  ] [INFO ]  PROGRESS: at sentence #12410000, processed 130276745 words, keeping 2977793 word types
2017-10-25 07:54:42,894 [MainThread  ] [INFO ]  PROGRESS: at sentence #12420000, processed 130388001 words, keeping 2979701 word types
2017-10-25 07:54:43,895 [MainThread  ] [INFO ]  PROGRESS: at sentence #12430000, processed 130495220 words, keeping 2980892 word types
2017-10-25 07:54:44,885 [MainThread  ] [INFO ]  PROGRESS: at sentence #12440000, processed 130600985 words, keeping 2981940 word types
2017-10-25 07:54:45,697 [MainThread  ] [INFO ]  PROGRESS: at sentence #12450000, processed 130683477 words, keeping 2983958 word types
2017-10-25 07:54:46,732 [MainThread  ] [INFO ]  PROGRESS: at sentence #12460000, processed 130791757 words, keeping 2986448 word types
2017-10-25 07:54:47,703 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:55:41,365 [MainThread  ] [INFO ]  PROGRESS: at sentence #13010000, processed 136580926 words, keeping 3073079 word types
2017-10-25 07:55:42,371 [MainThread  ] [INFO ]  PROGRESS: at sentence #13020000, processed 136685114 words, keeping 3074673 word types
2017-10-25 07:55:43,359 [MainThread  ] [INFO ]  PROGRESS: at sentence #13030000, processed 136788913 words, keeping 3076587 word types
2017-10-25 07:55:44,386 [MainThread  ] [INFO ]  PROGRESS: at sentence #13040000, processed 136897787 words, keeping 3078194 word types
2017-10-25 07:55:45,425 [MainThread  ] [INFO ]  PROGRESS: at sentence #13050000, processed 137008021 words, keeping 3079747 word types
2017-10-25 07:55:46,390 [MainThread  ] [INFO ]  PROGRESS: at sentence #13060000, processed 137111049 words, keeping 3081024 word types
2017-10-25 07:55:47,380 [MainThread  ] [INFO ]  PROGRESS: at sentence #13070000, processed 137216430 words, keeping 3082799 word types
2017-10-25 07:55:48,366 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:56:42,211 [MainThread  ] [INFO ]  PROGRESS: at sentence #13620000, processed 142922558 words, keeping 3172902 word types
2017-10-25 07:56:43,228 [MainThread  ] [INFO ]  PROGRESS: at sentence #13630000, processed 143029396 words, keeping 3174873 word types
2017-10-25 07:56:44,219 [MainThread  ] [INFO ]  PROGRESS: at sentence #13640000, processed 143132671 words, keeping 3176731 word types
2017-10-25 07:56:45,280 [MainThread  ] [INFO ]  PROGRESS: at sentence #13650000, processed 143247821 words, keeping 3178384 word types
2017-10-25 07:56:46,288 [MainThread  ] [INFO ]  PROGRESS: at sentence #13660000, processed 143353948 words, keeping 3180082 word types
2017-10-25 07:56:47,263 [MainThread  ] [INFO ]  PROGRESS: at sentence #13670000, processed 143455740 words, keeping 3181539 word types
2017-10-25 07:56:48,284 [MainThread  ] [INFO ]  PROGRESS: at sentence #13680000, processed 143565093 words, keeping 3182899 word types
2017-10-25 07:56:49,309 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:57:42,847 [MainThread  ] [INFO ]  PROGRESS: at sentence #14230000, processed 149257584 words, keeping 3271434 word types
2017-10-25 07:57:43,838 [MainThread  ] [INFO ]  PROGRESS: at sentence #14240000, processed 149363405 words, keeping 3272562 word types
2017-10-25 07:57:44,844 [MainThread  ] [INFO ]  PROGRESS: at sentence #14250000, processed 149469716 words, keeping 3274029 word types
2017-10-25 07:57:45,864 [MainThread  ] [INFO ]  PROGRESS: at sentence #14260000, processed 149577303 words, keeping 3275161 word types
2017-10-25 07:57:46,894 [MainThread  ] [INFO ]  PROGRESS: at sentence #14270000, processed 149687545 words, keeping 3276407 word types
2017-10-25 07:57:47,851 [MainThread  ] [INFO ]  PROGRESS: at sentence #14280000, processed 149788340 words, keeping 3277873 word types
2017-10-25 07:57:48,851 [MainThread  ] [INFO ]  PROGRESS: at sentence #14290000, processed 149892308 words, keeping 3279419 word types
2017-10-25 07:57:49,878 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:58:44,533 [MainThread  ] [INFO ]  PROGRESS: at sentence #14840000, processed 155737855 words, keeping 3364473 word types
2017-10-25 07:58:45,489 [MainThread  ] [INFO ]  PROGRESS: at sentence #14850000, processed 155838107 words, keeping 3365848 word types
2017-10-25 07:58:46,435 [MainThread  ] [INFO ]  PROGRESS: at sentence #14860000, processed 155935907 words, keeping 3367309 word types
2017-10-25 07:58:47,440 [MainThread  ] [INFO ]  PROGRESS: at sentence #14870000, processed 156036625 words, keeping 3369244 word types
2017-10-25 07:58:48,418 [MainThread  ] [INFO ]  PROGRESS: at sentence #14880000, processed 156139953 words, keeping 3370979 word types
2017-10-25 07:58:49,454 [MainThread  ] [INFO ]  PROGRESS: at sentence #14890000, processed 156247498 words, keeping 3373309 word types
2017-10-25 07:58:50,436 [MainThread  ] [INFO ]  PROGRESS: at sentence #14900000, processed 156349568 words, keeping 3374521 word types
2017-10-25 07:58:51,451 [MainThread  ] [INFO ]  PROGRES

2017-10-25 07:59:45,700 [MainThread  ] [INFO ]  PROGRESS: at sentence #15450000, processed 162165722 words, keeping 3451738 word types
2017-10-25 07:59:46,671 [MainThread  ] [INFO ]  PROGRESS: at sentence #15460000, processed 162268404 words, keeping 3453061 word types
2017-10-25 07:59:47,683 [MainThread  ] [INFO ]  PROGRESS: at sentence #15470000, processed 162373738 words, keeping 3454942 word types
2017-10-25 07:59:48,689 [MainThread  ] [INFO ]  PROGRESS: at sentence #15480000, processed 162480024 words, keeping 3456426 word types
2017-10-25 07:59:49,696 [MainThread  ] [INFO ]  PROGRESS: at sentence #15490000, processed 162585939 words, keeping 3458231 word types
2017-10-25 07:59:50,699 [MainThread  ] [INFO ]  PROGRESS: at sentence #15500000, processed 162690124 words, keeping 3459466 word types
2017-10-25 07:59:51,764 [MainThread  ] [INFO ]  PROGRESS: at sentence #15510000, processed 162803927 words, keeping 3461083 word types
2017-10-25 07:59:52,767 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:00:46,128 [MainThread  ] [INFO ]  PROGRESS: at sentence #16060000, processed 168487657 words, keeping 3540884 word types
2017-10-25 08:00:47,143 [MainThread  ] [INFO ]  PROGRESS: at sentence #16070000, processed 168594980 words, keeping 3542543 word types
2017-10-25 08:00:48,106 [MainThread  ] [INFO ]  PROGRESS: at sentence #16080000, processed 168694961 words, keeping 3544786 word types
2017-10-25 08:00:49,128 [MainThread  ] [INFO ]  PROGRESS: at sentence #16090000, processed 168800641 words, keeping 3546121 word types
2017-10-25 08:00:50,166 [MainThread  ] [INFO ]  PROGRESS: at sentence #16100000, processed 168912121 words, keeping 3547473 word types
2017-10-25 08:00:51,214 [MainThread  ] [INFO ]  PROGRESS: at sentence #16110000, processed 169022715 words, keeping 3548901 word types
2017-10-25 08:00:52,225 [MainThread  ] [INFO ]  PROGRESS: at sentence #16120000, processed 169129364 words, keeping 3550762 word types
2017-10-25 08:00:53,239 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:01:46,927 [MainThread  ] [INFO ]  PROGRESS: at sentence #16670000, processed 174862631 words, keeping 3629902 word types
2017-10-25 08:01:47,936 [MainThread  ] [INFO ]  PROGRESS: at sentence #16680000, processed 174970063 words, keeping 3631522 word types
2017-10-25 08:01:48,915 [MainThread  ] [INFO ]  PROGRESS: at sentence #16690000, processed 175071809 words, keeping 3632823 word types
2017-10-25 08:01:49,869 [MainThread  ] [INFO ]  PROGRESS: at sentence #16700000, processed 175170707 words, keeping 3633915 word types
2017-10-25 08:01:50,885 [MainThread  ] [INFO ]  PROGRESS: at sentence #16710000, processed 175277646 words, keeping 3635039 word types
2017-10-25 08:01:51,930 [MainThread  ] [INFO ]  PROGRESS: at sentence #16720000, processed 175387559 words, keeping 3636455 word types
2017-10-25 08:01:52,854 [MainThread  ] [INFO ]  PROGRESS: at sentence #16730000, processed 175482675 words, keeping 3638125 word types
2017-10-25 08:01:53,840 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:02:47,834 [MainThread  ] [INFO ]  PROGRESS: at sentence #17280000, processed 181170411 words, keeping 3717650 word types
2017-10-25 08:02:48,847 [MainThread  ] [INFO ]  PROGRESS: at sentence #17290000, processed 181277253 words, keeping 3718899 word types
2017-10-25 08:02:49,843 [MainThread  ] [INFO ]  PROGRESS: at sentence #17300000, processed 181379597 words, keeping 3719941 word types
2017-10-25 08:02:50,834 [MainThread  ] [INFO ]  PROGRESS: at sentence #17310000, processed 181482150 words, keeping 3721338 word types
2017-10-25 08:02:51,830 [MainThread  ] [INFO ]  PROGRESS: at sentence #17320000, processed 181586929 words, keeping 3722507 word types
2017-10-25 08:02:52,771 [MainThread  ] [INFO ]  PROGRESS: at sentence #17330000, processed 181681112 words, keeping 3725350 word types
2017-10-25 08:02:53,796 [MainThread  ] [INFO ]  PROGRESS: at sentence #17340000, processed 181787476 words, keeping 3726644 word types
2017-10-25 08:02:54,803 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:03:49,502 [MainThread  ] [INFO ]  PROGRESS: at sentence #17890000, processed 187557404 words, keeping 3807859 word types
2017-10-25 08:03:50,502 [MainThread  ] [INFO ]  PROGRESS: at sentence #17900000, processed 187663026 words, keeping 3809399 word types
2017-10-25 08:03:51,492 [MainThread  ] [INFO ]  PROGRESS: at sentence #17910000, processed 187765141 words, keeping 3811461 word types
2017-10-25 08:03:52,469 [MainThread  ] [INFO ]  PROGRESS: at sentence #17920000, processed 187867562 words, keeping 3812966 word types
2017-10-25 08:03:53,521 [MainThread  ] [INFO ]  PROGRESS: at sentence #17930000, processed 187973897 words, keeping 3814676 word types
2017-10-25 08:03:54,547 [MainThread  ] [INFO ]  PROGRESS: at sentence #17940000, processed 188080998 words, keeping 3815726 word types
2017-10-25 08:03:55,575 [MainThread  ] [INFO ]  PROGRESS: at sentence #17950000, processed 188189459 words, keeping 3817594 word types
2017-10-25 08:03:56,660 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:04:50,733 [MainThread  ] [INFO ]  PROGRESS: at sentence #18500000, processed 193941139 words, keeping 3892337 word types
2017-10-25 08:04:51,729 [MainThread  ] [INFO ]  PROGRESS: at sentence #18510000, processed 194042602 words, keeping 3893912 word types
2017-10-25 08:04:52,780 [MainThread  ] [INFO ]  PROGRESS: at sentence #18520000, processed 194155691 words, keeping 3895375 word types
2017-10-25 08:04:53,839 [MainThread  ] [INFO ]  PROGRESS: at sentence #18530000, processed 194264981 words, keeping 3896929 word types
2017-10-25 08:04:54,846 [MainThread  ] [INFO ]  PROGRESS: at sentence #18540000, processed 194369868 words, keeping 3898154 word types
2017-10-25 08:04:55,839 [MainThread  ] [INFO ]  PROGRESS: at sentence #18550000, processed 194472837 words, keeping 3899488 word types
2017-10-25 08:04:56,843 [MainThread  ] [INFO ]  PROGRESS: at sentence #18560000, processed 194576801 words, keeping 3901128 word types
2017-10-25 08:04:57,837 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:05:52,327 [MainThread  ] [INFO ]  PROGRESS: at sentence #19110000, processed 200344314 words, keeping 3977648 word types
2017-10-25 08:05:53,339 [MainThread  ] [INFO ]  PROGRESS: at sentence #19120000, processed 200449727 words, keeping 3979491 word types
2017-10-25 08:05:54,313 [MainThread  ] [INFO ]  PROGRESS: at sentence #19130000, processed 200551417 words, keeping 3980874 word types
2017-10-25 08:05:55,273 [MainThread  ] [INFO ]  PROGRESS: at sentence #19140000, processed 200651393 words, keeping 3981636 word types
2017-10-25 08:05:56,297 [MainThread  ] [INFO ]  PROGRESS: at sentence #19150000, processed 200758776 words, keeping 3983024 word types
2017-10-25 08:05:57,297 [MainThread  ] [INFO ]  PROGRESS: at sentence #19160000, processed 200861744 words, keeping 3984486 word types
2017-10-25 08:05:58,294 [MainThread  ] [INFO ]  PROGRESS: at sentence #19170000, processed 200966474 words, keeping 3985734 word types
2017-10-25 08:05:59,326 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:06:53,960 [MainThread  ] [INFO ]  PROGRESS: at sentence #19720000, processed 206762768 words, keeping 4066249 word types
2017-10-25 08:06:54,966 [MainThread  ] [INFO ]  PROGRESS: at sentence #19730000, processed 206868851 words, keeping 4067410 word types
2017-10-25 08:06:55,894 [MainThread  ] [INFO ]  PROGRESS: at sentence #19740000, processed 206963910 words, keeping 4068866 word types
2017-10-25 08:06:56,961 [MainThread  ] [INFO ]  PROGRESS: at sentence #19750000, processed 207070437 words, keeping 4070181 word types
2017-10-25 08:06:57,977 [MainThread  ] [INFO ]  PROGRESS: at sentence #19760000, processed 207177737 words, keeping 4071423 word types
2017-10-25 08:06:59,022 [MainThread  ] [INFO ]  PROGRESS: at sentence #19770000, processed 207288832 words, keeping 4072569 word types
2017-10-25 08:07:00,007 [MainThread  ] [INFO ]  PROGRESS: at sentence #19780000, processed 207386321 words, keeping 4074370 word types
2017-10-25 08:07:01,038 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:07:55,954 [MainThread  ] [INFO ]  PROGRESS: at sentence #20330000, processed 213152707 words, keeping 4150468 word types
2017-10-25 08:07:56,899 [MainThread  ] [INFO ]  PROGRESS: at sentence #20340000, processed 213250761 words, keeping 4151997 word types
2017-10-25 08:07:57,900 [MainThread  ] [INFO ]  PROGRESS: at sentence #20350000, processed 213352475 words, keeping 4153402 word types
2017-10-25 08:07:58,946 [MainThread  ] [INFO ]  PROGRESS: at sentence #20360000, processed 213461691 words, keeping 4154504 word types
2017-10-25 08:07:59,969 [MainThread  ] [INFO ]  PROGRESS: at sentence #20370000, processed 213565863 words, keeping 4155959 word types
2017-10-25 08:08:00,969 [MainThread  ] [INFO ]  PROGRESS: at sentence #20380000, processed 213669297 words, keeping 4157091 word types
2017-10-25 08:08:02,033 [MainThread  ] [INFO ]  PROGRESS: at sentence #20390000, processed 213780125 words, keeping 4158496 word types
2017-10-25 08:08:03,055 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:08:58,465 [MainThread  ] [INFO ]  PROGRESS: at sentence #20940000, processed 219585028 words, keeping 4231350 word types
2017-10-25 08:08:59,456 [MainThread  ] [INFO ]  PROGRESS: at sentence #20950000, processed 219688114 words, keeping 4232549 word types
2017-10-25 08:09:00,483 [MainThread  ] [INFO ]  PROGRESS: at sentence #20960000, processed 219795361 words, keeping 4233698 word types
2017-10-25 08:09:01,504 [MainThread  ] [INFO ]  PROGRESS: at sentence #20970000, processed 219902075 words, keeping 4234974 word types
2017-10-25 08:09:02,517 [MainThread  ] [INFO ]  PROGRESS: at sentence #20980000, processed 220006896 words, keeping 4236060 word types
2017-10-25 08:09:03,493 [MainThread  ] [INFO ]  PROGRESS: at sentence #20990000, processed 220107562 words, keeping 4237675 word types
2017-10-25 08:09:04,515 [MainThread  ] [INFO ]  PROGRESS: at sentence #21000000, processed 220213426 words, keeping 4238962 word types
2017-10-25 08:09:05,547 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:10:00,007 [MainThread  ] [INFO ]  PROGRESS: at sentence #21550000, processed 225978819 words, keeping 4312577 word types
2017-10-25 08:10:00,989 [MainThread  ] [INFO ]  PROGRESS: at sentence #21560000, processed 226081364 words, keeping 4313677 word types
2017-10-25 08:10:02,044 [MainThread  ] [INFO ]  PROGRESS: at sentence #21570000, processed 226194491 words, keeping 4315370 word types
2017-10-25 08:10:03,060 [MainThread  ] [INFO ]  PROGRESS: at sentence #21580000, processed 226300168 words, keeping 4316506 word types
2017-10-25 08:10:04,060 [MainThread  ] [INFO ]  PROGRESS: at sentence #21590000, processed 226405289 words, keeping 4317799 word types
2017-10-25 08:10:05,070 [MainThread  ] [INFO ]  PROGRESS: at sentence #21600000, processed 226511751 words, keeping 4319229 word types
2017-10-25 08:10:05,886 [MainThread  ] [INFO ]  PROGRESS: at sentence #21610000, processed 226592391 words, keeping 4320882 word types
2017-10-25 08:10:06,889 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:11:01,498 [MainThread  ] [INFO ]  PROGRESS: at sentence #22160000, processed 232362991 words, keeping 4394381 word types
2017-10-25 08:11:02,508 [MainThread  ] [INFO ]  PROGRESS: at sentence #22170000, processed 232464548 words, keeping 4395769 word types
2017-10-25 08:11:03,551 [MainThread  ] [INFO ]  PROGRESS: at sentence #22180000, processed 232572781 words, keeping 4396715 word types
2017-10-25 08:11:04,550 [MainThread  ] [INFO ]  PROGRESS: at sentence #22190000, processed 232676856 words, keeping 4397727 word types
2017-10-25 08:11:05,567 [MainThread  ] [INFO ]  PROGRESS: at sentence #22200000, processed 232782106 words, keeping 4398947 word types
2017-10-25 08:11:06,603 [MainThread  ] [INFO ]  PROGRESS: at sentence #22210000, processed 232889737 words, keeping 4400223 word types
2017-10-25 08:11:07,581 [MainThread  ] [INFO ]  PROGRESS: at sentence #22220000, processed 232992018 words, keeping 4401167 word types
2017-10-25 08:11:08,605 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:12:03,487 [MainThread  ] [INFO ]  PROGRESS: at sentence #22770000, processed 238769086 words, keeping 4474224 word types
2017-10-25 08:12:04,464 [MainThread  ] [INFO ]  PROGRESS: at sentence #22780000, processed 238871227 words, keeping 4475351 word types
2017-10-25 08:12:05,472 [MainThread  ] [INFO ]  PROGRESS: at sentence #22790000, processed 238976754 words, keeping 4476696 word types
2017-10-25 08:12:06,469 [MainThread  ] [INFO ]  PROGRESS: at sentence #22800000, processed 239079651 words, keeping 4477688 word types
2017-10-25 08:12:07,505 [MainThread  ] [INFO ]  PROGRESS: at sentence #22810000, processed 239184517 words, keeping 4479361 word types
2017-10-25 08:12:08,485 [MainThread  ] [INFO ]  PROGRESS: at sentence #22820000, processed 239284217 words, keeping 4480417 word types
2017-10-25 08:12:09,608 [MainThread  ] [INFO ]  PROGRESS: at sentence #22830000, processed 239401044 words, keeping 4481513 word types
2017-10-25 08:12:10,659 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:13:06,065 [MainThread  ] [INFO ]  PROGRESS: at sentence #23380000, processed 245190516 words, keeping 4554038 word types
2017-10-25 08:13:07,058 [MainThread  ] [INFO ]  PROGRESS: at sentence #23390000, processed 245292496 words, keeping 4555181 word types
2017-10-25 08:13:08,085 [MainThread  ] [INFO ]  PROGRESS: at sentence #23400000, processed 245401392 words, keeping 4556086 word types
2017-10-25 08:13:09,059 [MainThread  ] [INFO ]  PROGRESS: at sentence #23410000, processed 245500803 words, keeping 4557900 word types
2017-10-25 08:13:10,097 [MainThread  ] [INFO ]  PROGRESS: at sentence #23420000, processed 245607731 words, keeping 4558890 word types
2017-10-25 08:13:11,111 [MainThread  ] [INFO ]  PROGRESS: at sentence #23430000, processed 245710836 words, keeping 4561063 word types
2017-10-25 08:13:12,120 [MainThread  ] [INFO ]  PROGRESS: at sentence #23440000, processed 245815082 words, keeping 4562667 word types
2017-10-25 08:13:13,124 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:14:07,550 [MainThread  ] [INFO ]  PROGRESS: at sentence #23990000, processed 251507918 words, keeping 4635239 word types
2017-10-25 08:14:08,593 [MainThread  ] [INFO ]  PROGRESS: at sentence #24000000, processed 251617207 words, keeping 4636130 word types
2017-10-25 08:14:09,627 [MainThread  ] [INFO ]  PROGRESS: at sentence #24010000, processed 251726279 words, keeping 4636756 word types
2017-10-25 08:14:10,616 [MainThread  ] [INFO ]  PROGRESS: at sentence #24020000, processed 251828860 words, keeping 4638100 word types
2017-10-25 08:14:11,669 [MainThread  ] [INFO ]  PROGRESS: at sentence #24030000, processed 251938635 words, keeping 4639705 word types
2017-10-25 08:14:12,638 [MainThread  ] [INFO ]  PROGRESS: at sentence #24040000, processed 252037406 words, keeping 4640898 word types
2017-10-25 08:14:13,648 [MainThread  ] [INFO ]  PROGRESS: at sentence #24050000, processed 252143726 words, keeping 4641851 word types
2017-10-25 08:14:14,649 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:15:09,693 [MainThread  ] [INFO ]  PROGRESS: at sentence #24600000, processed 257947827 words, keeping 4714983 word types
2017-10-25 08:15:10,664 [MainThread  ] [INFO ]  PROGRESS: at sentence #24610000, processed 258046790 words, keeping 4716095 word types
2017-10-25 08:15:11,673 [MainThread  ] [INFO ]  PROGRESS: at sentence #24620000, processed 258152690 words, keeping 4717760 word types
2017-10-25 08:15:12,673 [MainThread  ] [INFO ]  PROGRESS: at sentence #24630000, processed 258254902 words, keeping 4718745 word types
2017-10-25 08:15:13,628 [MainThread  ] [INFO ]  PROGRESS: at sentence #24640000, processed 258352119 words, keeping 4719881 word types
2017-10-25 08:15:14,604 [MainThread  ] [INFO ]  PROGRESS: at sentence #24650000, processed 258450665 words, keeping 4721171 word types
2017-10-25 08:15:15,652 [MainThread  ] [INFO ]  PROGRESS: at sentence #24660000, processed 258557162 words, keeping 4722112 word types
2017-10-25 08:15:16,668 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:16:12,142 [MainThread  ] [INFO ]  PROGRESS: at sentence #25210000, processed 264342746 words, keeping 4793108 word types
2017-10-25 08:16:13,208 [MainThread  ] [INFO ]  PROGRESS: at sentence #25220000, processed 264452902 words, keeping 4794523 word types
2017-10-25 08:16:14,241 [MainThread  ] [INFO ]  PROGRESS: at sentence #25230000, processed 264558405 words, keeping 4795927 word types
2017-10-25 08:16:15,250 [MainThread  ] [INFO ]  PROGRESS: at sentence #25240000, processed 264662873 words, keeping 4796807 word types
2017-10-25 08:16:16,269 [MainThread  ] [INFO ]  PROGRESS: at sentence #25250000, processed 264768140 words, keeping 4798092 word types
2017-10-25 08:16:17,282 [MainThread  ] [INFO ]  PROGRESS: at sentence #25260000, processed 264873323 words, keeping 4799256 word types
2017-10-25 08:16:18,310 [MainThread  ] [INFO ]  PROGRESS: at sentence #25270000, processed 264979809 words, keeping 4800188 word types
2017-10-25 08:16:19,277 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:17:14,225 [MainThread  ] [INFO ]  PROGRESS: at sentence #25820000, processed 270764197 words, keeping 4868076 word types
2017-10-25 08:17:15,243 [MainThread  ] [INFO ]  PROGRESS: at sentence #25830000, processed 270867117 words, keeping 4869738 word types
2017-10-25 08:17:16,242 [MainThread  ] [INFO ]  PROGRESS: at sentence #25840000, processed 270969216 words, keeping 4871222 word types
2017-10-25 08:17:17,275 [MainThread  ] [INFO ]  PROGRESS: at sentence #25850000, processed 271070746 words, keeping 4872578 word types
2017-10-25 08:17:18,306 [MainThread  ] [INFO ]  PROGRESS: at sentence #25860000, processed 271178309 words, keeping 4873663 word types
2017-10-25 08:17:19,317 [MainThread  ] [INFO ]  PROGRESS: at sentence #25870000, processed 271283766 words, keeping 4874631 word types
2017-10-25 08:17:20,254 [MainThread  ] [INFO ]  PROGRESS: at sentence #25880000, processed 271379654 words, keeping 4875521 word types
2017-10-25 08:17:21,287 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:18:16,687 [MainThread  ] [INFO ]  PROGRESS: at sentence #26430000, processed 277117578 words, keeping 4943192 word types
2017-10-25 08:18:17,689 [MainThread  ] [INFO ]  PROGRESS: at sentence #26440000, processed 277223447 words, keeping 4944333 word types
2017-10-25 08:18:18,716 [MainThread  ] [INFO ]  PROGRESS: at sentence #26450000, processed 277329685 words, keeping 4946137 word types
2017-10-25 08:18:19,717 [MainThread  ] [INFO ]  PROGRESS: at sentence #26460000, processed 277434825 words, keeping 4946904 word types
2017-10-25 08:18:20,757 [MainThread  ] [INFO ]  PROGRESS: at sentence #26470000, processed 277544863 words, keeping 4948029 word types
2017-10-25 08:18:21,761 [MainThread  ] [INFO ]  PROGRESS: at sentence #26480000, processed 277650851 words, keeping 4948625 word types
2017-10-25 08:18:22,763 [MainThread  ] [INFO ]  PROGRESS: at sentence #26490000, processed 277754675 words, keeping 4950056 word types
2017-10-25 08:18:23,779 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:19:17,993 [MainThread  ] [INFO ]  PROGRESS: at sentence #27040000, processed 283527985 words, keeping 5020593 word types
2017-10-25 08:19:19,010 [MainThread  ] [INFO ]  PROGRESS: at sentence #27050000, processed 283636194 words, keeping 5021822 word types
2017-10-25 08:19:20,016 [MainThread  ] [INFO ]  PROGRESS: at sentence #27060000, processed 283743099 words, keeping 5022905 word types
2017-10-25 08:19:20,992 [MainThread  ] [INFO ]  PROGRESS: at sentence #27070000, processed 283845330 words, keeping 5024620 word types
2017-10-25 08:19:22,019 [MainThread  ] [INFO ]  PROGRESS: at sentence #27080000, processed 283952119 words, keeping 5025792 word types
2017-10-25 08:19:22,751 [MainThread  ] [INFO ]  PROGRESS: at sentence #27090000, processed 284021627 words, keeping 5027088 word types
2017-10-25 08:19:23,759 [MainThread  ] [INFO ]  PROGRESS: at sentence #27100000, processed 284129610 words, keeping 5028086 word types
2017-10-25 08:19:24,717 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:20:18,440 [MainThread  ] [INFO ]  PROGRESS: at sentence #27650000, processed 289882489 words, keeping 5096528 word types
2017-10-25 08:20:19,467 [MainThread  ] [INFO ]  PROGRESS: at sentence #27660000, processed 289989360 words, keeping 5098020 word types
2017-10-25 08:20:20,457 [MainThread  ] [INFO ]  PROGRESS: at sentence #27670000, processed 290093878 words, keeping 5099294 word types
2017-10-25 08:20:21,425 [MainThread  ] [INFO ]  PROGRESS: at sentence #27680000, processed 290194682 words, keeping 5100088 word types
2017-10-25 08:20:22,505 [MainThread  ] [INFO ]  PROGRESS: at sentence #27690000, processed 290310727 words, keeping 5101183 word types
2017-10-25 08:20:23,464 [MainThread  ] [INFO ]  PROGRESS: at sentence #27700000, processed 290411646 words, keeping 5102142 word types
2017-10-25 08:20:24,442 [MainThread  ] [INFO ]  PROGRESS: at sentence #27710000, processed 290514340 words, keeping 5103564 word types
2017-10-25 08:20:25,423 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:21:19,124 [MainThread  ] [INFO ]  PROGRESS: at sentence #28260000, processed 296276461 words, keeping 5168126 word types
2017-10-25 08:21:20,142 [MainThread  ] [INFO ]  PROGRESS: at sentence #28270000, processed 296385842 words, keeping 5169039 word types
2017-10-25 08:21:21,154 [MainThread  ] [INFO ]  PROGRESS: at sentence #28280000, processed 296492979 words, keeping 5169899 word types
2017-10-25 08:21:22,175 [MainThread  ] [INFO ]  PROGRESS: at sentence #28290000, processed 296599454 words, keeping 5170950 word types
2017-10-25 08:21:23,164 [MainThread  ] [INFO ]  PROGRESS: at sentence #28300000, processed 296702356 words, keeping 5172371 word types
2017-10-25 08:21:24,205 [MainThread  ] [INFO ]  PROGRESS: at sentence #28310000, processed 296810803 words, keeping 5173556 word types
2017-10-25 08:21:25,203 [MainThread  ] [INFO ]  PROGRESS: at sentence #28320000, processed 296916282 words, keeping 5174396 word types
2017-10-25 08:21:26,221 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:22:20,233 [MainThread  ] [INFO ]  PROGRESS: at sentence #28870000, processed 302726849 words, keeping 5238492 word types
2017-10-25 08:22:21,203 [MainThread  ] [INFO ]  PROGRESS: at sentence #28880000, processed 302829740 words, keeping 5239684 word types
2017-10-25 08:22:22,191 [MainThread  ] [INFO ]  PROGRESS: at sentence #28890000, processed 302935790 words, keeping 5240617 word types
2017-10-25 08:22:23,151 [MainThread  ] [INFO ]  PROGRESS: at sentence #28900000, processed 303037194 words, keeping 5241886 word types
2017-10-25 08:22:24,110 [MainThread  ] [INFO ]  PROGRESS: at sentence #28910000, processed 303138142 words, keeping 5243344 word types
2017-10-25 08:22:25,111 [MainThread  ] [INFO ]  PROGRESS: at sentence #28920000, processed 303242932 words, keeping 5244206 word types
2017-10-25 08:22:26,104 [MainThread  ] [INFO ]  PROGRESS: at sentence #28930000, processed 303348222 words, keeping 5245264 word types
2017-10-25 08:22:27,109 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:23:21,066 [MainThread  ] [INFO ]  PROGRESS: at sentence #29480000, processed 309126170 words, keeping 5311972 word types
2017-10-25 08:23:22,055 [MainThread  ] [INFO ]  PROGRESS: at sentence #29490000, processed 309231224 words, keeping 5312793 word types
2017-10-25 08:23:23,080 [MainThread  ] [INFO ]  PROGRESS: at sentence #29500000, processed 309341773 words, keeping 5313665 word types
2017-10-25 08:23:24,120 [MainThread  ] [INFO ]  PROGRESS: at sentence #29510000, processed 309453623 words, keeping 5315194 word types
2017-10-25 08:23:25,181 [MainThread  ] [INFO ]  PROGRESS: at sentence #29520000, processed 309567349 words, keeping 5316619 word types
2017-10-25 08:23:26,136 [MainThread  ] [INFO ]  PROGRESS: at sentence #29530000, processed 309666598 words, keeping 5317863 word types
2017-10-25 08:23:27,111 [MainThread  ] [INFO ]  PROGRESS: at sentence #29540000, processed 309766367 words, keeping 5318919 word types
2017-10-25 08:23:28,184 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:24:22,203 [MainThread  ] [INFO ]  PROGRESS: at sentence #30090000, processed 315483729 words, keeping 5391934 word types
2017-10-25 08:24:23,236 [MainThread  ] [INFO ]  PROGRESS: at sentence #30100000, processed 315595395 words, keeping 5392803 word types
2017-10-25 08:24:24,189 [MainThread  ] [INFO ]  PROGRESS: at sentence #30110000, processed 315696228 words, keeping 5393681 word types
2017-10-25 08:24:25,170 [MainThread  ] [INFO ]  PROGRESS: at sentence #30120000, processed 315800476 words, keeping 5395158 word types
2017-10-25 08:24:26,164 [MainThread  ] [INFO ]  PROGRESS: at sentence #30130000, processed 315904653 words, keeping 5396596 word types
2017-10-25 08:24:27,172 [MainThread  ] [INFO ]  PROGRESS: at sentence #30140000, processed 316010290 words, keeping 5397742 word types
2017-10-25 08:24:28,155 [MainThread  ] [INFO ]  PROGRESS: at sentence #30150000, processed 316112805 words, keeping 5398642 word types
2017-10-25 08:24:29,127 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:25:23,143 [MainThread  ] [INFO ]  PROGRESS: at sentence #30700000, processed 321898212 words, keeping 5463837 word types
2017-10-25 08:25:24,100 [MainThread  ] [INFO ]  PROGRESS: at sentence #30710000, processed 321997302 words, keeping 5464808 word types
2017-10-25 08:25:25,076 [MainThread  ] [INFO ]  PROGRESS: at sentence #30720000, processed 322100523 words, keeping 5465691 word types
2017-10-25 08:25:26,029 [MainThread  ] [INFO ]  PROGRESS: at sentence #30730000, processed 322199381 words, keeping 5466717 word types
2017-10-25 08:25:27,037 [MainThread  ] [INFO ]  PROGRESS: at sentence #30740000, processed 322306611 words, keeping 5467722 word types
2017-10-25 08:25:28,070 [MainThread  ] [INFO ]  PROGRESS: at sentence #30750000, processed 322417561 words, keeping 5468572 word types
2017-10-25 08:25:29,075 [MainThread  ] [INFO ]  PROGRESS: at sentence #30760000, processed 322526028 words, keeping 5469423 word types
2017-10-25 08:25:30,031 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:26:23,437 [MainThread  ] [INFO ]  PROGRESS: at sentence #31310000, processed 328240366 words, keeping 5536236 word types
2017-10-25 08:26:24,438 [MainThread  ] [INFO ]  PROGRESS: at sentence #31320000, processed 328347816 words, keeping 5537750 word types
2017-10-25 08:26:25,436 [MainThread  ] [INFO ]  PROGRESS: at sentence #31330000, processed 328453736 words, keeping 5538872 word types
2017-10-25 08:26:26,453 [MainThread  ] [INFO ]  PROGRESS: at sentence #31340000, processed 328558750 words, keeping 5539618 word types
2017-10-25 08:26:27,446 [MainThread  ] [INFO ]  PROGRESS: at sentence #31350000, processed 328660024 words, keeping 5540569 word types
2017-10-25 08:26:28,395 [MainThread  ] [INFO ]  PROGRESS: at sentence #31360000, processed 328759380 words, keeping 5541794 word types
2017-10-25 08:26:29,354 [MainThread  ] [INFO ]  PROGRESS: at sentence #31370000, processed 328859360 words, keeping 5542735 word types
2017-10-25 08:26:30,363 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:27:25,375 [MainThread  ] [INFO ]  PROGRESS: at sentence #31920000, processed 334689425 words, keeping 5608600 word types
2017-10-25 08:27:26,402 [MainThread  ] [INFO ]  PROGRESS: at sentence #31930000, processed 334793089 words, keeping 5609838 word types
2017-10-25 08:27:27,451 [MainThread  ] [INFO ]  PROGRESS: at sentence #31940000, processed 334900626 words, keeping 5611100 word types
2017-10-25 08:27:28,515 [MainThread  ] [INFO ]  PROGRESS: at sentence #31950000, processed 335010078 words, keeping 5612147 word types
2017-10-25 08:27:29,558 [MainThread  ] [INFO ]  PROGRESS: at sentence #31960000, processed 335116881 words, keeping 5613769 word types
2017-10-25 08:27:30,635 [MainThread  ] [INFO ]  PROGRESS: at sentence #31970000, processed 335226038 words, keeping 5615281 word types
2017-10-25 08:27:31,643 [MainThread  ] [INFO ]  PROGRESS: at sentence #31980000, processed 335327874 words, keeping 5616990 word types
2017-10-25 08:27:32,654 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:28:28,488 [MainThread  ] [INFO ]  PROGRESS: at sentence #32530000, processed 341102646 words, keeping 5676483 word types
2017-10-25 08:28:29,445 [MainThread  ] [INFO ]  PROGRESS: at sentence #32540000, processed 341196951 words, keeping 5677587 word types
2017-10-25 08:28:30,511 [MainThread  ] [INFO ]  PROGRESS: at sentence #32550000, processed 341305432 words, keeping 5679282 word types
2017-10-25 08:28:31,585 [MainThread  ] [INFO ]  PROGRESS: at sentence #32560000, processed 341413806 words, keeping 5680640 word types
2017-10-25 08:28:32,614 [MainThread  ] [INFO ]  PROGRESS: at sentence #32570000, processed 341516608 words, keeping 5681479 word types
2017-10-25 08:28:33,680 [MainThread  ] [INFO ]  PROGRESS: at sentence #32580000, processed 341624946 words, keeping 5682547 word types
2017-10-25 08:28:34,748 [MainThread  ] [INFO ]  PROGRESS: at sentence #32590000, processed 341732326 words, keeping 5683809 word types
2017-10-25 08:28:35,785 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:29:31,616 [MainThread  ] [INFO ]  PROGRESS: at sentence #33140000, processed 347484350 words, keeping 5744654 word types
2017-10-25 08:29:32,572 [MainThread  ] [INFO ]  PROGRESS: at sentence #33150000, processed 347575669 words, keeping 5746034 word types
2017-10-25 08:29:33,593 [MainThread  ] [INFO ]  PROGRESS: at sentence #33160000, processed 347679218 words, keeping 5746981 word types
2017-10-25 08:29:34,613 [MainThread  ] [INFO ]  PROGRESS: at sentence #33170000, processed 347781872 words, keeping 5748050 word types
2017-10-25 08:29:35,684 [MainThread  ] [INFO ]  PROGRESS: at sentence #33180000, processed 347892899 words, keeping 5749183 word types
2017-10-25 08:29:36,762 [MainThread  ] [INFO ]  PROGRESS: at sentence #33190000, processed 348004388 words, keeping 5750717 word types
2017-10-25 08:29:37,726 [MainThread  ] [INFO ]  PROGRESS: at sentence #33200000, processed 348094659 words, keeping 5751985 word types
2017-10-25 08:29:38,724 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:30:35,548 [MainThread  ] [INFO ]  PROGRESS: at sentence #33750000, processed 353873444 words, keeping 5815898 word types
2017-10-25 08:30:36,568 [MainThread  ] [INFO ]  PROGRESS: at sentence #33760000, processed 353976201 words, keeping 5817128 word types
2017-10-25 08:30:37,646 [MainThread  ] [INFO ]  PROGRESS: at sentence #33770000, processed 354086830 words, keeping 5818073 word types
2017-10-25 08:30:38,690 [MainThread  ] [INFO ]  PROGRESS: at sentence #33780000, processed 354193792 words, keeping 5819345 word types
2017-10-25 08:30:39,791 [MainThread  ] [INFO ]  PROGRESS: at sentence #33790000, processed 354305508 words, keeping 5820162 word types
2017-10-25 08:30:40,898 [MainThread  ] [INFO ]  PROGRESS: at sentence #33800000, processed 354420912 words, keeping 5821188 word types
2017-10-25 08:30:41,968 [MainThread  ] [INFO ]  PROGRESS: at sentence #33810000, processed 354529165 words, keeping 5822039 word types
2017-10-25 08:30:43,019 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:31:39,148 [MainThread  ] [INFO ]  PROGRESS: at sentence #34360000, processed 360311159 words, keeping 5884957 word types
2017-10-25 08:31:40,195 [MainThread  ] [INFO ]  PROGRESS: at sentence #34370000, processed 360414471 words, keeping 5886634 word types
2017-10-25 08:31:41,198 [MainThread  ] [INFO ]  PROGRESS: at sentence #34380000, processed 360515033 words, keeping 5887733 word types
2017-10-25 08:31:42,255 [MainThread  ] [INFO ]  PROGRESS: at sentence #34390000, processed 360623331 words, keeping 5889101 word types
2017-10-25 08:31:43,218 [MainThread  ] [INFO ]  PROGRESS: at sentence #34400000, processed 360717815 words, keeping 5890864 word types
2017-10-25 08:31:44,254 [MainThread  ] [INFO ]  PROGRESS: at sentence #34410000, processed 360820995 words, keeping 5891515 word types
2017-10-25 08:31:45,384 [MainThread  ] [INFO ]  PROGRESS: at sentence #34420000, processed 360937345 words, keeping 5893609 word types
2017-10-25 08:31:46,452 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:32:42,691 [MainThread  ] [INFO ]  PROGRESS: at sentence #34970000, processed 366730251 words, keeping 5955821 word types
2017-10-25 08:32:43,737 [MainThread  ] [INFO ]  PROGRESS: at sentence #34980000, processed 366835829 words, keeping 5956994 word types
2017-10-25 08:32:44,744 [MainThread  ] [INFO ]  PROGRESS: at sentence #34990000, processed 366937473 words, keeping 5957889 word types
2017-10-25 08:32:45,763 [MainThread  ] [INFO ]  PROGRESS: at sentence #35000000, processed 367038775 words, keeping 5959055 word types
2017-10-25 08:32:46,804 [MainThread  ] [INFO ]  PROGRESS: at sentence #35010000, processed 367143321 words, keeping 5959938 word types
2017-10-25 08:32:47,788 [MainThread  ] [INFO ]  PROGRESS: at sentence #35020000, processed 367242332 words, keeping 5960845 word types
2017-10-25 08:32:48,888 [MainThread  ] [INFO ]  PROGRESS: at sentence #35030000, processed 367354463 words, keeping 5961749 word types
2017-10-25 08:32:49,937 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:33:45,739 [MainThread  ] [INFO ]  PROGRESS: at sentence #35580000, processed 373072890 words, keeping 6027130 word types
2017-10-25 08:33:46,764 [MainThread  ] [INFO ]  PROGRESS: at sentence #35590000, processed 373176406 words, keeping 6028676 word types
2017-10-25 08:33:47,856 [MainThread  ] [INFO ]  PROGRESS: at sentence #35600000, processed 373288615 words, keeping 6029596 word types
2017-10-25 08:33:48,890 [MainThread  ] [INFO ]  PROGRESS: at sentence #35610000, processed 373391622 words, keeping 6030636 word types
2017-10-25 08:33:49,907 [MainThread  ] [INFO ]  PROGRESS: at sentence #35620000, processed 373495560 words, keeping 6031507 word types
2017-10-25 08:33:50,949 [MainThread  ] [INFO ]  PROGRESS: at sentence #35630000, processed 373599647 words, keeping 6032592 word types
2017-10-25 08:33:52,017 [MainThread  ] [INFO ]  PROGRESS: at sentence #35640000, processed 373707620 words, keeping 6034055 word types
2017-10-25 08:33:53,102 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:34:49,787 [MainThread  ] [INFO ]  PROGRESS: at sentence #36190000, processed 379514917 words, keeping 6095863 word types
2017-10-25 08:34:50,839 [MainThread  ] [INFO ]  PROGRESS: at sentence #36200000, processed 379621756 words, keeping 6096695 word types
2017-10-25 08:34:51,881 [MainThread  ] [INFO ]  PROGRESS: at sentence #36210000, processed 379727900 words, keeping 6098035 word types
2017-10-25 08:34:52,967 [MainThread  ] [INFO ]  PROGRESS: at sentence #36220000, processed 379840893 words, keeping 6099133 word types
2017-10-25 08:34:53,989 [MainThread  ] [INFO ]  PROGRESS: at sentence #36230000, processed 379943844 words, keeping 6099879 word types
2017-10-25 08:34:55,007 [MainThread  ] [INFO ]  PROGRESS: at sentence #36240000, processed 380046844 words, keeping 6101328 word types
2017-10-25 08:34:56,016 [MainThread  ] [INFO ]  PROGRESS: at sentence #36250000, processed 380147022 words, keeping 6102512 word types
2017-10-25 08:34:57,080 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:35:53,161 [MainThread  ] [INFO ]  PROGRESS: at sentence #36800000, processed 385892683 words, keeping 6162666 word types
2017-10-25 08:35:54,243 [MainThread  ] [INFO ]  PROGRESS: at sentence #36810000, processed 386000331 words, keeping 6163643 word types
2017-10-25 08:35:55,269 [MainThread  ] [INFO ]  PROGRESS: at sentence #36820000, processed 386103605 words, keeping 6164741 word types
2017-10-25 08:35:56,328 [MainThread  ] [INFO ]  PROGRESS: at sentence #36830000, processed 386212031 words, keeping 6165809 word types
2017-10-25 08:35:57,400 [MainThread  ] [INFO ]  PROGRESS: at sentence #36840000, processed 386313845 words, keeping 6167448 word types
2017-10-25 08:35:58,459 [MainThread  ] [INFO ]  PROGRESS: at sentence #36850000, processed 386421675 words, keeping 6168522 word types
2017-10-25 08:35:59,521 [MainThread  ] [INFO ]  PROGRESS: at sentence #36860000, processed 386529414 words, keeping 6169611 word types
2017-10-25 08:36:00,504 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:36:56,480 [MainThread  ] [INFO ]  PROGRESS: at sentence #37410000, processed 392248195 words, keeping 6234080 word types
2017-10-25 08:36:57,502 [MainThread  ] [INFO ]  PROGRESS: at sentence #37420000, processed 392351404 words, keeping 6235037 word types
2017-10-25 08:36:58,565 [MainThread  ] [INFO ]  PROGRESS: at sentence #37430000, processed 392458274 words, keeping 6235899 word types
2017-10-25 08:36:59,582 [MainThread  ] [INFO ]  PROGRESS: at sentence #37440000, processed 392560900 words, keeping 6236704 word types
2017-10-25 08:37:00,636 [MainThread  ] [INFO ]  PROGRESS: at sentence #37450000, processed 392668223 words, keeping 6237432 word types
2017-10-25 08:37:01,701 [MainThread  ] [INFO ]  PROGRESS: at sentence #37460000, processed 392776407 words, keeping 6238432 word types
2017-10-25 08:37:02,789 [MainThread  ] [INFO ]  PROGRESS: at sentence #37470000, processed 392887945 words, keeping 6239370 word types
2017-10-25 08:37:03,860 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:38:00,121 [MainThread  ] [INFO ]  PROGRESS: at sentence #38020000, processed 398650428 words, keeping 6296933 word types
2017-10-25 08:38:01,130 [MainThread  ] [INFO ]  PROGRESS: at sentence #38030000, processed 398750935 words, keeping 6297885 word types
2017-10-25 08:38:02,182 [MainThread  ] [INFO ]  PROGRESS: at sentence #38040000, processed 398854992 words, keeping 6299486 word types
2017-10-25 08:38:03,205 [MainThread  ] [INFO ]  PROGRESS: at sentence #38050000, processed 398959478 words, keeping 6300781 word types
2017-10-25 08:38:04,144 [MainThread  ] [INFO ]  PROGRESS: at sentence #38060000, processed 399051479 words, keeping 6301355 word types
2017-10-25 08:38:05,152 [MainThread  ] [INFO ]  PROGRESS: at sentence #38070000, processed 399152459 words, keeping 6302311 word types
2017-10-25 08:38:06,175 [MainThread  ] [INFO ]  PROGRESS: at sentence #38080000, processed 399253112 words, keeping 6303668 word types
2017-10-25 08:38:07,235 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:39:03,688 [MainThread  ] [INFO ]  PROGRESS: at sentence #38630000, processed 405027965 words, keeping 6366782 word types
2017-10-25 08:39:04,750 [MainThread  ] [INFO ]  PROGRESS: at sentence #38640000, processed 405136977 words, keeping 6367918 word types
2017-10-25 08:39:05,801 [MainThread  ] [INFO ]  PROGRESS: at sentence #38650000, processed 405243817 words, keeping 6368909 word types
2017-10-25 08:39:06,765 [MainThread  ] [INFO ]  PROGRESS: at sentence #38660000, processed 405337144 words, keeping 6370102 word types
2017-10-25 08:39:07,798 [MainThread  ] [INFO ]  PROGRESS: at sentence #38670000, processed 405440390 words, keeping 6371251 word types
2017-10-25 08:39:08,854 [MainThread  ] [INFO ]  PROGRESS: at sentence #38680000, processed 405547541 words, keeping 6372260 word types
2017-10-25 08:39:09,870 [MainThread  ] [INFO ]  PROGRESS: at sentence #38690000, processed 405649628 words, keeping 6373622 word types
2017-10-25 08:39:10,959 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:40:07,671 [MainThread  ] [INFO ]  PROGRESS: at sentence #39240000, processed 411386416 words, keeping 6434915 word types
2017-10-25 08:40:08,745 [MainThread  ] [INFO ]  PROGRESS: at sentence #39250000, processed 411494809 words, keeping 6436018 word types
2017-10-25 08:40:09,791 [MainThread  ] [INFO ]  PROGRESS: at sentence #39260000, processed 411600009 words, keeping 6437281 word types
2017-10-25 08:40:10,823 [MainThread  ] [INFO ]  PROGRESS: at sentence #39270000, processed 411703202 words, keeping 6438166 word types
2017-10-25 08:40:11,894 [MainThread  ] [INFO ]  PROGRESS: at sentence #39280000, processed 411810705 words, keeping 6439398 word types
2017-10-25 08:40:12,933 [MainThread  ] [INFO ]  PROGRESS: at sentence #39290000, processed 411909695 words, keeping 6440547 word types
2017-10-25 08:40:13,984 [MainThread  ] [INFO ]  PROGRESS: at sentence #39300000, processed 412013553 words, keeping 6441701 word types
2017-10-25 08:40:15,011 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:41:11,352 [MainThread  ] [INFO ]  PROGRESS: at sentence #39850000, processed 417780017 words, keeping 6503146 word types
2017-10-25 08:41:12,416 [MainThread  ] [INFO ]  PROGRESS: at sentence #39860000, processed 417887674 words, keeping 6503871 word types
2017-10-25 08:41:13,457 [MainThread  ] [INFO ]  PROGRESS: at sentence #39870000, processed 417987908 words, keeping 6504565 word types
2017-10-25 08:41:14,492 [MainThread  ] [INFO ]  PROGRESS: at sentence #39880000, processed 418091715 words, keeping 6505597 word types
2017-10-25 08:41:15,512 [MainThread  ] [INFO ]  PROGRESS: at sentence #39890000, processed 418195140 words, keeping 6506647 word types
2017-10-25 08:41:16,597 [MainThread  ] [INFO ]  PROGRESS: at sentence #39900000, processed 418305357 words, keeping 6508506 word types
2017-10-25 08:41:17,653 [MainThread  ] [INFO ]  PROGRESS: at sentence #39910000, processed 418414129 words, keeping 6509631 word types
2017-10-25 08:41:18,655 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:42:15,453 [MainThread  ] [INFO ]  PROGRESS: at sentence #40460000, processed 424221739 words, keeping 6568948 word types
2017-10-25 08:42:16,487 [MainThread  ] [INFO ]  PROGRESS: at sentence #40470000, processed 424325151 words, keeping 6569606 word types
2017-10-25 08:42:17,541 [MainThread  ] [INFO ]  PROGRESS: at sentence #40480000, processed 424431947 words, keeping 6570411 word types
2017-10-25 08:42:18,546 [MainThread  ] [INFO ]  PROGRESS: at sentence #40490000, processed 424531835 words, keeping 6571234 word types
2017-10-25 08:42:19,599 [MainThread  ] [INFO ]  PROGRESS: at sentence #40500000, processed 424638647 words, keeping 6572083 word types
2017-10-25 08:42:20,640 [MainThread  ] [INFO ]  PROGRESS: at sentence #40510000, processed 424744710 words, keeping 6572874 word types
2017-10-25 08:42:21,717 [MainThread  ] [INFO ]  PROGRESS: at sentence #40520000, processed 424854525 words, keeping 6573899 word types
2017-10-25 08:42:22,734 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:43:19,213 [MainThread  ] [INFO ]  PROGRESS: at sentence #41070000, processed 430625726 words, keeping 6630443 word types
2017-10-25 08:43:20,259 [MainThread  ] [INFO ]  PROGRESS: at sentence #41080000, processed 430731232 words, keeping 6631080 word types
2017-10-25 08:43:21,367 [MainThread  ] [INFO ]  PROGRESS: at sentence #41090000, processed 430844930 words, keeping 6631930 word types
2017-10-25 08:43:22,428 [MainThread  ] [INFO ]  PROGRESS: at sentence #41100000, processed 430954467 words, keeping 6632882 word types
2017-10-25 08:43:23,479 [MainThread  ] [INFO ]  PROGRESS: at sentence #41110000, processed 431060741 words, keeping 6633665 word types
2017-10-25 08:43:24,510 [MainThread  ] [INFO ]  PROGRESS: at sentence #41120000, processed 431164922 words, keeping 6635163 word types
2017-10-25 08:43:25,554 [MainThread  ] [INFO ]  PROGRESS: at sentence #41130000, processed 431269723 words, keeping 6637138 word types
2017-10-25 08:43:26,647 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:44:23,161 [MainThread  ] [INFO ]  PROGRESS: at sentence #41680000, processed 437029392 words, keeping 6693472 word types
2017-10-25 08:44:24,179 [MainThread  ] [INFO ]  PROGRESS: at sentence #41690000, processed 437131549 words, keeping 6694519 word types
2017-10-25 08:44:25,199 [MainThread  ] [INFO ]  PROGRESS: at sentence #41700000, processed 437232966 words, keeping 6695566 word types
2017-10-25 08:44:26,221 [MainThread  ] [INFO ]  PROGRESS: at sentence #41710000, processed 437335631 words, keeping 6696656 word types
2017-10-25 08:44:27,192 [MainThread  ] [INFO ]  PROGRESS: at sentence #41720000, processed 437432236 words, keeping 6697844 word types
2017-10-25 08:44:28,220 [MainThread  ] [INFO ]  PROGRESS: at sentence #41730000, processed 437535191 words, keeping 6698409 word types
2017-10-25 08:44:29,239 [MainThread  ] [INFO ]  PROGRESS: at sentence #41740000, processed 437632206 words, keeping 6699430 word types
2017-10-25 08:44:30,303 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:45:27,101 [MainThread  ] [INFO ]  PROGRESS: at sentence #42290000, processed 443445620 words, keeping 6761553 word types
2017-10-25 08:45:28,108 [MainThread  ] [INFO ]  PROGRESS: at sentence #42300000, processed 443545395 words, keeping 6762717 word types
2017-10-25 08:45:29,162 [MainThread  ] [INFO ]  PROGRESS: at sentence #42310000, processed 443648950 words, keeping 6764362 word types
2017-10-25 08:45:30,185 [MainThread  ] [INFO ]  PROGRESS: at sentence #42320000, processed 443751180 words, keeping 6765202 word types
2017-10-25 08:45:31,250 [MainThread  ] [INFO ]  PROGRESS: at sentence #42330000, processed 443857147 words, keeping 6766129 word types
2017-10-25 08:45:32,325 [MainThread  ] [INFO ]  PROGRESS: at sentence #42340000, processed 443966178 words, keeping 6766928 word types
2017-10-25 08:45:33,407 [MainThread  ] [INFO ]  PROGRESS: at sentence #42350000, processed 444076584 words, keeping 6767752 word types
2017-10-25 08:45:34,416 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:46:31,333 [MainThread  ] [INFO ]  PROGRESS: at sentence #42900000, processed 449770301 words, keeping 6824718 word types
2017-10-25 08:46:32,349 [MainThread  ] [INFO ]  PROGRESS: at sentence #42910000, processed 449869777 words, keeping 6825981 word types
2017-10-25 08:46:33,415 [MainThread  ] [INFO ]  PROGRESS: at sentence #42920000, processed 449977295 words, keeping 6826886 word types
2017-10-25 08:46:34,485 [MainThread  ] [INFO ]  PROGRESS: at sentence #42930000, processed 450086283 words, keeping 6827781 word types
2017-10-25 08:46:35,502 [MainThread  ] [INFO ]  PROGRESS: at sentence #42940000, processed 450188214 words, keeping 6828473 word types
2017-10-25 08:46:36,583 [MainThread  ] [INFO ]  PROGRESS: at sentence #42950000, processed 450297537 words, keeping 6829453 word types
2017-10-25 08:46:37,634 [MainThread  ] [INFO ]  PROGRESS: at sentence #42960000, processed 450403183 words, keeping 6830273 word types
2017-10-25 08:46:38,649 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:47:35,794 [MainThread  ] [INFO ]  PROGRESS: at sentence #43510000, processed 456217109 words, keeping 6884602 word types
2017-10-25 08:47:36,865 [MainThread  ] [INFO ]  PROGRESS: at sentence #43520000, processed 456321901 words, keeping 6886006 word types
2017-10-25 08:47:37,888 [MainThread  ] [INFO ]  PROGRESS: at sentence #43530000, processed 456422828 words, keeping 6887144 word types
2017-10-25 08:47:38,874 [MainThread  ] [INFO ]  PROGRESS: at sentence #43540000, processed 456519750 words, keeping 6888088 word types
2017-10-25 08:47:39,931 [MainThread  ] [INFO ]  PROGRESS: at sentence #43550000, processed 456626367 words, keeping 6888958 word types
2017-10-25 08:47:41,027 [MainThread  ] [INFO ]  PROGRESS: at sentence #43560000, processed 456735235 words, keeping 6889882 word types
2017-10-25 08:47:42,071 [MainThread  ] [INFO ]  PROGRESS: at sentence #43570000, processed 456839979 words, keeping 6891076 word types
2017-10-25 08:47:43,146 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:48:39,747 [MainThread  ] [INFO ]  PROGRESS: at sentence #44120000, processed 462572145 words, keeping 6951896 word types
2017-10-25 08:48:40,775 [MainThread  ] [INFO ]  PROGRESS: at sentence #44130000, processed 462674841 words, keeping 6953122 word types
2017-10-25 08:48:41,904 [MainThread  ] [INFO ]  PROGRESS: at sentence #44140000, processed 462792344 words, keeping 6954190 word types
2017-10-25 08:48:42,965 [MainThread  ] [INFO ]  PROGRESS: at sentence #44150000, processed 462898787 words, keeping 6954976 word types
2017-10-25 08:48:43,917 [MainThread  ] [INFO ]  PROGRESS: at sentence #44160000, processed 462992608 words, keeping 6955600 word types
2017-10-25 08:48:44,949 [MainThread  ] [INFO ]  PROGRESS: at sentence #44170000, processed 463090815 words, keeping 6956980 word types
2017-10-25 08:48:46,027 [MainThread  ] [INFO ]  PROGRESS: at sentence #44180000, processed 463198267 words, keeping 6958173 word types
2017-10-25 08:48:47,069 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:49:44,048 [MainThread  ] [INFO ]  PROGRESS: at sentence #44730000, processed 468976309 words, keeping 7013985 word types
2017-10-25 08:49:45,071 [MainThread  ] [INFO ]  PROGRESS: at sentence #44740000, processed 469074675 words, keeping 7014776 word types
2017-10-25 08:49:46,143 [MainThread  ] [INFO ]  PROGRESS: at sentence #44750000, processed 469181436 words, keeping 7015574 word types
2017-10-25 08:49:47,241 [MainThread  ] [INFO ]  PROGRESS: at sentence #44760000, processed 469292790 words, keeping 7016645 word types
2017-10-25 08:49:48,325 [MainThread  ] [INFO ]  PROGRESS: at sentence #44770000, processed 469401247 words, keeping 7017551 word types
2017-10-25 08:49:49,296 [MainThread  ] [INFO ]  PROGRESS: at sentence #44780000, processed 469492058 words, keeping 7019142 word types
2017-10-25 08:49:50,318 [MainThread  ] [INFO ]  PROGRESS: at sentence #44790000, processed 469590008 words, keeping 7020783 word types
2017-10-25 08:49:51,397 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:50:48,346 [MainThread  ] [INFO ]  PROGRESS: at sentence #45340000, processed 475370668 words, keeping 7078687 word types
2017-10-25 08:50:49,409 [MainThread  ] [INFO ]  PROGRESS: at sentence #45350000, processed 475477694 words, keeping 7079675 word types
2017-10-25 08:50:50,561 [MainThread  ] [INFO ]  PROGRESS: at sentence #45360000, processed 475595725 words, keeping 7080288 word types
2017-10-25 08:50:51,619 [MainThread  ] [INFO ]  PROGRESS: at sentence #45370000, processed 475698350 words, keeping 7081361 word types
2017-10-25 08:50:52,652 [MainThread  ] [INFO ]  PROGRESS: at sentence #45380000, processed 475799830 words, keeping 7082105 word types
2017-10-25 08:50:53,761 [MainThread  ] [INFO ]  PROGRESS: at sentence #45390000, processed 475912746 words, keeping 7082818 word types
2017-10-25 08:50:54,869 [MainThread  ] [INFO ]  PROGRESS: at sentence #45400000, processed 476023772 words, keeping 7083740 word types
2017-10-25 08:50:55,951 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:51:52,586 [MainThread  ] [INFO ]  PROGRESS: at sentence #45950000, processed 481774206 words, keeping 7137502 word types
2017-10-25 08:51:53,680 [MainThread  ] [INFO ]  PROGRESS: at sentence #45960000, processed 481884489 words, keeping 7138569 word types
2017-10-25 08:51:54,732 [MainThread  ] [INFO ]  PROGRESS: at sentence #45970000, processed 481990121 words, keeping 7139692 word types
2017-10-25 08:51:55,771 [MainThread  ] [INFO ]  PROGRESS: at sentence #45980000, processed 482093592 words, keeping 7140560 word types
2017-10-25 08:51:56,852 [MainThread  ] [INFO ]  PROGRESS: at sentence #45990000, processed 482203007 words, keeping 7141716 word types
2017-10-25 08:51:57,978 [MainThread  ] [INFO ]  PROGRESS: at sentence #46000000, processed 482310825 words, keeping 7142520 word types
2017-10-25 08:51:59,032 [MainThread  ] [INFO ]  PROGRESS: at sentence #46010000, processed 482416097 words, keeping 7143162 word types
2017-10-25 08:52:00,079 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:52:56,963 [MainThread  ] [INFO ]  PROGRESS: at sentence #46560000, processed 488190217 words, keeping 7193456 word types
2017-10-25 08:52:57,997 [MainThread  ] [INFO ]  PROGRESS: at sentence #46570000, processed 488292574 words, keeping 7194269 word types
2017-10-25 08:52:59,056 [MainThread  ] [INFO ]  PROGRESS: at sentence #46580000, processed 488397181 words, keeping 7195584 word types
2017-10-25 08:53:00,045 [MainThread  ] [INFO ]  PROGRESS: at sentence #46590000, processed 488493055 words, keeping 7196601 word types
2017-10-25 08:53:01,805 [MainThread  ] [INFO ]  PROGRESS: at sentence #46600000, processed 488597318 words, keeping 7198320 word types
2017-10-25 08:53:02,932 [MainThread  ] [INFO ]  PROGRESS: at sentence #46610000, processed 488707466 words, keeping 7199586 word types
2017-10-25 08:53:03,983 [MainThread  ] [INFO ]  PROGRESS: at sentence #46620000, processed 488812119 words, keeping 7200300 word types
2017-10-25 08:53:05,030 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:54:02,136 [MainThread  ] [INFO ]  PROGRESS: at sentence #47170000, processed 494582134 words, keeping 7251914 word types
2017-10-25 08:54:03,235 [MainThread  ] [INFO ]  PROGRESS: at sentence #47180000, processed 494689857 words, keeping 7253201 word types
2017-10-25 08:54:04,293 [MainThread  ] [INFO ]  PROGRESS: at sentence #47190000, processed 494796624 words, keeping 7254260 word types
2017-10-25 08:54:05,386 [MainThread  ] [INFO ]  PROGRESS: at sentence #47200000, processed 494906921 words, keeping 7255339 word types
2017-10-25 08:54:06,447 [MainThread  ] [INFO ]  PROGRESS: at sentence #47210000, processed 495011652 words, keeping 7256448 word types
2017-10-25 08:54:07,550 [MainThread  ] [INFO ]  PROGRESS: at sentence #47220000, processed 495122128 words, keeping 7257503 word types
2017-10-25 08:54:08,575 [MainThread  ] [INFO ]  PROGRESS: at sentence #47230000, processed 495223648 words, keeping 7258482 word types
2017-10-25 08:54:09,612 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:55:07,000 [MainThread  ] [INFO ]  PROGRESS: at sentence #47780000, processed 501037594 words, keeping 7312417 word types
2017-10-25 08:55:08,027 [MainThread  ] [INFO ]  PROGRESS: at sentence #47790000, processed 501140524 words, keeping 7313072 word types
2017-10-25 08:55:08,920 [MainThread  ] [INFO ]  PROGRESS: at sentence #47800000, processed 501225550 words, keeping 7314365 word types
2017-10-25 08:55:09,943 [MainThread  ] [INFO ]  PROGRESS: at sentence #47810000, processed 501327285 words, keeping 7315361 word types
2017-10-25 08:55:11,009 [MainThread  ] [INFO ]  PROGRESS: at sentence #47820000, processed 501434175 words, keeping 7316336 word types
2017-10-25 08:55:12,059 [MainThread  ] [INFO ]  PROGRESS: at sentence #47830000, processed 501538250 words, keeping 7317347 word types
2017-10-25 08:55:13,119 [MainThread  ] [INFO ]  PROGRESS: at sentence #47840000, processed 501644920 words, keeping 7318094 word types
2017-10-25 08:55:14,166 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:56:11,182 [MainThread  ] [INFO ]  PROGRESS: at sentence #48390000, processed 507413677 words, keeping 7374525 word types
2017-10-25 08:56:12,242 [MainThread  ] [INFO ]  PROGRESS: at sentence #48400000, processed 507518398 words, keeping 7375952 word types
2017-10-25 08:56:13,271 [MainThread  ] [INFO ]  PROGRESS: at sentence #48410000, processed 507620482 words, keeping 7377075 word types
2017-10-25 08:56:14,351 [MainThread  ] [INFO ]  PROGRESS: at sentence #48420000, processed 507729076 words, keeping 7378059 word types
2017-10-25 08:56:15,424 [MainThread  ] [INFO ]  PROGRESS: at sentence #48430000, processed 507836384 words, keeping 7378680 word types
2017-10-25 08:56:16,460 [MainThread  ] [INFO ]  PROGRESS: at sentence #48440000, processed 507939074 words, keeping 7379382 word types
2017-10-25 08:56:17,509 [MainThread  ] [INFO ]  PROGRESS: at sentence #48450000, processed 508041850 words, keeping 7380647 word types
2017-10-25 08:56:18,524 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:57:15,356 [MainThread  ] [INFO ]  PROGRESS: at sentence #49000000, processed 513753232 words, keeping 7434864 word types
2017-10-25 08:57:16,427 [MainThread  ] [INFO ]  PROGRESS: at sentence #49010000, processed 513860181 words, keeping 7435760 word types
2017-10-25 08:57:17,512 [MainThread  ] [INFO ]  PROGRESS: at sentence #49020000, processed 513968864 words, keeping 7436726 word types
2017-10-25 08:57:18,546 [MainThread  ] [INFO ]  PROGRESS: at sentence #49030000, processed 514071666 words, keeping 7437673 word types
2017-10-25 08:57:19,573 [MainThread  ] [INFO ]  PROGRESS: at sentence #49040000, processed 514173167 words, keeping 7438429 word types
2017-10-25 08:57:20,665 [MainThread  ] [INFO ]  PROGRESS: at sentence #49050000, processed 514283114 words, keeping 7439369 word types
2017-10-25 08:57:21,707 [MainThread  ] [INFO ]  PROGRESS: at sentence #49060000, processed 514386180 words, keeping 7440621 word types
2017-10-25 08:57:22,699 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:58:20,167 [MainThread  ] [INFO ]  PROGRESS: at sentence #49610000, processed 520162839 words, keeping 7496083 word types
2017-10-25 08:58:21,238 [MainThread  ] [INFO ]  PROGRESS: at sentence #49620000, processed 520268987 words, keeping 7496693 word types
2017-10-25 08:58:22,334 [MainThread  ] [INFO ]  PROGRESS: at sentence #49630000, processed 520380215 words, keeping 7497788 word types
2017-10-25 08:58:23,341 [MainThread  ] [INFO ]  PROGRESS: at sentence #49640000, processed 520479705 words, keeping 7498690 word types
2017-10-25 08:58:24,368 [MainThread  ] [INFO ]  PROGRESS: at sentence #49650000, processed 520581182 words, keeping 7499669 word types
2017-10-25 08:58:25,433 [MainThread  ] [INFO ]  PROGRESS: at sentence #49660000, processed 520686807 words, keeping 7500441 word types
2017-10-25 08:58:26,473 [MainThread  ] [INFO ]  PROGRESS: at sentence #49670000, processed 520789865 words, keeping 7501501 word types
2017-10-25 08:58:27,481 [MainThread  ] [INFO ]  PROGRES

2017-10-25 08:59:24,683 [MainThread  ] [INFO ]  PROGRESS: at sentence #50220000, processed 526538135 words, keeping 7554693 word types
2017-10-25 08:59:25,768 [MainThread  ] [INFO ]  PROGRESS: at sentence #50230000, processed 526647890 words, keeping 7555439 word types
2017-10-25 08:59:26,874 [MainThread  ] [INFO ]  PROGRESS: at sentence #50240000, processed 526758038 words, keeping 7556296 word types
2017-10-25 08:59:27,932 [MainThread  ] [INFO ]  PROGRESS: at sentence #50250000, processed 526864183 words, keeping 7557087 word types
2017-10-25 08:59:28,953 [MainThread  ] [INFO ]  PROGRESS: at sentence #50260000, processed 526965367 words, keeping 7557921 word types
2017-10-25 08:59:30,055 [MainThread  ] [INFO ]  PROGRESS: at sentence #50270000, processed 527075967 words, keeping 7558571 word types
2017-10-25 08:59:31,113 [MainThread  ] [INFO ]  PROGRESS: at sentence #50280000, processed 527181315 words, keeping 7559493 word types
2017-10-25 08:59:32,233 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:00:29,451 [MainThread  ] [INFO ]  PROGRESS: at sentence #50830000, processed 532969633 words, keeping 7610598 word types
2017-10-25 09:00:30,520 [MainThread  ] [INFO ]  PROGRESS: at sentence #50840000, processed 533076832 words, keeping 7611257 word types
2017-10-25 09:00:31,542 [MainThread  ] [INFO ]  PROGRESS: at sentence #50850000, processed 533177224 words, keeping 7612121 word types
2017-10-25 09:00:32,522 [MainThread  ] [INFO ]  PROGRESS: at sentence #50860000, processed 533272686 words, keeping 7614093 word types
2017-10-25 09:00:33,554 [MainThread  ] [INFO ]  PROGRESS: at sentence #50870000, processed 533374433 words, keeping 7614753 word types
2017-10-25 09:00:34,657 [MainThread  ] [INFO ]  PROGRESS: at sentence #50880000, processed 533484397 words, keeping 7615503 word types
2017-10-25 09:00:35,674 [MainThread  ] [INFO ]  PROGRESS: at sentence #50890000, processed 533584674 words, keeping 7616494 word types
2017-10-25 09:00:36,743 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:01:34,046 [MainThread  ] [INFO ]  PROGRESS: at sentence #51440000, processed 539343714 words, keeping 7672201 word types
2017-10-25 09:01:35,102 [MainThread  ] [INFO ]  PROGRESS: at sentence #51450000, processed 539446172 words, keeping 7673314 word types
2017-10-25 09:01:36,150 [MainThread  ] [INFO ]  PROGRESS: at sentence #51460000, processed 539549204 words, keeping 7674021 word types
2017-10-25 09:01:37,224 [MainThread  ] [INFO ]  PROGRESS: at sentence #51470000, processed 539657310 words, keeping 7674526 word types
2017-10-25 09:01:38,256 [MainThread  ] [INFO ]  PROGRESS: at sentence #51480000, processed 539758493 words, keeping 7675310 word types
2017-10-25 09:01:39,335 [MainThread  ] [INFO ]  PROGRESS: at sentence #51490000, processed 539866402 words, keeping 7676734 word types
2017-10-25 09:01:40,389 [MainThread  ] [INFO ]  PROGRESS: at sentence #51500000, processed 539971336 words, keeping 7677963 word types
2017-10-25 09:01:41,456 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:02:39,887 [MainThread  ] [INFO ]  PROGRESS: at sentence #52050000, processed 545789828 words, keeping 7726677 word types
2017-10-25 09:02:40,944 [MainThread  ] [INFO ]  PROGRESS: at sentence #52060000, processed 545892817 words, keeping 7727521 word types
2017-10-25 09:02:41,974 [MainThread  ] [INFO ]  PROGRESS: at sentence #52070000, processed 545994692 words, keeping 7728453 word types
2017-10-25 09:02:43,021 [MainThread  ] [INFO ]  PROGRESS: at sentence #52080000, processed 546097252 words, keeping 7729726 word types
2017-10-25 09:02:44,056 [MainThread  ] [INFO ]  PROGRESS: at sentence #52090000, processed 546199776 words, keeping 7730378 word types
2017-10-25 09:02:45,115 [MainThread  ] [INFO ]  PROGRESS: at sentence #52100000, processed 546302296 words, keeping 7730836 word types
2017-10-25 09:02:46,162 [MainThread  ] [INFO ]  PROGRESS: at sentence #52110000, processed 546403505 words, keeping 7731933 word types
2017-10-25 09:02:47,235 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:03:44,674 [MainThread  ] [INFO ]  PROGRESS: at sentence #52660000, processed 552190782 words, keeping 7784143 word types
2017-10-25 09:03:45,708 [MainThread  ] [INFO ]  PROGRESS: at sentence #52670000, processed 552292611 words, keeping 7785214 word types
2017-10-25 09:03:46,790 [MainThread  ] [INFO ]  PROGRESS: at sentence #52680000, processed 552401393 words, keeping 7785957 word types
2017-10-25 09:03:47,897 [MainThread  ] [INFO ]  PROGRESS: at sentence #52690000, processed 552512440 words, keeping 7786560 word types
2017-10-25 09:03:48,892 [MainThread  ] [INFO ]  PROGRESS: at sentence #52700000, processed 552610173 words, keeping 7787431 word types
2017-10-25 09:03:50,079 [MainThread  ] [INFO ]  PROGRESS: at sentence #52710000, processed 552728512 words, keeping 7788984 word types
2017-10-25 09:03:51,133 [MainThread  ] [INFO ]  PROGRESS: at sentence #52720000, processed 552830526 words, keeping 7790612 word types
2017-10-25 09:03:52,191 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:04:49,271 [MainThread  ] [INFO ]  PROGRESS: at sentence #53270000, processed 558548349 words, keeping 7841120 word types
2017-10-25 09:04:50,363 [MainThread  ] [INFO ]  PROGRESS: at sentence #53280000, processed 558652915 words, keeping 7842749 word types
2017-10-25 09:04:51,471 [MainThread  ] [INFO ]  PROGRESS: at sentence #53290000, processed 558758693 words, keeping 7843750 word types
2017-10-25 09:04:52,518 [MainThread  ] [INFO ]  PROGRESS: at sentence #53300000, processed 558857444 words, keeping 7845025 word types
2017-10-25 09:04:53,571 [MainThread  ] [INFO ]  PROGRESS: at sentence #53310000, processed 558958263 words, keeping 7845823 word types
2017-10-25 09:04:54,630 [MainThread  ] [INFO ]  PROGRESS: at sentence #53320000, processed 559062918 words, keeping 7846468 word types
2017-10-25 09:04:55,717 [MainThread  ] [INFO ]  PROGRESS: at sentence #53330000, processed 559169173 words, keeping 7847621 word types
2017-10-25 09:04:56,766 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:05:54,184 [MainThread  ] [INFO ]  PROGRESS: at sentence #53880000, processed 564963082 words, keeping 7897180 word types
2017-10-25 09:05:55,235 [MainThread  ] [INFO ]  PROGRESS: at sentence #53890000, processed 565066538 words, keeping 7898402 word types
2017-10-25 09:05:56,312 [MainThread  ] [INFO ]  PROGRESS: at sentence #53900000, processed 565173440 words, keeping 7899299 word types
2017-10-25 09:05:57,352 [MainThread  ] [INFO ]  PROGRESS: at sentence #53910000, processed 565274089 words, keeping 7900350 word types
2017-10-25 09:05:58,414 [MainThread  ] [INFO ]  PROGRESS: at sentence #53920000, processed 565376426 words, keeping 7902057 word types
2017-10-25 09:05:59,448 [MainThread  ] [INFO ]  PROGRESS: at sentence #53930000, processed 565479038 words, keeping 7902672 word types
2017-10-25 09:06:00,521 [MainThread  ] [INFO ]  PROGRESS: at sentence #53940000, processed 565584740 words, keeping 7903375 word types
2017-10-25 09:06:01,626 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:06:59,059 [MainThread  ] [INFO ]  PROGRESS: at sentence #54490000, processed 571364050 words, keeping 7956854 word types
2017-10-25 09:07:00,183 [MainThread  ] [INFO ]  PROGRESS: at sentence #54500000, processed 571475316 words, keeping 7957519 word types
2017-10-25 09:07:01,216 [MainThread  ] [INFO ]  PROGRESS: at sentence #54510000, processed 571573179 words, keeping 7958704 word types
2017-10-25 09:07:02,293 [MainThread  ] [INFO ]  PROGRESS: at sentence #54520000, processed 571679000 words, keeping 7959945 word types
2017-10-25 09:07:03,396 [MainThread  ] [INFO ]  PROGRESS: at sentence #54530000, processed 571785396 words, keeping 7961017 word types
2017-10-25 09:07:04,446 [MainThread  ] [INFO ]  PROGRESS: at sentence #54540000, processed 571886320 words, keeping 7961819 word types
2017-10-25 09:07:05,496 [MainThread  ] [INFO ]  PROGRESS: at sentence #54550000, processed 571985713 words, keeping 7962535 word types
2017-10-25 09:07:06,509 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:08:04,625 [MainThread  ] [INFO ]  PROGRESS: at sentence #55100000, processed 577851386 words, keeping 8016766 word types
2017-10-25 09:08:05,718 [MainThread  ] [INFO ]  PROGRESS: at sentence #55110000, processed 577960943 words, keeping 8017635 word types
2017-10-25 09:08:06,837 [MainThread  ] [INFO ]  PROGRESS: at sentence #55120000, processed 578072117 words, keeping 8018771 word types
2017-10-25 09:08:07,913 [MainThread  ] [INFO ]  PROGRESS: at sentence #55130000, processed 578177541 words, keeping 8019588 word types
2017-10-25 09:08:09,043 [MainThread  ] [INFO ]  PROGRESS: at sentence #55140000, processed 578291499 words, keeping 8020695 word types
2017-10-25 09:08:10,020 [MainThread  ] [INFO ]  PROGRESS: at sentence #55150000, processed 578385045 words, keeping 8022359 word types
2017-10-25 09:08:11,103 [MainThread  ] [INFO ]  PROGRESS: at sentence #55160000, processed 578492442 words, keeping 8023203 word types
2017-10-25 09:08:12,164 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:09:10,034 [MainThread  ] [INFO ]  PROGRESS: at sentence #55710000, processed 584310902 words, keeping 8076025 word types
2017-10-25 09:09:11,109 [MainThread  ] [INFO ]  PROGRESS: at sentence #55720000, processed 584418214 words, keeping 8076773 word types
2017-10-25 09:09:12,154 [MainThread  ] [INFO ]  PROGRESS: at sentence #55730000, processed 584519577 words, keeping 8077454 word types
2017-10-25 09:09:13,240 [MainThread  ] [INFO ]  PROGRESS: at sentence #55740000, processed 584629001 words, keeping 8078402 word types
2017-10-25 09:09:14,296 [MainThread  ] [INFO ]  PROGRESS: at sentence #55750000, processed 584733482 words, keeping 8079377 word types
2017-10-25 09:09:15,344 [MainThread  ] [INFO ]  PROGRESS: at sentence #55760000, processed 584837486 words, keeping 8080349 word types
2017-10-25 09:09:16,405 [MainThread  ] [INFO ]  PROGRESS: at sentence #55770000, processed 584942913 words, keeping 8080996 word types
2017-10-25 09:09:17,507 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:10:15,144 [MainThread  ] [INFO ]  PROGRESS: at sentence #56320000, processed 590733535 words, keeping 8134496 word types
2017-10-25 09:10:16,176 [MainThread  ] [INFO ]  PROGRESS: at sentence #56330000, processed 590835150 words, keeping 8135162 word types
2017-10-25 09:10:17,245 [MainThread  ] [INFO ]  PROGRESS: at sentence #56340000, processed 590940208 words, keeping 8136020 word types
2017-10-25 09:10:18,298 [MainThread  ] [INFO ]  PROGRESS: at sentence #56350000, processed 591043880 words, keeping 8136686 word types
2017-10-25 09:10:19,395 [MainThread  ] [INFO ]  PROGRESS: at sentence #56360000, processed 591154220 words, keeping 8138208 word types
2017-10-25 09:10:20,465 [MainThread  ] [INFO ]  PROGRESS: at sentence #56370000, processed 591260667 words, keeping 8138822 word types
2017-10-25 09:10:21,537 [MainThread  ] [INFO ]  PROGRESS: at sentence #56380000, processed 591366184 words, keeping 8139429 word types
2017-10-25 09:10:22,592 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:11:21,275 [MainThread  ] [INFO ]  PROGRESS: at sentence #56930000, processed 597167667 words, keeping 8198111 word types
2017-10-25 09:11:22,344 [MainThread  ] [INFO ]  PROGRESS: at sentence #56940000, processed 597272429 words, keeping 8198777 word types
2017-10-25 09:11:23,386 [MainThread  ] [INFO ]  PROGRESS: at sentence #56950000, processed 597374877 words, keeping 8199873 word types
2017-10-25 09:11:24,515 [MainThread  ] [INFO ]  PROGRESS: at sentence #56960000, processed 597489360 words, keeping 8200943 word types
2017-10-25 09:11:25,602 [MainThread  ] [INFO ]  PROGRESS: at sentence #56970000, processed 597597011 words, keeping 8202002 word types
2017-10-25 09:11:26,642 [MainThread  ] [INFO ]  PROGRESS: at sentence #56980000, processed 597697862 words, keeping 8203211 word types
2017-10-25 09:11:27,703 [MainThread  ] [INFO ]  PROGRESS: at sentence #56990000, processed 597803854 words, keeping 8204416 word types
2017-10-25 09:11:28,783 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:12:26,270 [MainThread  ] [INFO ]  PROGRESS: at sentence #57540000, processed 603565280 words, keeping 8255109 word types
2017-10-25 09:12:27,339 [MainThread  ] [INFO ]  PROGRESS: at sentence #57550000, processed 603667230 words, keeping 8256140 word types
2017-10-25 09:12:28,395 [MainThread  ] [INFO ]  PROGRESS: at sentence #57560000, processed 603771075 words, keeping 8257394 word types
2017-10-25 09:12:29,502 [MainThread  ] [INFO ]  PROGRESS: at sentence #57570000, processed 603882991 words, keeping 8258388 word types
2017-10-25 09:12:30,691 [MainThread  ] [INFO ]  PROGRESS: at sentence #57580000, processed 604000124 words, keeping 8259560 word types
2017-10-25 09:12:31,634 [MainThread  ] [INFO ]  PROGRESS: at sentence #57590000, processed 604083394 words, keeping 8261644 word types
2017-10-25 09:12:32,728 [MainThread  ] [INFO ]  PROGRESS: at sentence #57600000, processed 604189096 words, keeping 8262412 word types
2017-10-25 09:12:33,795 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:13:31,045 [MainThread  ] [INFO ]  PROGRESS: at sentence #58150000, processed 609933018 words, keeping 8317677 word types
2017-10-25 09:13:32,144 [MainThread  ] [INFO ]  PROGRESS: at sentence #58160000, processed 610038634 words, keeping 8318423 word types
2017-10-25 09:13:33,183 [MainThread  ] [INFO ]  PROGRESS: at sentence #58170000, processed 610139969 words, keeping 8319392 word types
2017-10-25 09:13:34,252 [MainThread  ] [INFO ]  PROGRESS: at sentence #58180000, processed 610243840 words, keeping 8320075 word types
2017-10-25 09:13:35,260 [MainThread  ] [INFO ]  PROGRESS: at sentence #58190000, processed 610341398 words, keeping 8320685 word types
2017-10-25 09:13:36,356 [MainThread  ] [INFO ]  PROGRESS: at sentence #58200000, processed 610449699 words, keeping 8321469 word types
2017-10-25 09:13:37,414 [MainThread  ] [INFO ]  PROGRESS: at sentence #58210000, processed 610554240 words, keeping 8322191 word types
2017-10-25 09:13:38,511 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:14:36,161 [MainThread  ] [INFO ]  PROGRESS: at sentence #58760000, processed 616346377 words, keeping 8373161 word types
2017-10-25 09:14:37,238 [MainThread  ] [INFO ]  PROGRESS: at sentence #58770000, processed 616452026 words, keeping 8374319 word types
2017-10-25 09:14:38,301 [MainThread  ] [INFO ]  PROGRESS: at sentence #58780000, processed 616557617 words, keeping 8374827 word types
2017-10-25 09:14:39,385 [MainThread  ] [INFO ]  PROGRESS: at sentence #58790000, processed 616665453 words, keeping 8375548 word types
2017-10-25 09:14:40,482 [MainThread  ] [INFO ]  PROGRESS: at sentence #58800000, processed 616774290 words, keeping 8376519 word types
2017-10-25 09:14:41,580 [MainThread  ] [INFO ]  PROGRESS: at sentence #58810000, processed 616883126 words, keeping 8377371 word types
2017-10-25 09:14:42,549 [MainThread  ] [INFO ]  PROGRESS: at sentence #58820000, processed 616973364 words, keeping 8378310 word types
2017-10-25 09:14:43,673 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:15:41,044 [MainThread  ] [INFO ]  PROGRESS: at sentence #59370000, processed 622705635 words, keeping 8425615 word types
2017-10-25 09:15:42,057 [MainThread  ] [INFO ]  PROGRESS: at sentence #59380000, processed 622805379 words, keeping 8426477 word types
2017-10-25 09:15:43,057 [MainThread  ] [INFO ]  PROGRESS: at sentence #59390000, processed 622901194 words, keeping 8427417 word types
2017-10-25 09:15:44,148 [MainThread  ] [INFO ]  PROGRESS: at sentence #59400000, processed 623009470 words, keeping 8428204 word types
2017-10-25 09:15:45,133 [MainThread  ] [INFO ]  PROGRESS: at sentence #59410000, processed 623101457 words, keeping 8428784 word types
2017-10-25 09:15:46,216 [MainThread  ] [INFO ]  PROGRESS: at sentence #59420000, processed 623210222 words, keeping 8429415 word types
2017-10-25 09:15:47,300 [MainThread  ] [INFO ]  PROGRESS: at sentence #59430000, processed 623316923 words, keeping 8430193 word types
2017-10-25 09:15:48,362 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:16:46,381 [MainThread  ] [INFO ]  PROGRESS: at sentence #59980000, processed 629142864 words, keeping 8481110 word types
2017-10-25 09:16:47,450 [MainThread  ] [INFO ]  PROGRESS: at sentence #59990000, processed 629248992 words, keeping 8481753 word types
2017-10-25 09:16:48,502 [MainThread  ] [INFO ]  PROGRESS: at sentence #60000000, processed 629352763 words, keeping 8482837 word types
2017-10-25 09:16:49,597 [MainThread  ] [INFO ]  PROGRESS: at sentence #60010000, processed 629459744 words, keeping 8484259 word types
2017-10-25 09:16:50,620 [MainThread  ] [INFO ]  PROGRESS: at sentence #60020000, processed 629560071 words, keeping 8485481 word types
2017-10-25 09:16:51,690 [MainThread  ] [INFO ]  PROGRESS: at sentence #60030000, processed 629663165 words, keeping 8486143 word types
2017-10-25 09:16:52,787 [MainThread  ] [INFO ]  PROGRESS: at sentence #60040000, processed 629770072 words, keeping 8486869 word types
2017-10-25 09:16:53,830 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:17:51,068 [MainThread  ] [INFO ]  PROGRESS: at sentence #60590000, processed 635478600 words, keeping 8538516 word types
2017-10-25 09:17:52,151 [MainThread  ] [INFO ]  PROGRESS: at sentence #60600000, processed 635581660 words, keeping 8539435 word types
2017-10-25 09:17:53,294 [MainThread  ] [INFO ]  PROGRESS: at sentence #60610000, processed 635694375 words, keeping 8540466 word types
2017-10-25 09:17:54,423 [MainThread  ] [INFO ]  PROGRESS: at sentence #60620000, processed 635808069 words, keeping 8541835 word types
2017-10-25 09:17:55,454 [MainThread  ] [INFO ]  PROGRESS: at sentence #60630000, processed 635908114 words, keeping 8542543 word types
2017-10-25 09:17:56,542 [MainThread  ] [INFO ]  PROGRESS: at sentence #60640000, processed 636014279 words, keeping 8543322 word types
2017-10-25 09:17:57,667 [MainThread  ] [INFO ]  PROGRESS: at sentence #60650000, processed 636127181 words, keeping 8544182 word types
2017-10-25 09:17:58,813 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:18:57,090 [MainThread  ] [INFO ]  PROGRESS: at sentence #61200000, processed 642000826 words, keeping 8599167 word types
2017-10-25 09:18:58,180 [MainThread  ] [INFO ]  PROGRESS: at sentence #61210000, processed 642107618 words, keeping 8599907 word types
2017-10-25 09:18:59,250 [MainThread  ] [INFO ]  PROGRESS: at sentence #61220000, processed 642212520 words, keeping 8600558 word types
2017-10-25 09:19:00,326 [MainThread  ] [INFO ]  PROGRESS: at sentence #61230000, processed 642318411 words, keeping 8601174 word types
2017-10-25 09:19:01,399 [MainThread  ] [INFO ]  PROGRESS: at sentence #61240000, processed 642425928 words, keeping 8601695 word types
2017-10-25 09:19:02,476 [MainThread  ] [INFO ]  PROGRESS: at sentence #61250000, processed 642531829 words, keeping 8602453 word types
2017-10-25 09:19:03,557 [MainThread  ] [INFO ]  PROGRESS: at sentence #61260000, processed 642638215 words, keeping 8603258 word types
2017-10-25 09:19:04,610 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:20:03,417 [MainThread  ] [INFO ]  PROGRESS: at sentence #61810000, processed 648452648 words, keeping 8650825 word types
2017-10-25 09:20:04,391 [MainThread  ] [INFO ]  PROGRESS: at sentence #61820000, processed 648546502 words, keeping 8651771 word types
2017-10-25 09:20:05,493 [MainThread  ] [INFO ]  PROGRESS: at sentence #61830000, processed 648655876 words, keeping 8652657 word types
2017-10-25 09:20:06,555 [MainThread  ] [INFO ]  PROGRESS: at sentence #61840000, processed 648758262 words, keeping 8653416 word types
2017-10-25 09:20:07,608 [MainThread  ] [INFO ]  PROGRESS: at sentence #61850000, processed 648862897 words, keeping 8654308 word types
2017-10-25 09:20:08,724 [MainThread  ] [INFO ]  PROGRESS: at sentence #61860000, processed 648975766 words, keeping 8655433 word types
2017-10-25 09:20:09,778 [MainThread  ] [INFO ]  PROGRESS: at sentence #61870000, processed 649076703 words, keeping 8656979 word types
2017-10-25 09:20:10,846 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:21:08,703 [MainThread  ] [INFO ]  PROGRESS: at sentence #62420000, processed 654885561 words, keeping 8703235 word types
2017-10-25 09:21:09,778 [MainThread  ] [INFO ]  PROGRESS: at sentence #62430000, processed 654992333 words, keeping 8703948 word types
2017-10-25 09:21:10,870 [MainThread  ] [INFO ]  PROGRESS: at sentence #62440000, processed 655101375 words, keeping 8705175 word types
2017-10-25 09:21:12,028 [MainThread  ] [INFO ]  PROGRESS: at sentence #62450000, processed 655217047 words, keeping 8706770 word types
2017-10-25 09:21:13,058 [MainThread  ] [INFO ]  PROGRESS: at sentence #62460000, processed 655318275 words, keeping 8707567 word types
2017-10-25 09:21:14,152 [MainThread  ] [INFO ]  PROGRESS: at sentence #62470000, processed 655427012 words, keeping 8708374 word types
2017-10-25 09:21:15,260 [MainThread  ] [INFO ]  PROGRESS: at sentence #62480000, processed 655536225 words, keeping 8710065 word types
2017-10-25 09:21:16,333 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:22:14,260 [MainThread  ] [INFO ]  PROGRESS: at sentence #63030000, processed 661327576 words, keeping 8759096 word types
2017-10-25 09:22:15,326 [MainThread  ] [INFO ]  PROGRESS: at sentence #63040000, processed 661429535 words, keeping 8760171 word types
2017-10-25 09:22:16,318 [MainThread  ] [INFO ]  PROGRESS: at sentence #63050000, processed 661523663 words, keeping 8761088 word types
2017-10-25 09:22:17,422 [MainThread  ] [INFO ]  PROGRESS: at sentence #63060000, processed 661633337 words, keeping 8761833 word types
2017-10-25 09:22:18,485 [MainThread  ] [INFO ]  PROGRESS: at sentence #63070000, processed 661738264 words, keeping 8762258 word types
2017-10-25 09:22:19,544 [MainThread  ] [INFO ]  PROGRESS: at sentence #63080000, processed 661841770 words, keeping 8763277 word types
2017-10-25 09:22:20,615 [MainThread  ] [INFO ]  PROGRESS: at sentence #63090000, processed 661947243 words, keeping 8764014 word types
2017-10-25 09:22:21,756 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:23:19,754 [MainThread  ] [INFO ]  PROGRESS: at sentence #63640000, processed 667784367 words, keeping 8811593 word types
2017-10-25 09:23:20,803 [MainThread  ] [INFO ]  PROGRESS: at sentence #63650000, processed 667886379 words, keeping 8812274 word types
2017-10-25 09:23:21,809 [MainThread  ] [INFO ]  PROGRESS: at sentence #63660000, processed 667982060 words, keeping 8814029 word types
2017-10-25 09:23:22,738 [MainThread  ] [INFO ]  PROGRESS: at sentence #63670000, processed 668065014 words, keeping 8815030 word types
2017-10-25 09:23:23,822 [MainThread  ] [INFO ]  PROGRESS: at sentence #63680000, processed 668170535 words, keeping 8816290 word types
2017-10-25 09:23:24,924 [MainThread  ] [INFO ]  PROGRESS: at sentence #63690000, processed 668280516 words, keeping 8817175 word types
2017-10-25 09:23:25,990 [MainThread  ] [INFO ]  PROGRESS: at sentence #63700000, processed 668385884 words, keeping 8817898 word types
2017-10-25 09:23:26,954 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:24:24,959 [MainThread  ] [INFO ]  PROGRESS: at sentence #64250000, processed 674183328 words, keeping 8868384 word types
2017-10-25 09:24:26,055 [MainThread  ] [INFO ]  PROGRESS: at sentence #64260000, processed 674291340 words, keeping 8869594 word types
2017-10-25 09:24:27,117 [MainThread  ] [INFO ]  PROGRESS: at sentence #64270000, processed 674394376 words, keeping 8870595 word types
2017-10-25 09:24:28,248 [MainThread  ] [INFO ]  PROGRESS: at sentence #64280000, processed 674507696 words, keeping 8871664 word types
2017-10-25 09:24:29,315 [MainThread  ] [INFO ]  PROGRESS: at sentence #64290000, processed 674613409 words, keeping 8872319 word types
2017-10-25 09:24:30,359 [MainThread  ] [INFO ]  PROGRESS: at sentence #64300000, processed 674714757 words, keeping 8872816 word types
2017-10-25 09:24:31,375 [MainThread  ] [INFO ]  PROGRESS: at sentence #64310000, processed 674813387 words, keeping 8873844 word types
2017-10-25 09:24:32,484 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:25:29,852 [MainThread  ] [INFO ]  PROGRESS: at sentence #64860000, processed 680511066 words, keeping 8925261 word types
2017-10-25 09:25:30,945 [MainThread  ] [INFO ]  PROGRESS: at sentence #64870000, processed 680621727 words, keeping 8925797 word types
2017-10-25 09:25:32,017 [MainThread  ] [INFO ]  PROGRESS: at sentence #64880000, processed 680728118 words, keeping 8926658 word types
2017-10-25 09:25:33,051 [MainThread  ] [INFO ]  PROGRESS: at sentence #64890000, processed 680828680 words, keeping 8927594 word types
2017-10-25 09:25:34,146 [MainThread  ] [INFO ]  PROGRESS: at sentence #64900000, processed 680936814 words, keeping 8928563 word types
2017-10-25 09:25:35,242 [MainThread  ] [INFO ]  PROGRESS: at sentence #64910000, processed 681046568 words, keeping 8929424 word types
2017-10-25 09:25:36,226 [MainThread  ] [INFO ]  PROGRESS: at sentence #64920000, processed 681141100 words, keeping 8930416 word types
2017-10-25 09:25:37,298 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:26:34,975 [MainThread  ] [INFO ]  PROGRESS: at sentence #65470000, processed 686904001 words, keeping 8980526 word types
2017-10-25 09:26:35,957 [MainThread  ] [INFO ]  PROGRESS: at sentence #65480000, processed 686995848 words, keeping 8981603 word types
2017-10-25 09:26:36,918 [MainThread  ] [INFO ]  PROGRESS: at sentence #65490000, processed 687086708 words, keeping 8982925 word types
2017-10-25 09:26:38,036 [MainThread  ] [INFO ]  PROGRESS: at sentence #65500000, processed 687199140 words, keeping 8983832 word types
2017-10-25 09:26:39,118 [MainThread  ] [INFO ]  PROGRESS: at sentence #65510000, processed 687305383 words, keeping 8986342 word types
2017-10-25 09:26:40,147 [MainThread  ] [INFO ]  PROGRESS: at sentence #65520000, processed 687406155 words, keeping 8987497 word types
2017-10-25 09:26:41,261 [MainThread  ] [INFO ]  PROGRESS: at sentence #65530000, processed 687517335 words, keeping 8988261 word types
2017-10-25 09:26:42,331 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:27:39,787 [MainThread  ] [INFO ]  PROGRESS: at sentence #66080000, processed 693243204 words, keeping 9032895 word types
2017-10-25 09:27:40,799 [MainThread  ] [INFO ]  PROGRESS: at sentence #66090000, processed 693340898 words, keeping 9033915 word types
2017-10-25 09:27:41,874 [MainThread  ] [INFO ]  PROGRESS: at sentence #66100000, processed 693446214 words, keeping 9034636 word types
2017-10-25 09:27:42,940 [MainThread  ] [INFO ]  PROGRESS: at sentence #66110000, processed 693550346 words, keeping 9036020 word types
2017-10-25 09:27:44,078 [MainThread  ] [INFO ]  PROGRESS: at sentence #66120000, processed 693664907 words, keeping 9037363 word types
2017-10-25 09:27:45,101 [MainThread  ] [INFO ]  PROGRESS: at sentence #66130000, processed 693764129 words, keeping 9038768 word types
2017-10-25 09:27:47,028 [MainThread  ] [INFO ]  PROGRESS: at sentence #66140000, processed 693867819 words, keeping 9040942 word types
2017-10-25 09:27:48,092 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:28:45,748 [MainThread  ] [INFO ]  PROGRESS: at sentence #66690000, processed 699617644 words, keeping 9094221 word types
2017-10-25 09:28:46,807 [MainThread  ] [INFO ]  PROGRESS: at sentence #66700000, processed 699719729 words, keeping 9094948 word types
2017-10-25 09:28:47,901 [MainThread  ] [INFO ]  PROGRESS: at sentence #66710000, processed 699826669 words, keeping 9096364 word types
2017-10-25 09:28:48,965 [MainThread  ] [INFO ]  PROGRESS: at sentence #66720000, processed 699930365 words, keeping 9097127 word types
2017-10-25 09:28:50,046 [MainThread  ] [INFO ]  PROGRESS: at sentence #66730000, processed 700037232 words, keeping 9098108 word types
2017-10-25 09:28:51,084 [MainThread  ] [INFO ]  PROGRESS: at sentence #66740000, processed 700138320 words, keeping 9099280 word types
2017-10-25 09:28:52,131 [MainThread  ] [INFO ]  PROGRESS: at sentence #66750000, processed 700241396 words, keeping 9099741 word types
2017-10-25 09:28:53,216 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:29:51,412 [MainThread  ] [INFO ]  PROGRESS: at sentence #67300000, processed 706024500 words, keeping 9151835 word types
2017-10-25 09:29:52,537 [MainThread  ] [INFO ]  PROGRESS: at sentence #67310000, processed 706136981 words, keeping 9153631 word types
2017-10-25 09:29:53,589 [MainThread  ] [INFO ]  PROGRESS: at sentence #67320000, processed 706239321 words, keeping 9154298 word types
2017-10-25 09:29:54,668 [MainThread  ] [INFO ]  PROGRESS: at sentence #67330000, processed 706344963 words, keeping 9155294 word types
2017-10-25 09:29:55,690 [MainThread  ] [INFO ]  PROGRESS: at sentence #67340000, processed 706443366 words, keeping 9156049 word types
2017-10-25 09:29:56,641 [MainThread  ] [INFO ]  PROGRESS: at sentence #67350000, processed 706528811 words, keeping 9156796 word types
2017-10-25 09:29:57,692 [MainThread  ] [INFO ]  PROGRESS: at sentence #67360000, processed 706632245 words, keeping 9157311 word types
2017-10-25 09:29:58,747 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:30:57,031 [MainThread  ] [INFO ]  PROGRESS: at sentence #67910000, processed 712456432 words, keeping 9210839 word types
2017-10-25 09:30:58,134 [MainThread  ] [INFO ]  PROGRESS: at sentence #67920000, processed 712566810 words, keeping 9211557 word types
2017-10-25 09:30:59,204 [MainThread  ] [INFO ]  PROGRESS: at sentence #67930000, processed 712672875 words, keeping 9212351 word types
2017-10-25 09:31:00,292 [MainThread  ] [INFO ]  PROGRESS: at sentence #67940000, processed 712778306 words, keeping 9213360 word types
2017-10-25 09:31:01,413 [MainThread  ] [INFO ]  PROGRESS: at sentence #67950000, processed 712887595 words, keeping 9214543 word types
2017-10-25 09:31:02,496 [MainThread  ] [INFO ]  PROGRESS: at sentence #67960000, processed 712992654 words, keeping 9215701 word types
2017-10-25 09:31:03,639 [MainThread  ] [INFO ]  PROGRESS: at sentence #67970000, processed 713104063 words, keeping 9216501 word types
2017-10-25 09:31:04,758 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:32:02,882 [MainThread  ] [INFO ]  PROGRESS: at sentence #68520000, processed 718907091 words, keeping 9268702 word types
2017-10-25 09:32:03,980 [MainThread  ] [INFO ]  PROGRESS: at sentence #68530000, processed 719016656 words, keeping 9269401 word types
2017-10-25 09:32:05,068 [MainThread  ] [INFO ]  PROGRESS: at sentence #68540000, processed 719125888 words, keeping 9269921 word types
2017-10-25 09:32:06,137 [MainThread  ] [INFO ]  PROGRESS: at sentence #68550000, processed 719231106 words, keeping 9272598 word types
2017-10-25 09:32:07,267 [MainThread  ] [INFO ]  PROGRESS: at sentence #68560000, processed 719344878 words, keeping 9273604 word types
2017-10-25 09:32:08,393 [MainThread  ] [INFO ]  PROGRESS: at sentence #68570000, processed 719456851 words, keeping 9274342 word types
2017-10-25 09:32:09,465 [MainThread  ] [INFO ]  PROGRESS: at sentence #68580000, processed 719564026 words, keeping 9275167 word types
2017-10-25 09:32:10,508 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:33:08,392 [MainThread  ] [INFO ]  PROGRESS: at sentence #69130000, processed 725339738 words, keeping 9322496 word types
2017-10-25 09:33:09,451 [MainThread  ] [INFO ]  PROGRESS: at sentence #69140000, processed 725442950 words, keeping 9323779 word types
2017-10-25 09:33:10,491 [MainThread  ] [INFO ]  PROGRESS: at sentence #69150000, processed 725543454 words, keeping 9324575 word types
2017-10-25 09:33:11,608 [MainThread  ] [INFO ]  PROGRESS: at sentence #69160000, processed 725653339 words, keeping 9325155 word types
2017-10-25 09:33:12,699 [MainThread  ] [INFO ]  PROGRESS: at sentence #69170000, processed 725757562 words, keeping 9325909 word types
2017-10-25 09:33:13,772 [MainThread  ] [INFO ]  PROGRESS: at sentence #69180000, processed 725863305 words, keeping 9326889 word types
2017-10-25 09:33:14,857 [MainThread  ] [INFO ]  PROGRESS: at sentence #69190000, processed 725971166 words, keeping 9327929 word types
2017-10-25 09:33:15,928 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:34:13,875 [MainThread  ] [INFO ]  PROGRESS: at sentence #69740000, processed 731775789 words, keeping 9375035 word types
2017-10-25 09:34:14,985 [MainThread  ] [INFO ]  PROGRESS: at sentence #69750000, processed 731888067 words, keeping 9375876 word types
2017-10-25 09:34:16,070 [MainThread  ] [INFO ]  PROGRESS: at sentence #69760000, processed 731995554 words, keeping 9377068 word types
2017-10-25 09:34:17,147 [MainThread  ] [INFO ]  PROGRESS: at sentence #69770000, processed 732101218 words, keeping 9377702 word types
2017-10-25 09:34:18,263 [MainThread  ] [INFO ]  PROGRESS: at sentence #69780000, processed 732207561 words, keeping 9378707 word types
2017-10-25 09:34:19,313 [MainThread  ] [INFO ]  PROGRESS: at sentence #69790000, processed 732310404 words, keeping 9379303 word types
2017-10-25 09:34:20,382 [MainThread  ] [INFO ]  PROGRESS: at sentence #69800000, processed 732416038 words, keeping 9379924 word types
2017-10-25 09:34:21,342 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:35:19,244 [MainThread  ] [INFO ]  PROGRESS: at sentence #70350000, processed 738193440 words, keeping 9426494 word types
2017-10-25 09:35:20,273 [MainThread  ] [INFO ]  PROGRESS: at sentence #70360000, processed 738292428 words, keeping 9427416 word types
2017-10-25 09:35:22,282 [MainThread  ] [INFO ]  PROGRESS: at sentence #70370000, processed 738399211 words, keeping 9428894 word types
2017-10-25 09:35:23,374 [MainThread  ] [INFO ]  PROGRESS: at sentence #70380000, processed 738503514 words, keeping 9431200 word types
2017-10-25 09:35:24,460 [MainThread  ] [INFO ]  PROGRESS: at sentence #70390000, processed 738610498 words, keeping 9431988 word types
2017-10-25 09:35:25,508 [MainThread  ] [INFO ]  PROGRESS: at sentence #70400000, processed 738710139 words, keeping 9432320 word types
2017-10-25 09:35:26,615 [MainThread  ] [INFO ]  PROGRESS: at sentence #70410000, processed 738820951 words, keeping 9433451 word types
2017-10-25 09:35:27,638 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:36:25,623 [MainThread  ] [INFO ]  PROGRESS: at sentence #70960000, processed 744606869 words, keeping 9482059 word types
2017-10-25 09:36:26,742 [MainThread  ] [INFO ]  PROGRESS: at sentence #70970000, processed 744718972 words, keeping 9483768 word types
2017-10-25 09:36:27,784 [MainThread  ] [INFO ]  PROGRESS: at sentence #70980000, processed 744821030 words, keeping 9484688 word types
2017-10-25 09:36:28,859 [MainThread  ] [INFO ]  PROGRESS: at sentence #70990000, processed 744927363 words, keeping 9485326 word types
2017-10-25 09:36:29,916 [MainThread  ] [INFO ]  PROGRESS: at sentence #71000000, processed 745030445 words, keeping 9486033 word types
2017-10-25 09:36:31,016 [MainThread  ] [INFO ]  PROGRESS: at sentence #71010000, processed 745140308 words, keeping 9486898 word types
2017-10-25 09:36:32,127 [MainThread  ] [INFO ]  PROGRESS: at sentence #71020000, processed 745252136 words, keeping 9487968 word types
2017-10-25 09:36:33,205 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:37:30,916 [MainThread  ] [INFO ]  PROGRESS: at sentence #71570000, processed 751019912 words, keeping 9535172 word types
2017-10-25 09:37:31,987 [MainThread  ] [INFO ]  PROGRESS: at sentence #71580000, processed 751124760 words, keeping 9536798 word types
2017-10-25 09:37:33,057 [MainThread  ] [INFO ]  PROGRESS: at sentence #71590000, processed 751229966 words, keeping 9537638 word types
2017-10-25 09:37:34,125 [MainThread  ] [INFO ]  PROGRESS: at sentence #71600000, processed 751332459 words, keeping 9538765 word types
2017-10-25 09:37:35,168 [MainThread  ] [INFO ]  PROGRESS: at sentence #71610000, processed 751435608 words, keeping 9539355 word types
2017-10-25 09:37:36,243 [MainThread  ] [INFO ]  PROGRESS: at sentence #71620000, processed 751539397 words, keeping 9540064 word types
2017-10-25 09:37:37,231 [MainThread  ] [INFO ]  PROGRESS: at sentence #71630000, processed 751634426 words, keeping 9541222 word types
2017-10-25 09:37:38,243 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:38:35,807 [MainThread  ] [INFO ]  PROGRESS: at sentence #72180000, processed 757361725 words, keeping 9592385 word types
2017-10-25 09:38:36,869 [MainThread  ] [INFO ]  PROGRESS: at sentence #72190000, processed 757464041 words, keeping 9593611 word types
2017-10-25 09:38:38,007 [MainThread  ] [INFO ]  PROGRESS: at sentence #72200000, processed 757574915 words, keeping 9594787 word types
2017-10-25 09:38:39,145 [MainThread  ] [INFO ]  PROGRESS: at sentence #72210000, processed 757689170 words, keeping 9596097 word types
2017-10-25 09:38:40,295 [MainThread  ] [INFO ]  PROGRESS: at sentence #72220000, processed 757806426 words, keeping 9597798 word types
2017-10-25 09:38:41,352 [MainThread  ] [INFO ]  PROGRESS: at sentence #72230000, processed 757910355 words, keeping 9598840 word types
2017-10-25 09:38:42,468 [MainThread  ] [INFO ]  PROGRESS: at sentence #72240000, processed 758020480 words, keeping 9599593 word types
2017-10-25 09:38:43,548 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:39:41,591 [MainThread  ] [INFO ]  PROGRESS: at sentence #72790000, processed 763821294 words, keeping 9651018 word types
2017-10-25 09:39:42,644 [MainThread  ] [INFO ]  PROGRESS: at sentence #72800000, processed 763925211 words, keeping 9651806 word types
2017-10-25 09:39:43,701 [MainThread  ] [INFO ]  PROGRESS: at sentence #72810000, processed 764030434 words, keeping 9652475 word types
2017-10-25 09:39:44,780 [MainThread  ] [INFO ]  PROGRESS: at sentence #72820000, processed 764138631 words, keeping 9653227 word types
2017-10-25 09:39:45,860 [MainThread  ] [INFO ]  PROGRESS: at sentence #72830000, processed 764246653 words, keeping 9653856 word types
2017-10-25 09:39:46,914 [MainThread  ] [INFO ]  PROGRESS: at sentence #72840000, processed 764349057 words, keeping 9654671 word types
2017-10-25 09:39:48,005 [MainThread  ] [INFO ]  PROGRESS: at sentence #72850000, processed 764455942 words, keeping 9655265 word types
2017-10-25 09:39:49,087 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:40:47,090 [MainThread  ] [INFO ]  PROGRESS: at sentence #73400000, processed 770257069 words, keeping 9702440 word types
2017-10-25 09:40:48,163 [MainThread  ] [INFO ]  PROGRESS: at sentence #73410000, processed 770360640 words, keeping 9703377 word types
2017-10-25 09:40:49,207 [MainThread  ] [INFO ]  PROGRESS: at sentence #73420000, processed 770460786 words, keeping 9704195 word types
2017-10-25 09:40:50,286 [MainThread  ] [INFO ]  PROGRESS: at sentence #73430000, processed 770568534 words, keeping 9704974 word types
2017-10-25 09:40:51,376 [MainThread  ] [INFO ]  PROGRESS: at sentence #73440000, processed 770675809 words, keeping 9705900 word types
2017-10-25 09:40:52,451 [MainThread  ] [INFO ]  PROGRESS: at sentence #73450000, processed 770781998 words, keeping 9706536 word types
2017-10-25 09:40:53,501 [MainThread  ] [INFO ]  PROGRESS: at sentence #73460000, processed 770884690 words, keeping 9707196 word types
2017-10-25 09:40:54,541 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:41:52,484 [MainThread  ] [INFO ]  PROGRESS: at sentence #74010000, processed 776677576 words, keeping 9756512 word types
2017-10-25 09:41:53,576 [MainThread  ] [INFO ]  PROGRESS: at sentence #74020000, processed 776786546 words, keeping 9757508 word types
2017-10-25 09:41:54,631 [MainThread  ] [INFO ]  PROGRESS: at sentence #74030000, processed 776891277 words, keeping 9758293 word types
2017-10-25 09:41:55,660 [MainThread  ] [INFO ]  PROGRESS: at sentence #74040000, processed 776990975 words, keeping 9758840 word types
2017-10-25 09:41:56,765 [MainThread  ] [INFO ]  PROGRESS: at sentence #74050000, processed 777099117 words, keeping 9759745 word types
2017-10-25 09:41:57,844 [MainThread  ] [INFO ]  PROGRESS: at sentence #74060000, processed 777206306 words, keeping 9760691 word types
2017-10-25 09:41:58,857 [MainThread  ] [INFO ]  PROGRESS: at sentence #74070000, processed 777302530 words, keeping 9762253 word types
2017-10-25 09:41:59,929 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:42:57,502 [MainThread  ] [INFO ]  PROGRESS: at sentence #74620000, processed 783062459 words, keeping 9809137 word types
2017-10-25 09:42:58,559 [MainThread  ] [INFO ]  PROGRESS: at sentence #74630000, processed 783166480 words, keeping 9809736 word types
2017-10-25 09:42:59,596 [MainThread  ] [INFO ]  PROGRESS: at sentence #74640000, processed 783267879 words, keeping 9810356 word types
2017-10-25 09:43:00,681 [MainThread  ] [INFO ]  PROGRESS: at sentence #74650000, processed 783374232 words, keeping 9811180 word types
2017-10-25 09:43:01,759 [MainThread  ] [INFO ]  PROGRESS: at sentence #74660000, processed 783480078 words, keeping 9812371 word types
2017-10-25 09:43:02,831 [MainThread  ] [INFO ]  PROGRESS: at sentence #74670000, processed 783586560 words, keeping 9812933 word types
2017-10-25 09:43:03,913 [MainThread  ] [INFO ]  PROGRESS: at sentence #74680000, processed 783692614 words, keeping 9813827 word types
2017-10-25 09:43:04,933 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:44:03,008 [MainThread  ] [INFO ]  PROGRESS: at sentence #75230000, processed 789471830 words, keeping 9863762 word types
2017-10-25 09:44:04,110 [MainThread  ] [INFO ]  PROGRESS: at sentence #75240000, processed 789580670 words, keeping 9864598 word types
2017-10-25 09:44:05,154 [MainThread  ] [INFO ]  PROGRESS: at sentence #75250000, processed 789681955 words, keeping 9865284 word types
2017-10-25 09:44:06,208 [MainThread  ] [INFO ]  PROGRESS: at sentence #75260000, processed 789786585 words, keeping 9866102 word types
2017-10-25 09:44:07,288 [MainThread  ] [INFO ]  PROGRESS: at sentence #75270000, processed 789892172 words, keeping 9866851 word types
2017-10-25 09:44:08,374 [MainThread  ] [INFO ]  PROGRESS: at sentence #75280000, processed 790000871 words, keeping 9867615 word types
2017-10-25 09:44:09,464 [MainThread  ] [INFO ]  PROGRESS: at sentence #75290000, processed 790109532 words, keeping 9868373 word types
2017-10-25 09:44:10,548 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:45:08,723 [MainThread  ] [INFO ]  PROGRESS: at sentence #75840000, processed 795840581 words, keeping 9918452 word types
2017-10-25 09:45:09,796 [MainThread  ] [INFO ]  PROGRESS: at sentence #75850000, processed 795947239 words, keeping 9919450 word types
2017-10-25 09:45:10,892 [MainThread  ] [INFO ]  PROGRESS: at sentence #75860000, processed 796056968 words, keeping 9920478 word types
2017-10-25 09:45:12,011 [MainThread  ] [INFO ]  PROGRESS: at sentence #75870000, processed 796166348 words, keeping 9921288 word types
2017-10-25 09:45:13,107 [MainThread  ] [INFO ]  PROGRESS: at sentence #75880000, processed 796275523 words, keeping 9922330 word types
2017-10-25 09:45:14,069 [MainThread  ] [INFO ]  PROGRESS: at sentence #75890000, processed 796366989 words, keeping 9923100 word types
2017-10-25 09:45:15,124 [MainThread  ] [INFO ]  PROGRESS: at sentence #75900000, processed 796471782 words, keeping 9924320 word types
2017-10-25 09:45:16,174 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:46:13,236 [MainThread  ] [INFO ]  PROGRESS: at sentence #76450000, processed 802147401 words, keeping 9974976 word types
2017-10-25 09:46:14,272 [MainThread  ] [INFO ]  PROGRESS: at sentence #76460000, processed 802248604 words, keeping 9976003 word types
2017-10-25 09:46:15,328 [MainThread  ] [INFO ]  PROGRESS: at sentence #76470000, processed 802351629 words, keeping 9977250 word types
2017-10-25 09:46:16,372 [MainThread  ] [INFO ]  PROGRESS: at sentence #76480000, processed 802453619 words, keeping 9978177 word types
2017-10-25 09:46:17,393 [MainThread  ] [INFO ]  PROGRESS: at sentence #76490000, processed 802548430 words, keeping 9979363 word types
2017-10-25 09:46:18,518 [MainThread  ] [INFO ]  PROGRESS: at sentence #76500000, processed 802658482 words, keeping 9980615 word types
2017-10-25 09:46:19,594 [MainThread  ] [INFO ]  PROGRESS: at sentence #76510000, processed 802764925 words, keeping 9981270 word types
2017-10-25 09:46:20,609 [MainThread  ] [INFO ]  PROGRES

2017-10-25 09:47:18,595 [MainThread  ] [INFO ]  PROGRESS: at sentence #77060000, processed 808559725 words, keeping 10026679 word types
2017-10-25 09:47:19,625 [MainThread  ] [INFO ]  PROGRESS: at sentence #77070000, processed 808659720 words, keeping 10027492 word types
2017-10-25 09:47:20,735 [MainThread  ] [INFO ]  PROGRESS: at sentence #77080000, processed 808770658 words, keeping 10028057 word types
2017-10-25 09:47:21,819 [MainThread  ] [INFO ]  PROGRESS: at sentence #77090000, processed 808875087 words, keeping 10028750 word types
2017-10-25 09:47:22,915 [MainThread  ] [INFO ]  PROGRESS: at sentence #77100000, processed 808984690 words, keeping 10029554 word types
2017-10-25 09:47:23,986 [MainThread  ] [INFO ]  PROGRESS: at sentence #77110000, processed 809088024 words, keeping 10030513 word types
2017-10-25 09:47:25,019 [MainThread  ] [INFO ]  PROGRESS: at sentence #77120000, processed 809184898 words, keeping 10032031 word types
2017-10-25 09:47:26,147 [MainThread  ] [INFO ]  

2017-10-25 09:48:25,001 [MainThread  ] [INFO ]  PROGRESS: at sentence #77670000, processed 815055188 words, keeping 10079093 word types
2017-10-25 09:48:26,119 [MainThread  ] [INFO ]  PROGRESS: at sentence #77680000, processed 815162250 words, keeping 10080010 word types
2017-10-25 09:48:27,223 [MainThread  ] [INFO ]  PROGRESS: at sentence #77690000, processed 815270023 words, keeping 10080520 word types
2017-10-25 09:48:28,320 [MainThread  ] [INFO ]  PROGRESS: at sentence #77700000, processed 815375650 words, keeping 10081676 word types
2017-10-25 09:48:29,412 [MainThread  ] [INFO ]  PROGRESS: at sentence #77710000, processed 815482257 words, keeping 10084632 word types
2017-10-25 09:48:30,515 [MainThread  ] [INFO ]  PROGRESS: at sentence #77720000, processed 815590555 words, keeping 10085219 word types
2017-10-25 09:48:31,465 [MainThread  ] [INFO ]  PROGRESS: at sentence #77730000, processed 815679680 words, keeping 10086634 word types
2017-10-25 09:48:32,562 [MainThread  ] [INFO ]  

2017-10-25 09:49:31,373 [MainThread  ] [INFO ]  PROGRESS: at sentence #78280000, processed 821538774 words, keeping 10134225 word types
2017-10-25 09:49:32,401 [MainThread  ] [INFO ]  PROGRESS: at sentence #78290000, processed 821638701 words, keeping 10134615 word types
2017-10-25 09:49:33,491 [MainThread  ] [INFO ]  PROGRESS: at sentence #78300000, processed 821741640 words, keeping 10135563 word types
2017-10-25 09:49:34,610 [MainThread  ] [INFO ]  PROGRESS: at sentence #78310000, processed 821851824 words, keeping 10136516 word types
2017-10-25 09:49:35,694 [MainThread  ] [INFO ]  PROGRESS: at sentence #78320000, processed 821955985 words, keeping 10137267 word types
2017-10-25 09:49:36,808 [MainThread  ] [INFO ]  PROGRESS: at sentence #78330000, processed 822068898 words, keeping 10138414 word types
2017-10-25 09:49:37,845 [MainThread  ] [INFO ]  PROGRESS: at sentence #78340000, processed 822169487 words, keeping 10138978 word types
2017-10-25 09:49:38,935 [MainThread  ] [INFO ]  

2017-10-25 09:50:37,100 [MainThread  ] [INFO ]  PROGRESS: at sentence #78890000, processed 827924890 words, keeping 10183135 word types
2017-10-25 09:50:38,192 [MainThread  ] [INFO ]  PROGRESS: at sentence #78900000, processed 828033086 words, keeping 10184164 word types
2017-10-25 09:50:39,263 [MainThread  ] [INFO ]  PROGRESS: at sentence #78910000, processed 828138656 words, keeping 10184977 word types
2017-10-25 09:50:40,373 [MainThread  ] [INFO ]  PROGRESS: at sentence #78920000, processed 828246734 words, keeping 10185948 word types
2017-10-25 09:50:41,454 [MainThread  ] [INFO ]  PROGRESS: at sentence #78930000, processed 828351437 words, keeping 10186593 word types
2017-10-25 09:50:42,469 [MainThread  ] [INFO ]  PROGRESS: at sentence #78940000, processed 828445237 words, keeping 10187597 word types
2017-10-25 09:50:43,562 [MainThread  ] [INFO ]  PROGRESS: at sentence #78950000, processed 828550328 words, keeping 10188498 word types
2017-10-25 09:50:44,647 [MainThread  ] [INFO ]  

2017-10-25 09:51:42,719 [MainThread  ] [INFO ]  PROGRESS: at sentence #79500000, processed 834326395 words, keeping 10237137 word types
2017-10-25 09:51:43,750 [MainThread  ] [INFO ]  PROGRESS: at sentence #79510000, processed 834424542 words, keeping 10237950 word types
2017-10-25 09:51:44,812 [MainThread  ] [INFO ]  PROGRESS: at sentence #79520000, processed 834526216 words, keeping 10239474 word types
2017-10-25 09:51:45,946 [MainThread  ] [INFO ]  PROGRESS: at sentence #79530000, processed 834634903 words, keeping 10240733 word types
2017-10-25 09:51:47,083 [MainThread  ] [INFO ]  PROGRESS: at sentence #79540000, processed 834745414 words, keeping 10241552 word types
2017-10-25 09:51:48,200 [MainThread  ] [INFO ]  PROGRESS: at sentence #79550000, processed 834853296 words, keeping 10242100 word types
2017-10-25 09:51:49,202 [MainThread  ] [INFO ]  PROGRESS: at sentence #79560000, processed 834948651 words, keeping 10243033 word types
2017-10-25 09:51:50,315 [MainThread  ] [INFO ]  

2017-10-25 09:52:49,325 [MainThread  ] [INFO ]  PROGRESS: at sentence #80110000, processed 840736142 words, keeping 10285031 word types
2017-10-25 09:52:50,436 [MainThread  ] [INFO ]  PROGRESS: at sentence #80120000, processed 840844461 words, keeping 10285574 word types
2017-10-25 09:52:51,524 [MainThread  ] [INFO ]  PROGRESS: at sentence #80130000, processed 840952032 words, keeping 10286213 word types
2017-10-25 09:52:52,575 [MainThread  ] [INFO ]  PROGRESS: at sentence #80140000, processed 841053861 words, keeping 10287021 word types
2017-10-25 09:52:53,684 [MainThread  ] [INFO ]  PROGRESS: at sentence #80150000, processed 841164806 words, keeping 10287846 word types
2017-10-25 09:52:54,763 [MainThread  ] [INFO ]  PROGRESS: at sentence #80160000, processed 841270162 words, keeping 10288554 word types
2017-10-25 09:52:55,827 [MainThread  ] [INFO ]  PROGRESS: at sentence #80170000, processed 841373274 words, keeping 10289370 word types
2017-10-25 09:52:56,864 [MainThread  ] [INFO ]  

2017-10-25 09:53:54,448 [MainThread  ] [INFO ]  PROGRESS: at sentence #80720000, processed 847005581 words, keeping 10332265 word types
2017-10-25 09:53:55,507 [MainThread  ] [INFO ]  PROGRESS: at sentence #80730000, processed 847109894 words, keeping 10332861 word types
2017-10-25 09:53:56,540 [MainThread  ] [INFO ]  PROGRESS: at sentence #80740000, processed 847211047 words, keeping 10333416 word types
2017-10-25 09:53:57,651 [MainThread  ] [INFO ]  PROGRESS: at sentence #80750000, processed 847323043 words, keeping 10333995 word types
2017-10-25 09:53:58,681 [MainThread  ] [INFO ]  PROGRESS: at sentence #80760000, processed 847423145 words, keeping 10334826 word types
2017-10-25 09:53:59,738 [MainThread  ] [INFO ]  PROGRESS: at sentence #80770000, processed 847527844 words, keeping 10335355 word types
2017-10-25 09:54:00,841 [MainThread  ] [INFO ]  PROGRESS: at sentence #80780000, processed 847638421 words, keeping 10336074 word types
2017-10-25 09:54:01,902 [MainThread  ] [INFO ]  

2017-10-25 09:54:59,889 [MainThread  ] [INFO ]  PROGRESS: at sentence #81330000, processed 853426957 words, keeping 10384461 word types
2017-10-25 09:55:00,968 [MainThread  ] [INFO ]  PROGRESS: at sentence #81340000, processed 853533858 words, keeping 10385608 word types
2017-10-25 09:55:02,018 [MainThread  ] [INFO ]  PROGRESS: at sentence #81350000, processed 853634573 words, keeping 10386549 word types
2017-10-25 09:55:03,104 [MainThread  ] [INFO ]  PROGRESS: at sentence #81360000, processed 853741584 words, keeping 10387679 word types
2017-10-25 09:55:04,188 [MainThread  ] [INFO ]  PROGRESS: at sentence #81370000, processed 853849138 words, keeping 10388301 word types
2017-10-25 09:55:05,253 [MainThread  ] [INFO ]  PROGRESS: at sentence #81380000, processed 853953104 words, keeping 10389613 word types
2017-10-25 09:55:06,309 [MainThread  ] [INFO ]  PROGRESS: at sentence #81390000, processed 854055585 words, keeping 10390481 word types
2017-10-25 09:55:07,373 [MainThread  ] [INFO ]  

2017-10-25 09:56:05,735 [MainThread  ] [INFO ]  PROGRESS: at sentence #81940000, processed 859929582 words, keeping 10434996 word types
2017-10-25 09:56:06,828 [MainThread  ] [INFO ]  PROGRESS: at sentence #81950000, processed 860038473 words, keeping 10435812 word types
2017-10-25 09:56:07,912 [MainThread  ] [INFO ]  PROGRESS: at sentence #81960000, processed 860147322 words, keeping 10437945 word types
2017-10-25 09:56:09,023 [MainThread  ] [INFO ]  PROGRESS: at sentence #81970000, processed 860257162 words, keeping 10438796 word types
2017-10-25 09:56:10,133 [MainThread  ] [INFO ]  PROGRESS: at sentence #81980000, processed 860367358 words, keeping 10440019 word types
2017-10-25 09:56:11,232 [MainThread  ] [INFO ]  PROGRESS: at sentence #81990000, processed 860476907 words, keeping 10440709 word types
2017-10-25 09:56:12,342 [MainThread  ] [INFO ]  PROGRESS: at sentence #82000000, processed 860585553 words, keeping 10441358 word types
2017-10-25 09:56:13,387 [MainThread  ] [INFO ]  

2017-10-25 09:57:11,615 [MainThread  ] [INFO ]  PROGRESS: at sentence #82550000, processed 866406658 words, keeping 10487446 word types
2017-10-25 09:57:12,685 [MainThread  ] [INFO ]  PROGRESS: at sentence #82560000, processed 866511471 words, keeping 10488268 word types
2017-10-25 09:57:13,763 [MainThread  ] [INFO ]  PROGRESS: at sentence #82570000, processed 866617554 words, keeping 10489274 word types
2017-10-25 09:57:14,866 [MainThread  ] [INFO ]  PROGRESS: at sentence #82580000, processed 866726978 words, keeping 10489768 word types
2017-10-25 09:57:15,957 [MainThread  ] [INFO ]  PROGRESS: at sentence #82590000, processed 866831896 words, keeping 10490534 word types
2017-10-25 09:57:16,994 [MainThread  ] [INFO ]  PROGRESS: at sentence #82600000, processed 866933194 words, keeping 10491379 word types
2017-10-25 09:57:18,119 [MainThread  ] [INFO ]  PROGRESS: at sentence #82610000, processed 867043542 words, keeping 10491941 word types
2017-10-25 09:57:19,279 [MainThread  ] [INFO ]  

2017-10-25 09:58:17,603 [MainThread  ] [INFO ]  PROGRESS: at sentence #83160000, processed 872912885 words, keeping 10536864 word types
2017-10-25 09:58:18,628 [MainThread  ] [INFO ]  PROGRESS: at sentence #83170000, processed 873010852 words, keeping 10537575 word types
2017-10-25 09:58:19,712 [MainThread  ] [INFO ]  PROGRESS: at sentence #83180000, processed 873118422 words, keeping 10538176 word types
2017-10-25 09:58:20,802 [MainThread  ] [INFO ]  PROGRESS: at sentence #83190000, processed 873223836 words, keeping 10539601 word types
2017-10-25 09:58:21,869 [MainThread  ] [INFO ]  PROGRESS: at sentence #83200000, processed 873328764 words, keeping 10540087 word types
2017-10-25 09:58:22,940 [MainThread  ] [INFO ]  PROGRESS: at sentence #83210000, processed 873431282 words, keeping 10540779 word types
2017-10-25 09:58:24,085 [MainThread  ] [INFO ]  PROGRESS: at sentence #83220000, processed 873544693 words, keeping 10541408 word types
2017-10-25 09:58:25,177 [MainThread  ] [INFO ]  

2017-10-25 09:59:23,358 [MainThread  ] [INFO ]  PROGRESS: at sentence #83770000, processed 879340339 words, keeping 10589894 word types
2017-10-25 09:59:24,404 [MainThread  ] [INFO ]  PROGRESS: at sentence #83780000, processed 879443054 words, keeping 10590332 word types
2017-10-25 09:59:25,488 [MainThread  ] [INFO ]  PROGRESS: at sentence #83790000, processed 879550552 words, keeping 10591234 word types
2017-10-25 09:59:26,522 [MainThread  ] [INFO ]  PROGRESS: at sentence #83800000, processed 879651410 words, keeping 10592312 word types
2017-10-25 09:59:27,564 [MainThread  ] [INFO ]  PROGRESS: at sentence #83810000, processed 879753094 words, keeping 10593014 word types
2017-10-25 09:59:28,638 [MainThread  ] [INFO ]  PROGRESS: at sentence #83820000, processed 879859549 words, keeping 10594067 word types
2017-10-25 09:59:29,709 [MainThread  ] [INFO ]  PROGRESS: at sentence #83830000, processed 879965244 words, keeping 10595464 word types
2017-10-25 09:59:30,719 [MainThread  ] [INFO ]  

2017-10-25 10:00:29,385 [MainThread  ] [INFO ]  PROGRESS: at sentence #84380000, processed 885728576 words, keeping 10638946 word types
2017-10-25 10:00:30,481 [MainThread  ] [INFO ]  PROGRESS: at sentence #84390000, processed 885835948 words, keeping 10640063 word types
2017-10-25 10:00:31,548 [MainThread  ] [INFO ]  PROGRESS: at sentence #84400000, processed 885941171 words, keeping 10640808 word types
2017-10-25 10:00:32,654 [MainThread  ] [INFO ]  PROGRESS: at sentence #84410000, processed 886052338 words, keeping 10641397 word types
2017-10-25 10:00:33,665 [MainThread  ] [INFO ]  PROGRESS: at sentence #84420000, processed 886149663 words, keeping 10642223 word types
2017-10-25 10:00:34,754 [MainThread  ] [INFO ]  PROGRESS: at sentence #84430000, processed 886257747 words, keeping 10643021 word types
2017-10-25 10:00:35,846 [MainThread  ] [INFO ]  PROGRESS: at sentence #84440000, processed 886363743 words, keeping 10643787 word types
2017-10-25 10:00:36,864 [MainThread  ] [INFO ]  

2017-10-25 10:01:34,586 [MainThread  ] [INFO ]  PROGRESS: at sentence #84990000, processed 892129525 words, keeping 10692589 word types
2017-10-25 10:01:35,672 [MainThread  ] [INFO ]  PROGRESS: at sentence #85000000, processed 892237155 words, keeping 10693653 word types
2017-10-25 10:01:36,722 [MainThread  ] [INFO ]  PROGRESS: at sentence #85010000, processed 892336886 words, keeping 10694823 word types
2017-10-25 10:01:37,848 [MainThread  ] [INFO ]  PROGRESS: at sentence #85020000, processed 892448970 words, keeping 10695502 word types
2017-10-25 10:01:38,932 [MainThread  ] [INFO ]  PROGRESS: at sentence #85030000, processed 892555104 words, keeping 10696195 word types
2017-10-25 10:01:39,970 [MainThread  ] [INFO ]  PROGRESS: at sentence #85040000, processed 892656919 words, keeping 10696972 word types
2017-10-25 10:01:41,058 [MainThread  ] [INFO ]  PROGRESS: at sentence #85050000, processed 892762597 words, keeping 10697834 word types
2017-10-25 10:01:42,123 [MainThread  ] [INFO ]  

2017-10-25 10:02:39,752 [MainThread  ] [INFO ]  PROGRESS: at sentence #85600000, processed 898514904 words, keeping 10748709 word types
2017-10-25 10:02:40,777 [MainThread  ] [INFO ]  PROGRESS: at sentence #85610000, processed 898613355 words, keeping 10749253 word types
2017-10-25 10:02:41,842 [MainThread  ] [INFO ]  PROGRESS: at sentence #85620000, processed 898719548 words, keeping 10750004 word types
2017-10-25 10:02:42,955 [MainThread  ] [INFO ]  PROGRESS: at sentence #85630000, processed 898829963 words, keeping 10751021 word types
2017-10-25 10:02:43,906 [MainThread  ] [INFO ]  PROGRESS: at sentence #85640000, processed 898918924 words, keeping 10751897 word types
2017-10-25 10:02:44,995 [MainThread  ] [INFO ]  PROGRESS: at sentence #85650000, processed 899027410 words, keeping 10752735 word types
2017-10-25 10:02:46,067 [MainThread  ] [INFO ]  PROGRESS: at sentence #85660000, processed 899130936 words, keeping 10753309 word types
2017-10-25 10:02:47,195 [MainThread  ] [INFO ]  

2017-10-25 10:03:45,039 [MainThread  ] [INFO ]  PROGRESS: at sentence #86210000, processed 904924291 words, keeping 10797114 word types
2017-10-25 10:03:46,091 [MainThread  ] [INFO ]  PROGRESS: at sentence #86220000, processed 905025915 words, keeping 10798861 word types
2017-10-25 10:03:47,167 [MainThread  ] [INFO ]  PROGRESS: at sentence #86230000, processed 905128990 words, keeping 10799987 word types
2017-10-25 10:03:48,256 [MainThread  ] [INFO ]  PROGRESS: at sentence #86240000, processed 905235860 words, keeping 10800529 word types
2017-10-25 10:03:49,305 [MainThread  ] [INFO ]  PROGRESS: at sentence #86250000, processed 905338992 words, keeping 10801149 word types
2017-10-25 10:03:50,400 [MainThread  ] [INFO ]  PROGRESS: at sentence #86260000, processed 905444445 words, keeping 10802323 word types
2017-10-25 10:03:51,468 [MainThread  ] [INFO ]  PROGRESS: at sentence #86270000, processed 905550297 words, keeping 10802959 word types
2017-10-25 10:03:52,531 [MainThread  ] [INFO ]  

2017-10-25 10:04:49,934 [MainThread  ] [INFO ]  PROGRESS: at sentence #86820000, processed 911264903 words, keeping 10847040 word types
2017-10-25 10:04:50,993 [MainThread  ] [INFO ]  PROGRESS: at sentence #86830000, processed 911369123 words, keeping 10847989 word types
2017-10-25 10:04:52,097 [MainThread  ] [INFO ]  PROGRESS: at sentence #86840000, processed 911478228 words, keeping 10849465 word types
2017-10-25 10:04:53,196 [MainThread  ] [INFO ]  PROGRESS: at sentence #86850000, processed 911585446 words, keeping 10850405 word types
2017-10-25 10:04:54,371 [MainThread  ] [INFO ]  PROGRESS: at sentence #86860000, processed 911701608 words, keeping 10851018 word types
2017-10-25 10:04:55,431 [MainThread  ] [INFO ]  PROGRESS: at sentence #86870000, processed 911802224 words, keeping 10851899 word types
2017-10-25 10:04:56,571 [MainThread  ] [INFO ]  PROGRESS: at sentence #86880000, processed 911915557 words, keeping 10852840 word types
2017-10-25 10:04:57,708 [MainThread  ] [INFO ]  

2017-10-25 10:05:56,012 [MainThread  ] [INFO ]  PROGRESS: at sentence #87430000, processed 917697837 words, keeping 10897199 word types
2017-10-25 10:05:57,084 [MainThread  ] [INFO ]  PROGRESS: at sentence #87440000, processed 917803617 words, keeping 10897735 word types
2017-10-25 10:05:58,108 [MainThread  ] [INFO ]  PROGRESS: at sentence #87450000, processed 917901390 words, keeping 10898508 word types
2017-10-25 10:05:59,201 [MainThread  ] [INFO ]  PROGRESS: at sentence #87460000, processed 918008167 words, keeping 10899018 word types
2017-10-25 10:06:00,263 [MainThread  ] [INFO ]  PROGRESS: at sentence #87470000, processed 918112427 words, keeping 10899685 word types
2017-10-25 10:06:01,381 [MainThread  ] [INFO ]  PROGRESS: at sentence #87480000, processed 918221581 words, keeping 10900873 word types
2017-10-25 10:06:02,488 [MainThread  ] [INFO ]  PROGRESS: at sentence #87490000, processed 918330186 words, keeping 10901882 word types
2017-10-25 10:06:03,559 [MainThread  ] [INFO ]  

2017-10-25 10:07:01,884 [MainThread  ] [INFO ]  PROGRESS: at sentence #88040000, processed 924121737 words, keeping 10946562 word types
2017-10-25 10:07:03,033 [MainThread  ] [INFO ]  PROGRESS: at sentence #88050000, processed 924237557 words, keeping 10947505 word types
2017-10-25 10:07:04,126 [MainThread  ] [INFO ]  PROGRESS: at sentence #88060000, processed 924346778 words, keeping 10947968 word types
2017-10-25 10:07:05,232 [MainThread  ] [INFO ]  PROGRESS: at sentence #88070000, processed 924457850 words, keeping 10948673 word types
2017-10-25 10:07:06,269 [MainThread  ] [INFO ]  PROGRESS: at sentence #88080000, processed 924559391 words, keeping 10949851 word types
2017-10-25 10:07:07,314 [MainThread  ] [INFO ]  PROGRESS: at sentence #88090000, processed 924660185 words, keeping 10951117 word types
2017-10-25 10:07:08,398 [MainThread  ] [INFO ]  PROGRESS: at sentence #88100000, processed 924765835 words, keeping 10951843 word types
2017-10-25 10:07:09,481 [MainThread  ] [INFO ]  

2017-10-25 10:08:07,438 [MainThread  ] [INFO ]  PROGRESS: at sentence #88650000, processed 930533589 words, keeping 11000002 word types
2017-10-25 10:08:08,444 [MainThread  ] [INFO ]  PROGRESS: at sentence #88660000, processed 930630596 words, keeping 11000665 word types
2017-10-25 10:08:09,455 [MainThread  ] [INFO ]  PROGRESS: at sentence #88670000, processed 930727688 words, keeping 11001102 word types
2017-10-25 10:08:10,523 [MainThread  ] [INFO ]  PROGRESS: at sentence #88680000, processed 930833027 words, keeping 11002019 word types
2017-10-25 10:08:11,556 [MainThread  ] [INFO ]  PROGRESS: at sentence #88690000, processed 930932575 words, keeping 11002998 word types
2017-10-25 10:08:12,587 [MainThread  ] [INFO ]  PROGRESS: at sentence #88700000, processed 931034239 words, keeping 11003586 word types
2017-10-25 10:08:13,715 [MainThread  ] [INFO ]  PROGRESS: at sentence #88710000, processed 931147430 words, keeping 11004362 word types
2017-10-25 10:08:14,750 [MainThread  ] [INFO ]  

2017-10-25 10:09:13,567 [MainThread  ] [INFO ]  PROGRESS: at sentence #89260000, processed 936900194 words, keeping 11048589 word types
2017-10-25 10:09:14,739 [MainThread  ] [INFO ]  PROGRESS: at sentence #89270000, processed 937015028 words, keeping 11049218 word types
2017-10-25 10:09:15,859 [MainThread  ] [INFO ]  PROGRESS: at sentence #89280000, processed 937128026 words, keeping 11049956 word types
2017-10-25 10:09:16,953 [MainThread  ] [INFO ]  PROGRESS: at sentence #89290000, processed 937235268 words, keeping 11051240 word types
2017-10-25 10:09:17,994 [MainThread  ] [INFO ]  PROGRESS: at sentence #89300000, processed 937336417 words, keeping 11051986 word types
2017-10-25 10:09:19,055 [MainThread  ] [INFO ]  PROGRESS: at sentence #89310000, processed 937439218 words, keeping 11052760 word types
2017-10-25 10:09:20,097 [MainThread  ] [INFO ]  PROGRESS: at sentence #89320000, processed 937541115 words, keeping 11053272 word types
2017-10-25 10:09:21,161 [MainThread  ] [INFO ]  

2017-10-25 10:10:19,204 [MainThread  ] [INFO ]  PROGRESS: at sentence #89870000, processed 943334694 words, keeping 11098719 word types
2017-10-25 10:10:20,315 [MainThread  ] [INFO ]  PROGRESS: at sentence #89880000, processed 943443159 words, keeping 11099275 word types
2017-10-25 10:10:21,444 [MainThread  ] [INFO ]  PROGRESS: at sentence #89890000, processed 943553846 words, keeping 11100409 word types
2017-10-25 10:10:22,547 [MainThread  ] [INFO ]  PROGRESS: at sentence #89900000, processed 943664219 words, keeping 11101014 word types
2017-10-25 10:10:23,626 [MainThread  ] [INFO ]  PROGRESS: at sentence #89910000, processed 943769359 words, keeping 11102010 word types
2017-10-25 10:10:24,687 [MainThread  ] [INFO ]  PROGRESS: at sentence #89920000, processed 943872913 words, keeping 11102663 word types
2017-10-25 10:10:25,776 [MainThread  ] [INFO ]  PROGRESS: at sentence #89930000, processed 943976602 words, keeping 11103116 word types
2017-10-25 10:10:26,865 [MainThread  ] [INFO ]  

2017-10-25 10:11:25,507 [MainThread  ] [INFO ]  PROGRESS: at sentence #90480000, processed 949760947 words, keeping 11146610 word types
2017-10-25 10:11:26,608 [MainThread  ] [INFO ]  PROGRESS: at sentence #90490000, processed 949872058 words, keeping 11147355 word types
2017-10-25 10:11:27,645 [MainThread  ] [INFO ]  PROGRESS: at sentence #90500000, processed 949973450 words, keeping 11148107 word types
2017-10-25 10:11:28,764 [MainThread  ] [INFO ]  PROGRESS: at sentence #90510000, processed 950086409 words, keeping 11148772 word types
2017-10-25 10:11:29,848 [MainThread  ] [INFO ]  PROGRESS: at sentence #90520000, processed 950194474 words, keeping 11149550 word types
2017-10-25 10:11:30,914 [MainThread  ] [INFO ]  PROGRESS: at sentence #90530000, processed 950300260 words, keeping 11150296 word types
2017-10-25 10:11:31,984 [MainThread  ] [INFO ]  PROGRESS: at sentence #90540000, processed 950406932 words, keeping 11150836 word types
2017-10-25 10:11:33,056 [MainThread  ] [INFO ]  

2017-10-25 10:12:33,011 [MainThread  ] [INFO ]  PROGRESS: at sentence #91090000, processed 956248299 words, keeping 11198746 word types
2017-10-25 10:12:34,170 [MainThread  ] [INFO ]  PROGRESS: at sentence #91100000, processed 956353935 words, keeping 11199409 word types
2017-10-25 10:12:35,361 [MainThread  ] [INFO ]  PROGRESS: at sentence #91110000, processed 956464656 words, keeping 11200237 word types
2017-10-25 10:12:36,546 [MainThread  ] [INFO ]  PROGRESS: at sentence #91120000, processed 956572975 words, keeping 11201167 word types
2017-10-25 10:12:37,580 [MainThread  ] [INFO ]  PROGRESS: at sentence #91130000, processed 956661409 words, keeping 11202600 word types
2017-10-25 10:12:38,738 [MainThread  ] [INFO ]  PROGRESS: at sentence #91140000, processed 956769940 words, keeping 11203067 word types
2017-10-25 10:12:39,902 [MainThread  ] [INFO ]  PROGRESS: at sentence #91150000, processed 956877288 words, keeping 11204873 word types
2017-10-25 10:12:41,066 [MainThread  ] [INFO ]  

2017-10-25 10:13:42,867 [MainThread  ] [INFO ]  PROGRESS: at sentence #91700000, processed 962651907 words, keeping 11246888 word types
2017-10-25 10:13:43,955 [MainThread  ] [INFO ]  PROGRESS: at sentence #91710000, processed 962751091 words, keeping 11247612 word types
2017-10-25 10:13:45,083 [MainThread  ] [INFO ]  PROGRESS: at sentence #91720000, processed 962854764 words, keeping 11248228 word types
2017-10-25 10:13:46,257 [MainThread  ] [INFO ]  PROGRESS: at sentence #91730000, processed 962959640 words, keeping 11248620 word types
2017-10-25 10:13:47,377 [MainThread  ] [INFO ]  PROGRESS: at sentence #91740000, processed 963060580 words, keeping 11249610 word types
2017-10-25 10:13:48,576 [MainThread  ] [INFO ]  PROGRESS: at sentence #91750000, processed 963172112 words, keeping 11250547 word types
2017-10-25 10:13:49,733 [MainThread  ] [INFO ]  PROGRESS: at sentence #91760000, processed 963279403 words, keeping 11250929 word types
2017-10-25 10:13:50,857 [MainThread  ] [INFO ]  

2017-10-25 10:14:53,104 [MainThread  ] [INFO ]  PROGRESS: at sentence #92310000, processed 969072419 words, keeping 11298490 word types
2017-10-25 10:14:54,245 [MainThread  ] [INFO ]  PROGRESS: at sentence #92320000, processed 969177462 words, keeping 11298968 word types
2017-10-25 10:14:55,369 [MainThread  ] [INFO ]  PROGRESS: at sentence #92330000, processed 969280523 words, keeping 11299579 word types
2017-10-25 10:14:56,572 [MainThread  ] [INFO ]  PROGRESS: at sentence #92340000, processed 969393066 words, keeping 11300249 word types
2017-10-25 10:14:57,730 [MainThread  ] [INFO ]  PROGRESS: at sentence #92350000, processed 969500073 words, keeping 11300837 word types
2017-10-25 10:14:58,852 [MainThread  ] [INFO ]  PROGRESS: at sentence #92360000, processed 969602367 words, keeping 11301594 word types
2017-10-25 10:15:00,011 [MainThread  ] [INFO ]  PROGRESS: at sentence #92370000, processed 969709561 words, keeping 11302183 word types
2017-10-25 10:15:01,164 [MainThread  ] [INFO ]  

2017-10-25 10:16:03,338 [MainThread  ] [INFO ]  PROGRESS: at sentence #92920000, processed 975516101 words, keeping 11346494 word types
2017-10-25 10:16:04,504 [MainThread  ] [INFO ]  PROGRESS: at sentence #92930000, processed 975625433 words, keeping 11347220 word types
2017-10-25 10:16:05,653 [MainThread  ] [INFO ]  PROGRESS: at sentence #92940000, processed 975729114 words, keeping 11347700 word types
2017-10-25 10:16:06,803 [MainThread  ] [INFO ]  PROGRESS: at sentence #92950000, processed 975836909 words, keeping 11348335 word types
2017-10-25 10:16:07,945 [MainThread  ] [INFO ]  PROGRESS: at sentence #92960000, processed 975942778 words, keeping 11348966 word types
2017-10-25 10:16:09,075 [MainThread  ] [INFO ]  PROGRESS: at sentence #92970000, processed 976045760 words, keeping 11349644 word types
2017-10-25 10:16:10,233 [MainThread  ] [INFO ]  PROGRESS: at sentence #92980000, processed 976150844 words, keeping 11350422 word types
2017-10-25 10:16:11,409 [MainThread  ] [INFO ]  

2017-10-25 10:17:13,246 [MainThread  ] [INFO ]  PROGRESS: at sentence #93530000, processed 981902930 words, keeping 11395550 word types
2017-10-25 10:17:14,395 [MainThread  ] [INFO ]  PROGRESS: at sentence #93540000, processed 982008408 words, keeping 11396454 word types
2017-10-25 10:17:15,531 [MainThread  ] [INFO ]  PROGRESS: at sentence #93550000, processed 982113051 words, keeping 11396955 word types
2017-10-25 10:17:16,626 [MainThread  ] [INFO ]  PROGRESS: at sentence #93560000, processed 982213180 words, keeping 11397529 word types
2017-10-25 10:17:17,767 [MainThread  ] [INFO ]  PROGRESS: at sentence #93570000, processed 982319667 words, keeping 11398012 word types
2017-10-25 10:17:18,929 [MainThread  ] [INFO ]  PROGRESS: at sentence #93580000, processed 982427146 words, keeping 11398530 word types
2017-10-25 10:17:20,089 [MainThread  ] [INFO ]  PROGRESS: at sentence #93590000, processed 982535545 words, keeping 11399156 word types
2017-10-25 10:17:21,234 [MainThread  ] [INFO ]  

In [8]:
# Persist the trained Word2Vec model to disk. As the log below shows, gensim
# stores the large numpy arrays (syn0, syn1neg) in sibling .npy files next to
# this path.
model.save('../data/lingvo/w2v_300_sg_5_w10_trigram')

2017-10-24 16:58:41,140 [MainThread  ] [INFO ]  saving Word2Vec object under ../data/lingvo/w2v_300_sg_5_w10_trigram, separately None
2017-10-24 16:58:41,141 [MainThread  ] [INFO ]  not storing attribute syn0norm
2017-10-24 16:58:41,142 [MainThread  ] [INFO ]  storing np array 'syn0' to ../data/lingvo/w2v_300_sg_5_w10_trigram.wv.syn0.npy
2017-10-24 16:58:59,723 [MainThread  ] [INFO ]  storing np array 'syn1neg' to ../data/lingvo/w2v_300_sg_5_w10_trigram.syn1neg.npy
2017-10-24 16:59:18,031 [MainThread  ] [INFO ]  not storing attribute cum_table
2017-10-24 16:59:46,558 [MainThread  ] [INFO ]  saved ../data/lingvo/w2v_300_sg_5_w10_trigram


In [11]:
# Print the words most similar to u'стол' together with their similarity
# scores, one "word score" pair per line.
for neighbour, similarity in model.wv.most_similar(u'стол'):
    print('%s %s' % (neighbour, similarity))

нпм 0.297611892223
выполпеп 0.277081847191
130g 0.268700033426
балаковский_аэс 0.257275521755
(125/25 0.256969988346
оранжевый_спектросмещать 0.253416627645
замена_к32е. 0.252790808678
сверхразведение_матричный 0.250886440277
цикло-(D-Val-NMeArg-Gly-Asp-Mamb(5-aca 0.249853163958
аэро_космоснимок 0.24928149581


# Save for Misha

In [5]:
# Load the previously saved trigram Phraser; its `phrasegrams` mapping is what
# the following cell filters into a collocation list.
trigram_ph = gensim.models.phrases.Phraser.load('../data/lingvo/trigram_ph')

2017-10-24 09:33:56,826 [MainThread  ] [INFO ]  loading Phraser object from ../data/lingvo/trigram_ph
2017-10-24 09:33:58,577 [MainThread  ] [INFO ]  loaded ../data/lingvo/trigram_ph


In [29]:
# Build a table of collocations from the phraser: keep only entries whose two
# statistics pass the thresholds and whose first two tokens each contain at
# least one Cyrillic or Latin letter.
# NOTE(review): v[0] / v[1] are presumably the (count, score) pair gensim
# stores per phrasegram -- confirm against the installed gensim version.
has_letter = re.compile('[а-яА-Яa-zA-Z]').search

l = []
for k, v in tqdm_notebook(trigram_ph.phrasegrams.items()):
    # Threshold on both statistics first; everything else is skipped.
    if not (v[0] > 10 and v[1] > 30):
        continue
    if has_letter(k[0]) and has_letter(k[1]):
        # Row layout: [tokens joined with '_', v[0], v[1]]
        l.append(['_'.join(k), v[0], v[1]])

# Number of collocations kept (same value the running counter used to hold).
c = len(l)

# Columns are left unnamed (0, 1, 2); later cells address the string as df[0].
df = pd.DataFrame(l)




In [None]:
# df.sort_values(by=1, ascending=False)

In [30]:
# Number of distinct collocation strings (column 0 of df holds the
# '_'-joined tokens).
len(df[0].unique())

1279338

In [34]:
# Dump every distinct collocation string (column 0 of df), sorted, one per
# line, into a gzip-compressed text file.
# NOTE(review): the file name says "bigram" although df is built from
# trigram_ph.phrasegrams -- confirm the intended name.
with GzipFile('../data/collocations_bigram_all.txt.gz', 'w') as f:
    for tag in sorted(df[0].unique()):
        # A GzipFile opened with 'w' is a binary stream. Encode text
        # explicitly instead of relying on implicit coercion, which raises
        # TypeError on Python 3 and UnicodeEncodeError for non-ASCII unicode
        # on Python 2. Byte strings are written through unchanged.
        line = tag if isinstance(tag, bytes) else tag.encode('utf-8')
        f.write(line + b'\n')

In [2]:
import xml.etree.ElementTree as ET

# Parse the XML file with the reference results and keep its root element;
# the next cell walks `root` to build the gold-standard mapping.
tree = ET.parse('../data/TestDocumentsResult.xml')
root = tree.getroot()

In [30]:
# Map each top-level element's docId to the docIds found two levels below it
# (child -> group element -> entries carrying a 'docId' attribute), in
# document order.
gold = {}
for child in root:
    sims = [sim.attrib['docId'] for similars in child for sim in similars]
    gold[child.attrib['docId']] = sims