In [1]:
# mask v2 0.80463
import pandas as pd
import numpy as np
import gc
from base import Cache
from tqdm import tqdm
from gensim.models import Word2Vec
import sys
from tensorflow.keras.preprocessing.sequence import pad_sequences
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('precision', 5)
pd.set_option('display.float_format', lambda x: '%.5f' % x)
pd.set_option('max_colwidth', 200)
pd.set_option('display.width', 5000)
# # log
# class Logger(object):
#     def __init__(self, fileN="Default.log"):
#         self.terminal = sys.stdout
#         self.log = open(fileN, "a", encoding='utf-8')
# 
#     def write(self, message):
#         self.terminal.write(message)
#         self.log.write(message)
# 
#     def flush(self):
#         pass
# sys.stdout = Logger("zlh0918log.txt")

def reduce_mem(df, use_float16=False):
    start_mem = df.memory_usage().sum() / 1024**2
    tm_cols = df.select_dtypes('datetime').columns
    for col in df.columns:
        if col in tm_cols:
            continue
        col_type = df[col].dtypes
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(
                        np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(
                        np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(
                        np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(
                        np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if use_float16 and c_min > np.finfo(
                        np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(
                        np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    print('{:.2f} Mb, {:.2f} Mb ({:.2f} %)'.format(
        start_mem, end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

def w2v_pro(df_raw, sentence_id, word_id, emb_size=128, 
            dropna=False, n_jobs=16, method='cbow', 
            hs=1,negative=0,epoch=10,return_model=True):
    if method.lower() in ['sg','skipgram']:
        sg=1
        logger.info("## Use skip-gram ##")
    elif method.lower() in ['cbow']:
        sg=0
        logger.info("## Use CBOW ##")
    else:
        raise NotImplementedError
    list_col_nm = f'{sentence_id}__{word_id}_list'
    if (n_jobs is None) or (n_jobs <= 0):
        n_jobs = multiprocessing.cpu_count()
    logger.info(f"========== W2V:  {sentence_id} {word_id} ==========")
    df = df_raw[[sentence_id, word_id]].copy()
    if df[sentence_id].isnull().sum() > 0:
        logger.warning("NaNs exist in sentence_id column!!")
    if dropna:
        df = df.dropna(subset=[sentence_id, word_id])
    else:
        df = df.fillna('NULL')
    df = df.astype(str)
    tmp = df.groupby(sentence_id,
                     as_index=False)[word_id].agg({list_col_nm: list})
    sentences = tmp[list_col_nm].values.tolist()
    all_words_vocabulary = df[word_id].unique().tolist()
    del tmp[list_col_nm]
    gc.collect()
    model = Word2Vec(
        sentences,
        size=emb_size,
        window=30,
        workers=n_jobs,
        min_count=1,  # 最低词频. min_count>1会出现OOV
        sg=sg,  # 1 for skip-gram; otherwise CBOW.
        hs=hs,  # If 1, hierarchical softmax will be used for model training
        negative=negative,  # hs=1 + negative 负采样
        iter=epoch,
        seed=0)
    
    # get word embedding matrix
    emb_dict = {}
    for word_i in all_words_vocabulary:
        if word_i in model.wv:
            emb_dict[word_i] = model.wv[word_i]
        else:
            emb_dict[word_i] = np.zeros(emb_size)
            
    return {"word_emb_dict": emb_dict}

def get_sequence(data,col,max_len=None):
    key2index = {}
    def split(x):
        for key in x:
            if key not in key2index:
                # Notice : input value 0 is a special "padding", 
                # so we do not use 0 to encode valid feature for sequence input
                key2index[key] = len(key2index) + 1 # 从1开始，0用于padding
        return list(map(lambda x: key2index[x], x))
    
    # preprocess the sequence feature
    id_list = list(map(split, data[col].values))# 转index
    id_list_length = np.array(list(map(len, id_list)))
    # max_len = max(genres_length)
    if max_len is None:
        max_len = int(np.percentile(id_list_length,99))
    id_list = pad_sequences(id_list, maxlen=max_len, padding='post',truncating='post')
    return id_list,key2index

def gen_list_df(feature):
    print(f'{feature} start!')
    data = Cache.reload_cache('CACHE_data_step_1_feature_0917_r5.pkl')
    if feature =='label':
        data['label'] = data['label'].fillna(2).astype(int)# mask 2
    data = data[['uid',feature,'pt_d']]
    gc.collect()
    print(data.shape)
    data_group = data.groupby(['uid'])
    gc.collect()
    index_list = []
    feature_list = []
    print('index_list start')
    for name,group in tqdm(data_group):
        index_list.append(name)    
    print('feature_list start')
    for i in tqdm(index_list):
        index_get_group = data_group.get_group(i)
        ptd_set = set(index_get_group['pt_d'].values.flatten().tolist())
        for j in ptd_set:
            feature_list_ = []
            buf_list = []
            buf_list = index_get_group.query('pt_d < @j')[feature].values.flatten().tolist()
            buf_list.append(2)# padding 0
            feature_list_.append(buf_list)# 行为序列
            feature_list_.append(j)# pt_d
            feature_list_.append(i)# uid
            feature_list.append(feature_list_)

    list_df = pd.DataFrame(feature_list)
    del index_list,feature_list,feature_list_,data_group,index_get_group,ptd_set
    gc.collect()
    list_df.columns=['list','pt_d','uid']
    list_df['list'] = list_df['list'].map(lambda x: [str(i) for i in x])# 转str
    list_df = list_df.drop_duplicates(subset=['pt_d','uid'])
#     data_uid_ptd = data[['uid','pt_d']]
    list_df = data.merge(list_df,how='left',on=('uid','pt_d'))# 顺序还是用data的顺序
    # 加入当天本样本
    if feature!='label':
        list_df['list'] = list_df[feature].map(lambda x:[str(x)]) + list_df['list']
    print('w2v start!')
    emb_size = 32
    model = Word2Vec(
    list_df['list'].values.tolist(),
    size=emb_size,
    window=5,
    workers=5,
    min_count=1,  # 最低词频. min_count>1会出现OOV
    sg=0,  # 1 for skip-gram; otherwise CBOW.
    hs=0,  # If 1, hierarchical softmax will be used for model training
    negative=5,  # hs=1 + negative 负采样
    iter=5,
    seed=0)
    # 1 获取seq
    id_list,key2index = get_sequence(list_df,'list',max_len=40)
    # 2 获取key2index
    emb_dict = {}
    for word_i in list(model.wv.vocab.keys()):
        if word_i in model.wv:
            emb_dict[word_i] = model.wv[word_i]
        else:
            emb_dict[word_i] = np.zeros(emb_size)
    # 3 保存
    id_list_dict={}
    id_list_dict['id_list'] = id_list
    id_list_dict['key2index'] = key2index
    id_list_dict['emb'] = emb_dict
    Cache.cache_data(id_list_dict, nm_marker=f'EMB_INPUTSEQ_V2_{feature}')
    print(f'{feature} done!')

from multiprocessing import Pool
if __name__ == '__main__':
    # 获取过去的list + 当前的一行
    # 得到id_list_dict和tx一样
    poc_feature_list = ['creat_type_cd','tags','spread_app_id','task_id','adv_id','label']#'task_id','adv_id','dev_id','inter_type_cd','spread_app_id','tags','app_first_class','app_second_class','his_app_size','his_on_shelf_time','app_score',,'creat_type_cd','adv_prim_id','indu_name'
    with Pool(6) as p:
        p.map(gen_list_df, poc_feature_list)


spread_app_id start!
label start!
task_id start!
creat_type_cd start!
adv_id start!
tags start!


[2020-09-19 20:07:41] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_data_step_1_feature_0917_r5.pkl
[2020-09-19 20:07:41] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_data_step_1_feature_0917_r5.pkl
[2020-09-19 20:07:41] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_data_step_1_feature_0917_r5.pkl
[2020-09-19 20:07:42] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_data_step_1_feature_0917_r5.pkl
[2020-09-19 20:07:42] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_data_step_1_feature_0917_r5.pkl
[2020-09-19 20:07:42] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_data_step_1_feature_0917_r5.pkl


(8601298, 3)
(8601298, 3)
index_list start
(8601298, 3)
index_list start
index_list start
(8601298, 3)
index_list start
(8601298, 3)
(8601298, 3)
index_list start
index_list start


100%|██████████| 1139171/1139171 [01:57<00:00, 9706.09it/s] 


feature_list start


100%|██████████| 1139171/1139171 [01:57<00:00, 9654.81it/s] 


feature_list start


 91%|█████████▏| 1040363/1139171 [01:57<00:10, 9462.32it/s]


feature_list start


  0%|          | 2225/1139171 [00:10<1:20:35, 235.10it/s]s]


feature_list start


100%|██████████| 1139171/1139171 [02:08<00:00, 8860.80it/s]


feature_list start


100%|██████████| 1139171/1139171 [02:09<00:00, 8767.73it/s]


feature_list start


100%|██████████| 1139171/1139171 [1:24:14<00:00, 225.36it/s]
100%|██████████| 1139171/1139171 [1:24:23<00:00, 224.97it/s]
100%|██████████| 1139171/1139171 [1:24:38<00:00, 224.33it/s]
 96%|█████████▌| 1088706/1139171 [1:24:49<04:23, 191.82it/s]

w2v start!


 96%|█████████▌| 1091238/1139171 [1:24:48<03:17, 242.29it/s][2020-09-19 21:35:12] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-19 21:35:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-19 21:35:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #10000, processed 115381 words, keeping 8 word types
[2020-09-19 21:35:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #20000, processed 222690 words, keeping 8 word types
 96%|█████████▌| 1093549/1139171 [1:24:50<03:40, 207.00it/s][2020-09-19 21:35:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #30000, processed 322708 words, keeping 8 word types
[2020-09-19 21:35:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #40000, processed 436895 words, keeping 8 word types
 96%|█████████▌| 1088755/1139171 [1:24:50<03:55, 214.33it/s][2020-09-19 21:35:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #50000, p

[2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #530000, processed 5532556 words, keeping 8 word types
 96%|█████████▌| 1091445/1139171 [1:24:48<03:22, 235.17it/s][2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #540000, processed 5635955 words, keeping 8 word types
 96%|█████████▌| 1088962/1139171 [1:24:50<03:21, 249.29it/s][2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #550000, processed 5744999 words, keeping 8 word types
 96%|█████████▌| 1093767/1139171 [1:24:51<03:14, 232.88it/s][2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #560000, processed 5852758 words, keeping 8 word types
[2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #570000, processed 5962024 words, keeping 8 word types
[2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #580000, processed 6076133 words, keeping 8 word types
[2020-09-19 21:35:13] - word

[2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1060000, processed 11266552 words, keeping 8 word types
[2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1070000, processed 11382741 words, keeping 8 word types
[2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1080000, processed 11498953 words, keeping 8 word types
 96%|█████████▌| 1089185/1139171 [1:24:51<03:17, 253.07it/s][2020-09-19 21:35:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1090000, processed 11611308 words, keeping 8 word types
[2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1100000, processed 11718567 words, keeping 8 word types
 96%|█████████▌| 1091634/1139171 [1:24:49<03:46, 210.34it/s][2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1110000, processed 11822060 words, keeping 8 word types
 96%|█████████▌| 1093985/1139171 [1:24:52<03:20, 224.94it/s][2020-09-19 21:3

 96%|█████████▌| 1089315/1139171 [1:24:52<04:42, 176.36it/s][2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1590000, processed 16930924 words, keeping 8 word types
[2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1600000, processed 17042812 words, keeping 8 word types
 96%|█████████▌| 1094158/1139171 [1:24:53<03:26, 218.33it/s][2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1610000, processed 17151051 words, keeping 8 word types
[2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1620000, processed 17257240 words, keeping 8 word types
[2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1630000, processed 17362433 words, keeping 8 word types
 96%|█████████▌| 1091815/1139171 [1:24:50<03:39, 216.01it/s][2020-09-19 21:35:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1640000, processed 17480225 words, keeping 8 word types
[2020-09-19 21:3

 96%|█████████▌| 1094325/1139171 [1:24:54<03:29, 214.46it/s][2020-09-19 21:35:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2100000, processed 22409690 words, keeping 8 word types
[2020-09-19 21:35:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2110000, processed 22511849 words, keeping 8 word types
 96%|█████████▌| 1091976/1139171 [1:24:51<03:53, 201.84it/s][2020-09-19 21:35:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2120000, processed 22623159 words, keeping 8 word types
[2020-09-19 21:35:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2130000, processed 22736758 words, keeping 8 word types
 96%|█████████▌| 1089478/1139171 [1:24:53<04:27, 185.89it/s][2020-09-19 21:35:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2140000, processed 22849050 words, keeping 8 word types
 96%|█████████▌| 1094354/1139171 [1:24:54<03:14, 230.77it/s][2020-09-19 21:35:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2150000, pro

[2020-09-19 21:35:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2610000, processed 27838082 words, keeping 8 word types
[2020-09-19 21:35:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2620000, processed 27951863 words, keeping 8 word types
 96%|█████████▌| 1094550/1139171 [1:24:55<03:08, 236.30it/s][2020-09-19 21:35:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2630000, processed 28056717 words, keeping 8 word types
 96%|█████████▌| 1092157/1139171 [1:24:52<03:49, 205.08it/s][2020-09-19 21:35:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2640000, processed 28156903 words, keeping 8 word types
 96%|█████████▌| 1089654/1139171 [1:24:54<03:59, 206.48it/s][2020-09-19 21:35:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2650000, processed 28263550 words, keeping 8 word types
[2020-09-19 21:35:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2660000, processed 28368335 words, keeping 8 word types
[2020-09-19 21:3

[2020-09-19 21:35:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3130000, processed 33434884 words, keeping 8 word types
[2020-09-19 21:35:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3140000, processed 33543120 words, keeping 8 word types
[2020-09-19 21:35:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3150000, processed 33640547 words, keeping 8 word types
 96%|█████████▌| 1092320/1139171 [1:24:53<04:23, 177.53it/s][2020-09-19 21:35:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3160000, processed 33740852 words, keeping 8 word types
[2020-09-19 21:35:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3170000, processed 33853471 words, keeping 8 word types
[2020-09-19 21:35:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3180000, processed 33971014 words, keeping 8 word types
 96%|█████████▌| 1089846/1139171 [1:24:55<03:47, 217.17it/s][2020-09-19 21:35:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence

[2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3670000, processed 39220050 words, keeping 8 word types
[2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3680000, processed 39320908 words, keeping 8 word types
[2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3690000, processed 39430050 words, keeping 8 word types
[2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3700000, processed 39521597 words, keeping 8 word types
[2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3710000, processed 39616131 words, keeping 8 word types
 96%|█████████▌| 1090008/1139171 [1:24:55<04:04, 201.06it/s][2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3720000, processed 39721055 words, keeping 8 word types
 96%|█████████▌| 1094939/1139171 [1:24:56<03:49, 192.43it/s][2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence

 96%|█████████▌| 1095122/1139171 [1:24:57<03:12, 228.44it/s][2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4190000, processed 44717526 words, keeping 8 word types
[2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4200000, processed 44819273 words, keeping 8 word types
[2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4210000, processed 44922783 words, keeping 8 word types
 96%|█████████▌| 1092677/1139171 [1:24:54<03:30, 220.85it/s][2020-09-19 21:35:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4220000, processed 45027207 words, keeping 8 word types
[2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4230000, processed 45125600 words, keeping 8 word types
[2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4240000, processed 45225944 words, keeping 8 word types
 96%|█████████▌| 1090191/1139171 [1:24:56<03:43, 218.82it/s][2020-09-19 21:3

[2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4730000, processed 50513187 words, keeping 8 word types
 96%|█████████▌| 1090370/1139171 [1:24:57<03:22, 241.43it/s][2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4740000, processed 50620482 words, keeping 8 word types
[2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4750000, processed 50727251 words, keeping 8 word types
[2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4760000, processed 50839878 words, keeping 8 word types
 96%|█████████▌| 1092894/1139171 [1:24:55<03:01, 255.65it/s][2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4770000, processed 50940561 words, keeping 8 word types
[2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4780000, processed 51056086 words, keeping 8 word types
[2020-09-19 21:35:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence

 96%|█████████▌| 1090550/1139171 [1:24:58<03:19, 243.33it/s][2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5260000, processed 56286469 words, keeping 8 word types
[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5270000, processed 56388419 words, keeping 8 word types
[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5280000, processed 56493575 words, keeping 8 word types
[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5290000, processed 56597709 words, keeping 8 word types
[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5300000, processed 56703158 words, keeping 8 word types
[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5310000, processed 56798684 words, keeping 8 word types
 96%|█████████▌| 1093084/1139171 [1:24:56<03:14, 236.70it/s][2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence

w2v start!


[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5480000, processed 58641980 words, keeping 8 word types
[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5490000, processed 58749740 words, keeping 8 word types
[2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5500000, processed 58857366 words, keeping 8 word types
 96%|█████████▌| 1093154/1139171 [1:24:56<03:27, 221.79it/s][2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5510000, processed 58973747 words, keeping 8 word types
 96%|█████████▌| 1090658/1139171 [1:24:58<03:11, 253.54it/s][2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5520000, processed 59084670 words, keeping 8 word types
 96%|█████████▌| 1095548/1139171 [1:24:59<03:49, 189.90it/s][2020-09-19 21:35:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5530000, processed 59179202 words, keeping 8 word types
[2020-09-19 21:3

[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #280000, processed 2934371 words, keeping 31 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5760000, processed 61638883 words, keeping 8 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #290000, processed 3048037 words, keeping 31 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5770000, processed 61749809 words, keeping 8 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #300000, processed 3149852 words, keeping 31 word types
 96%|█████████▌| 1090764/1139171 [1:24:59<04:02, 200.01it/s][2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5780000, processed 61872829 words, keeping 8 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #310000, processed 3241969 words, keeping 31 word types
[2020-

[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #560000, processed 5852758 words, keeping 31 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6050000, processed 64775207 words, keeping 8 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #570000, processed 5962024 words, keeping 31 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #580000, processed 6076133 words, keeping 31 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6060000, processed 64883564 words, keeping 8 word types
 96%|█████████▌| 1090856/1139171 [1:24:59<03:50, 209.63it/s][2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6070000, processed 64985275 words, keeping 8 word types
[2020-09-19 21:35:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #590000, processed 6176995 words, keeping 31 word types
[2020-

[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6310000, processed 67580111 words, keeping 8 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #860000, processed 9075577 words, keeping 31 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6320000, processed 67694334 words, keeping 8 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #870000, processed 9179635 words, keeping 31 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6330000, processed 67802610 words, keeping 8 word types
 96%|█████████▌| 1090950/1139171 [1:25:00<03:46, 212.60it/s][2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #880000, processed 9291939 words, keeping 31 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6340000, processed 67899719 words, keeping 8 word types
[2020

 96%|█████████▌| 1091045/1139171 [1:25:00<03:41, 217.04it/s][2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6590000, processed 70524208 words, keeping 8 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1150000, processed 12252873 words, keeping 31 word types
 96%|█████████▌| 1096008/1139171 [1:25:01<02:41, 267.46it/s][2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1160000, processed 12372410 words, keeping 31 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6600000, processed 70621601 words, keeping 8 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1170000, processed 12476925 words, keeping 31 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6610000, processed 70719478 words, keeping 8 word types
[2020-09-19 21:35:22] - word2vec.py[line:1384] - INFO: PROGRESS: at sente

[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1450000, processed 15466430 words, keeping 31 word types
 96%|█████████▌| 1096119/1139171 [1:25:01<02:56, 244.35it/s][2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6850000, processed 73248034 words, keeping 8 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1460000, processed 15578069 words, keeping 31 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6860000, processed 73351897 words, keeping 8 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1470000, processed 15685123 words, keeping 31 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1480000, processed 15785319 words, keeping 31 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6870000, processed 73468487 words, keeping 8 word type

[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7100000, processed 75988112 words, keeping 8 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7110000, processed 76086751 words, keeping 8 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1770000, processed 18847591 words, keeping 31 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1780000, processed 18957244 words, keeping 31 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7120000, processed 76212871 words, keeping 8 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7130000, processed 76314205 words, keeping 8 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1790000, processed 19064518 words, keeping 31 word types
[2020-09-19 21:35:23] - word2vec.py[line:1384] - INFO: PROG

[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2040000, processed 21754971 words, keeping 31 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7400000, processed 79258561 words, keeping 8 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2050000, processed 21857570 words, keeping 31 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7410000, processed 79368601 words, keeping 8 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2060000, processed 21972940 words, keeping 31 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7420000, processed 79473041 words, keeping 8 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2070000, processed 22082021 words, keeping 31 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PRO

 96%|█████████▌| 1093990/1139171 [1:25:00<03:15, 230.62it/s][2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2300000, processed 24548313 words, keeping 31 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7710000, processed 82570428 words, keeping 8 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2310000, processed 24658500 words, keeping 31 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7720000, processed 82687657 words, keeping 8 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7730000, processed 82789826 words, keeping 8 word types
 96%|█████████▌| 1091418/1139171 [1:25:02<04:23, 181.30it/s][2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2320000, processed 24761243 words, keeping 31 word types
[2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sente

 96%|█████████▌| 1094092/1139171 [1:25:00<03:07, 240.49it/s][2020-09-19 21:35:24] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2530000, processed 26997406 words, keeping 31 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8040000, processed 86058185 words, keeping 8 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2540000, processed 27093965 words, keeping 31 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8050000, processed 86164090 words, keeping 8 word types
 96%|█████████▌| 1091522/1139171 [1:25:02<03:26, 230.69it/s][2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8060000, processed 86260222 words, keeping 8 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2550000, processed 27193884 words, keeping 31 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sente

[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8360000, processed 89482379 words, keeping 8 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2780000, processed 29710779 words, keeping 31 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8370000, processed 89584073 words, keeping 8 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2790000, processed 29818722 words, keeping 31 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8380000, processed 89690011 words, keeping 8 word types
 96%|█████████▌| 1091624/1139171 [1:25:03<03:25, 231.50it/s][2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2800000, processed 29928703 words, keeping 31 word types
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8390000, processed 89806167 words, keeping 8 word types

[2020-09-19 21:35:25] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 92062788 word corpus (100% of original 92062788, drops 0)
[2020-09-19 21:35:25] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 8 items
[2020-09-19 21:35:25] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3070000, processed 32805129 words, keeping 31 word types
[2020-09-19 21:35:25] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 8 most-common words
[2020-09-19 21:35:25] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 7254971 word corpus (7.9% of prior 92062788)
[2020-09-19 21:35:25] - base_any2vec.py[line:1008] - INFO: estimated required memory for 8 words and 32 dimensions: 6048 bytes
[2020-09-19 21:35:25] - word2vec.py[line:1699] - INFO: resetting layer weights
[2020-09-19 21:35:25] - base_any2vec.py[line:1196] - INFO: training model with 5 workers on 8 vocabulary and 32 features, using sg=0 hs=0 sample=0.001 negative=5 window=5
 96%|█████████▋

 96%|█████████▌| 1094419/1139171 [1:25:02<03:21, 221.85it/s][2020-09-19 21:35:26] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3530000, processed 37726563 words, keeping 31 word types
 96%|█████████▋| 1096931/1139171 [1:25:05<02:51, 246.29it/s][2020-09-19 21:35:26] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3540000, processed 37827474 words, keeping 31 word types
[2020-09-19 21:35:26] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3550000, processed 37937925 words, keeping 31 word types
[2020-09-19 21:35:26] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3560000, processed 38047617 words, keeping 31 word types
[2020-09-19 21:35:26] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3570000, processed 38154024 words, keeping 31 word types
[2020-09-19 21:35:26] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3580000, processed 38261841 words, keeping 31 word types
 96%|█████████▌| 1091912/1139171 [1:25:04<03:46, 208.99it/s][2020-09-1

 96%|█████████▋| 1097146/1139171 [1:25:06<02:57, 236.20it/s][2020-09-19 21:35:27] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4050000, processed 43232237 words, keeping 31 word types
[2020-09-19 21:35:27] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4060000, processed 43339822 words, keeping 31 word types
 96%|█████████▌| 1092067/1139171 [1:25:05<04:03, 193.30it/s][2020-09-19 21:35:27] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4070000, processed 43451324 words, keeping 31 word types
 96%|█████████▌| 1094619/1139171 [1:25:03<03:36, 205.43it/s][2020-09-19 21:35:27] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4080000, processed 43557540 words, keeping 31 word types
[2020-09-19 21:35:27] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4090000, processed 43664319 words, keeping 31 word types
[2020-09-19 21:35:27] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4100000, processed 43771202 words, keeping 31 word types
[2020-09-1

 96%|█████████▌| 1094781/1139171 [1:25:04<03:49, 193.50it/s][2020-09-19 21:35:28] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4560000, processed 48652327 words, keeping 31 word types
[2020-09-19 21:35:28] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4570000, processed 48762451 words, keeping 31 word types
[2020-09-19 21:35:28] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4580000, processed 48879638 words, keeping 31 word types
[2020-09-19 21:35:28] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4590000, processed 48997815 words, keeping 31 word types
 96%|█████████▋| 1097347/1139171 [1:25:06<03:18, 210.22it/s][2020-09-19 21:35:28] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4600000, processed 49106240 words, keeping 31 word types
 96%|█████████▌| 1092249/1139171 [1:25:06<03:30, 222.49it/s][2020-09-19 21:35:28] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4610000, processed 49217394 words, keeping 31 word types
 96%|█████

 96%|█████████▋| 1097524/1139171 [1:25:07<03:14, 214.48it/s][2020-09-19 21:35:29] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5070000, processed 54228840 words, keeping 31 word types
[2020-09-19 21:35:29] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5080000, processed 54350933 words, keeping 31 word types
 96%|█████████▌| 1092454/1139171 [1:25:06<03:21, 231.33it/s][2020-09-19 21:35:29] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5090000, processed 54476991 words, keeping 31 word types
 96%|█████████▌| 1094978/1139171 [1:25:05<03:38, 201.96it/s][2020-09-19 21:35:29] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5100000, processed 54584228 words, keeping 31 word types
[2020-09-19 21:35:29] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5110000, processed 54688389 words, keeping 31 word types
[2020-09-19 21:35:29] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5120000, processed 54792498 words, keeping 31 word types
 96%|█████

 96%|█████████▌| 1095177/1139171 [1:25:05<02:52, 254.50it/s][2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5570000, processed 59599355 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5580000, processed 59702761 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5590000, processed 59803085 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5600000, processed 59907306 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5610000, processed 60004764 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5620000, processed 60115290 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5630000, processed 60221816 words, keeping 31 word t

 96%|█████████▌| 1092792/1139171 [1:25:08<03:32, 218.76it/s][2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6070000, processed 64985275 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6080000, processed 65094365 words, keeping 31 word types
 96%|█████████▌| 1095384/1139171 [1:25:06<02:57, 246.64it/s][2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6090000, processed 65199600 words, keeping 31 word types
[2020-09-19 21:35:30] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6100000, processed 65326562 words, keeping 31 word types
 96%|█████████▋| 1097932/1139171 [1:25:09<02:56, 233.48it/s][2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6110000, processed 65432177 words, keeping 31 word types
[2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6120000, processed 65545412 words, keeping 31 word types
 96%|█████

[2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6580000, processed 70415645 words, keeping 31 word types
[2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6590000, processed 70524208 words, keeping 31 word types
 96%|█████████▌| 1093035/1139171 [1:25:09<02:59, 256.94it/s][2020-09-19 21:35:31] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 18.51% examples, 222071 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6600000, processed 70621601 words, keeping 31 word types
[2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6610000, processed 70719478 words, keeping 31 word types
 96%|█████████▌| 1095577/1139171 [1:25:07<03:50, 188.93it/s][2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6620000, processed 70817091 words, keeping 31 word types
[2020-09-19 21:35:31] - word2vec.py[line:1384] - INFO: PROGRESS: a

[2020-09-19 21:35:32] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7110000, processed 76086751 words, keeping 31 word types
[2020-09-19 21:35:32] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7120000, processed 76212871 words, keeping 31 word types
 96%|█████████▌| 1093227/1139171 [1:25:10<03:21, 228.30it/s][2020-09-19 21:35:32] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7130000, processed 76314205 words, keeping 31 word types
 96%|█████████▌| 1095752/1139171 [1:25:08<03:33, 203.43it/s][2020-09-19 21:35:32] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7140000, processed 76424836 words, keeping 31 word types
[2020-09-19 21:35:32] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7150000, processed 76533722 words, keeping 31 word types
[2020-09-19 21:35:32] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7160000, processed 76648221 words, keeping 31 word types
[2020-09-19 21:35:32] - word2vec.py[line:1384] - INFO: PROGRESS: at se

 96%|█████████▌| 1093446/1139171 [1:25:11<03:18, 229.84it/s][2020-09-19 21:35:33] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7630000, processed 81697502 words, keeping 31 word types
[2020-09-19 21:35:33] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7640000, processed 81804326 words, keeping 31 word types
[2020-09-19 21:35:33] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7650000, processed 81910518 words, keeping 31 word types
[2020-09-19 21:35:33] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7660000, processed 82019221 words, keeping 31 word types
[2020-09-19 21:35:33] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7670000, processed 82125007 words, keeping 31 word types
[2020-09-19 21:35:33] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7680000, processed 82237593 words, keeping 31 word types
 96%|█████████▌| 1095976/1139171 [1:25:09<02:59, 240.67it/s][2020-09-19 21:35:33] - word2vec.py[line:1384] - INFO: PROGRESS: at se

 96%|█████████▌| 1096165/1139171 [1:25:10<02:57, 242.65it/s][2020-09-19 21:35:34] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8130000, processed 86997836 words, keeping 31 word types
 96%|█████████▋| 1098722/1139171 [1:25:13<02:59, 225.08it/s][2020-09-19 21:35:34] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8140000, processed 87108512 words, keeping 31 word types
[2020-09-19 21:35:34] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8150000, processed 87207679 words, keeping 31 word types
[2020-09-19 21:35:34] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8160000, processed 87327702 words, keeping 31 word types
 96%|█████████▌| 1093639/1139171 [1:25:12<03:34, 212.30it/s][2020-09-19 21:35:34] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8170000, processed 87439263 words, keeping 31 word types
[2020-09-19 21:35:34] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8180000, processed 87541749 words, keeping 31 word types
[2020-09-1

[2020-09-19 21:35:35] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 31 items
[2020-09-19 21:35:35] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 18 most-common words
[2020-09-19 21:35:35] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 12467977 word corpus (13.5% of prior 92062788)
[2020-09-19 21:35:35] - base_any2vec.py[line:1008] - INFO: estimated required memory for 31 words and 32 dimensions: 23436 bytes
[2020-09-19 21:35:35] - word2vec.py[line:1699] - INFO: resetting layer weights
[2020-09-19 21:35:35] - base_any2vec.py[line:1196] - INFO: training model with 5 workers on 31 vocabulary and 32 features, using sg=0 hs=0 sample=0.001 negative=5 window=5
 96%|█████████▌| 1096372/1139171 [1:25:11<03:07, 228.67it/s]

w2v start!


 96%|█████████▌| 1096395/1139171 [1:25:11<03:16, 217.81it/s][2020-09-19 21:35:35] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-19 21:35:35] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
 96%|█████████▌| 1093872/1139171 [1:25:13<02:57, 254.62it/s][2020-09-19 21:35:35] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #10000, processed 115381 words, keeping 69 word types
[2020-09-19 21:35:35] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #20000, processed 222690 words, keeping 71 word types
 96%|█████████▋| 1098934/1139171 [1:25:14<03:25, 195.45it/s][2020-09-19 21:35:35] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #30000, processed 322708 words, keeping 72 word types
[2020-09-19 21:35:35] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #40000, processed 436895 words, keeping 74 word types
[2020-09-19 21:35:35] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5000

 96%|█████████▋| 1096604/1139171 [1:25:12<02:53, 245.08it/s][2020-09-19 21:35:36] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #510000, processed 5335418 words, keeping 74 word types
[2020-09-19 21:35:36] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #520000, processed 5437517 words, keeping 74 word types
[2020-09-19 21:35:36] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #530000, processed 5532556 words, keeping 74 word types
[2020-09-19 21:35:36] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #540000, processed 5635955 words, keeping 74 word types
 96%|█████████▋| 1099129/1139171 [1:25:14<02:53, 230.52it/s][2020-09-19 21:35:36] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #550000, processed 5744999 words, keeping 74 word types
 96%|█████████▌| 1094042/1139171 [1:25:14<04:18, 174.40it/s][2020-09-19 21:35:36] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #560000, processed 5852758 words, keeping 74 word types
 96%|█████████▋| 10966

 96%|█████████▋| 1099294/1139171 [1:25:15<03:19, 200.17it/s][2020-09-19 21:35:37] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1020000, processed 10839774 words, keeping 74 word types
[2020-09-19 21:35:37] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 5.78% examples, 353344 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:35:37] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1030000, processed 10944295 words, keeping 74 word types
 96%|█████████▌| 1094215/1139171 [1:25:15<03:39, 205.26it/s][2020-09-19 21:35:37] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1040000, processed 11047323 words, keeping 74 word types
[2020-09-19 21:35:37] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1050000, processed 11157886 words, keeping 74 word types
[2020-09-19 21:35:37] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1060000, processed 11266552 words, keeping 74 word types
[2020-09-19 21:35:37] - word2vec.py[line:1384] - INFO: PROGRESS: at

 96%|█████████▌| 1094433/1139171 [1:25:15<02:50, 262.90it/s][2020-09-19 21:35:38] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1520000, processed 16178505 words, keeping 74 word types
[2020-09-19 21:35:38] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1530000, processed 16283476 words, keeping 74 word types
 96%|█████████▋| 1097003/1139171 [1:25:14<03:25, 204.88it/s][2020-09-19 21:35:38] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1540000, processed 16400828 words, keeping 74 word types
 97%|█████████▋| 1099466/1139171 [1:25:16<03:48, 173.93it/s][2020-09-19 21:35:38] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1550000, processed 16505412 words, keeping 74 word types
[2020-09-19 21:35:38] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1560000, processed 16601138 words, keeping 74 word types
[2020-09-19 21:35:38] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1570000, processed 16701033 words, keeping 74 word types
[2020-09-1

 96%|█████████▋| 1097186/1139171 [1:25:14<03:17, 212.42it/s][2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2030000, processed 21637385 words, keeping 74 word types
[2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2040000, processed 21754971 words, keeping 74 word types
 97%|█████████▋| 1099623/1139171 [1:25:17<03:32, 186.49it/s][2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2050000, processed 21857570 words, keeping 74 word types
[2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2060000, processed 21972940 words, keeping 74 word types
 96%|█████████▌| 1094679/1139171 [1:25:16<02:52, 258.18it/s][2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2070000, processed 22082021 words, keeping 74 word types
[2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2080000, processed 22195696 words, keeping 74 word types
 96%|█████

[2020-09-19 21:35:39] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 42.14% examples, 217155 words/s, in_qsize 10, out_qsize 2
[2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2530000, processed 26997406 words, keeping 74 word types
 96%|█████████▋| 1097358/1139171 [1:25:15<03:36, 193.12it/s][2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2540000, processed 27093965 words, keeping 74 word types
[2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2550000, processed 27193884 words, keeping 74 word types
 96%|█████████▌| 1094888/1139171 [1:25:17<03:06, 237.69it/s][2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2560000, processed 27299380 words, keeping 74 word types
 97%|█████████▋| 1099830/1139171 [1:25:18<03:11, 205.34it/s][2020-09-19 21:35:39] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2570000, processed 27410812 words, keeping 74 word types
[2020

[2020-09-19 21:35:40] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3040000, processed 32497863 words, keeping 74 word types
[2020-09-19 21:35:40] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3050000, processed 32603422 words, keeping 74 word types
[2020-09-19 21:35:40] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3060000, processed 32709503 words, keeping 74 word types
 97%|█████████▋| 1099977/1139171 [1:25:19<03:48, 171.59it/s][2020-09-19 21:35:40] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3070000, processed 32805129 words, keeping 74 word types
 96%|█████████▋| 1097530/1139171 [1:25:16<03:39, 189.74it/s][2020-09-19 21:35:40] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3080000, processed 32911269 words, keeping 74 word types
[2020-09-19 21:35:40] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3090000, processed 33016163 words, keeping 74 word types
 96%|█████████▌| 1095081/1139171 [1:25:18<03:46, 194.81it/s][2020-09-1

[2020-09-19 21:35:41] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3540000, processed 37827474 words, keeping 74 word types
 96%|█████████▋| 1097705/1139171 [1:25:17<03:36, 191.81it/s][2020-09-19 21:35:41] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3550000, processed 37937925 words, keeping 74 word types
[2020-09-19 21:35:41] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3560000, processed 38047617 words, keeping 74 word types
 96%|█████████▌| 1095249/1139171 [1:25:19<03:51, 189.76it/s][2020-09-19 21:35:41] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3570000, processed 38154024 words, keeping 74 word types
 97%|█████████▋| 1100148/1139171 [1:25:20<03:24, 190.38it/s][2020-09-19 21:35:41] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3580000, processed 38261841 words, keeping 74 word types
[2020-09-19 21:35:41] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3590000, processed 38358686 words, keeping 74 word types
[2020-09-1

[2020-09-19 21:35:42] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4030000, processed 43018852 words, keeping 74 word types
[2020-09-19 21:35:42] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4040000, processed 43129902 words, keeping 74 word types
 96%|█████████▋| 1097894/1139171 [1:25:18<03:00, 228.63it/s][2020-09-19 21:35:42] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4050000, processed 43232237 words, keeping 74 word types
[2020-09-19 21:35:42] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4060000, processed 43339822 words, keeping 74 word types
 97%|█████████▋| 1100317/1139171 [1:25:21<03:15, 198.48it/s][2020-09-19 21:35:42] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4070000, processed 43451324 words, keeping 74 word types
[2020-09-19 21:35:42] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4080000, processed 43557540 words, keeping 74 word types
 96%|█████████▌| 1095420/1139171 [1:25:20<03:56, 184.95it/s][2020-09-1

 96%|█████████▌| 1095563/1139171 [1:25:21<03:48, 190.81it/s][2020-09-19 21:35:43] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4530000, processed 48323313 words, keeping 75 word types
[2020-09-19 21:35:43] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4540000, processed 48442492 words, keeping 75 word types
 96%|█████████▋| 1098095/1139171 [1:25:19<03:00, 227.55it/s][2020-09-19 21:35:43] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4550000, processed 48550189 words, keeping 75 word types
[2020-09-19 21:35:43] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4560000, processed 48652327 words, keeping 75 word types
[2020-09-19 21:35:43] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4570000, processed 48762451 words, keeping 75 word types
[2020-09-19 21:35:43] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4580000, processed 48879638 words, keeping 75 word types
 96%|█████████▌| 1095583/1139171 [1:25:21<03:47, 191.28it/s][2020-09-1

[2020-09-19 21:35:44] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5030000, processed 53783045 words, keeping 75 word types
[2020-09-19 21:35:44] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 26.09% examples, 358697 words/s, in_qsize 10, out_qsize 2
[2020-09-19 21:35:44] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5040000, processed 53891920 words, keeping 75 word types
[2020-09-19 21:35:44] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5050000, processed 53995036 words, keeping 75 word types
 96%|█████████▌| 1095774/1139171 [1:25:22<03:38, 198.99it/s][2020-09-19 21:35:44] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5060000, processed 54101899 words, keeping 75 word types
 97%|█████████▋| 1100712/1139171 [1:25:22<02:50, 226.21it/s][2020-09-19 21:35:44] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5070000, processed 54228840 words, keeping 75 word types
[2020-09-19 21:35:44] - word2vec.py[line:1384] - INFO: PROGRESS: 

 96%|█████████▌| 1095958/1139171 [1:25:22<03:14, 222.41it/s][2020-09-19 21:35:45] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5530000, processed 59179202 words, keeping 75 word types
[2020-09-19 21:35:45] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5540000, processed 59283121 words, keeping 75 word types
 96%|█████████▋| 1098521/1139171 [1:25:21<03:31, 192.28it/s][2020-09-19 21:35:45] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5550000, processed 59385710 words, keeping 75 word types
[2020-09-19 21:35:45] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5560000, processed 59495679 words, keeping 75 word types
 97%|█████████▋| 1100905/1139171 [1:25:23<03:18, 192.97it/s][2020-09-19 21:35:45] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5570000, processed 59599355 words, keeping 75 word types
 96%|█████████▌| 1095982/1139171 [1:25:23<03:10, 226.98it/s][2020-09-19 21:35:45] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5580000

 97%|█████████▋| 1101077/1139171 [1:25:24<02:56, 216.28it/s][2020-09-19 21:35:46] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6020000, processed 64445164 words, keeping 75 word types
[2020-09-19 21:35:46] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6030000, processed 64552896 words, keeping 75 word types
 96%|█████████▋| 1098678/1139171 [1:25:22<03:55, 171.62it/s][2020-09-19 21:35:46] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6040000, processed 64666036 words, keeping 75 word types
[2020-09-19 21:35:46] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6050000, processed 64775207 words, keeping 75 word types
[2020-09-19 21:35:46] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6060000, processed 64883564 words, keeping 75 word types
[2020-09-19 21:35:46] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6070000, processed 64985275 words, keeping 75 word types
 96%|█████████▌| 1096164/1139171 [1:25:23<03:39, 195.85it/s][2020-09-1

 96%|█████████▌| 1096325/1139171 [1:25:24<03:15, 219.64it/s][2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6530000, processed 69890947 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6540000, processed 70005981 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6550000, processed 70111236 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6560000, processed 70207482 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6570000, processed 70310373 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6580000, processed 70415645 words, keeping 75 word types
 96%|█████████▌| 1096348/1139171 [1:25:24<03:15, 219.23it/s][2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at se

[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7050000, processed 75422846 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7060000, processed 75531110 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7070000, processed 75648482 words, keeping 75 word types
 97%|█████████▋| 1101390/1139171 [1:25:26<03:24, 184.87it/s][2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7080000, processed 75753000 words, keeping 75 word types
 96%|█████████▋| 1096505/1139171 [1:25:25<03:21, 211.93it/s][2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7090000, processed 75855345 words, keeping 75 word types
[2020-09-19 21:35:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7100000, processed 75988112 words, keeping 75 word types
[2020-09-19 21:35:47] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - P

[2020-09-19 21:35:48] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7540000, processed 80762160 words, keeping 75 word types
[2020-09-19 21:35:48] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7550000, processed 80861051 words, keeping 75 word types
 96%|█████████▋| 1099207/1139171 [1:25:24<03:17, 201.96it/s][2020-09-19 21:35:48] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7560000, processed 80969727 words, keeping 75 word types
[2020-09-19 21:35:48] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7570000, processed 81074029 words, keeping 75 word types
 96%|█████████▋| 1096701/1139171 [1:25:26<03:20, 211.73it/s][2020-09-19 21:35:48] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7580000, processed 81177308 words, keeping 75 word types
[2020-09-19 21:35:48] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7590000, processed 81275807 words, keeping 75 word types
[2020-09-19 21:35:48] - word2vec.py[line:1384] - INFO: PROGRESS: at se

[2020-09-19 21:35:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8060000, processed 86260222 words, keeping 75 word types
[2020-09-19 21:35:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8070000, processed 86377331 words, keeping 75 word types
[2020-09-19 21:35:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8080000, processed 86474275 words, keeping 75 word types
[2020-09-19 21:35:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8090000, processed 86583171 words, keeping 75 word types
[2020-09-19 21:35:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8100000, processed 86681416 words, keeping 75 word types
 97%|█████████▋| 1101739/1139171 [1:25:28<03:01, 206.37it/s][2020-09-19 21:35:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8110000, processed 86784547 words, keeping 75 word types
[2020-09-19 21:35:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8120000, processed 86896459 words, keeping 75 word t

 96%|█████████▋| 1097071/1139171 [1:25:28<03:00, 232.81it/s][2020-09-19 21:35:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8580000, processed 91812428 words, keeping 75 word types
[2020-09-19 21:35:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8590000, processed 91917329 words, keeping 75 word types
 97%|█████████▋| 1101932/1139171 [1:25:29<02:48, 221.53it/s][2020-09-19 21:35:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8600000, processed 92051558 words, keeping 75 word types
[2020-09-19 21:35:50] - word2vec.py[line:1407] - INFO: collected 75 word types from a corpus of 92062788 raw words and 8601298 sentences
[2020-09-19 21:35:50] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-19 21:35:50] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 75 unique words (100% of original 75, drops 0)
[2020-09-19 21:35:50] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 92062788 word corpus (100% of original 9

 96%|█████████▋| 1099191/1139171 [1:25:38<03:24, 195.77it/s][2020-09-19 21:36:00] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 26.56% examples, 554221 words/s, in_qsize 8, out_qsize 1
 97%|█████████▋| 1104031/1139171 [1:25:39<03:11, 183.97it/s][2020-09-19 21:36:01] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 73.46% examples, 350847 words/s, in_qsize 9, out_qsize 0
 97%|█████████▋| 1099405/1139171 [1:25:39<02:53, 229.70it/s][2020-09-19 21:36:01] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 5.97% examples, 210976 words/s, in_qsize 8, out_qsize 1
 97%|█████████▋| 1101948/1139171 [1:25:37<03:03, 203.12it/s][2020-09-19 21:36:01] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 29.20% examples, 553469 words/s, in_qsize 8, out_qsize 1
 97%|█████████▋| 1104232/1139171 [1:25:40<02:50, 205.26it/s][2020-09-19 21:36:02] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 76.34% examples, 350683 words/s, in_qsize 9, out_qsize 0
 97%|█

 97%|█████████▋| 1104315/1139171 [1:25:49<02:32, 227.86it/s][2020-09-19 21:36:13] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 60.96% examples, 552803 words/s, in_qsize 9, out_qsize 0
 97%|█████████▋| 1104339/1139171 [1:25:49<02:41, 215.46it/s][2020-09-19 21:36:13] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 8.37% examples, 341631 words/s, in_qsize 8, out_qsize 1
 97%|█████████▋| 1102046/1139171 [1:25:52<02:51, 216.90it/s][2020-09-19 21:36:14] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 44.12% examples, 211836 words/s, in_qsize 7, out_qsize 2
[2020-09-19 21:36:14] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 63.62% examples, 552895 words/s, in_qsize 9, out_qsize 0
 97%|█████████▋| 1102070/1139171 [1:25:52<02:48, 219.97it/s][2020-09-19 21:36:14] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 11.20% examples, 344683 words/s, in_qsize 7, out_qsize 2
 97%|█████████▋| 1102221/1139171 [1:25:53<03:43, 165.16it/s][2020-

 97%|█████████▋| 1109586/1139171 [1:26:06<02:33, 193.13it/s][2020-09-19 21:36:27] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 47.93% examples, 348746 words/s, in_qsize 7, out_qsize 2
 97%|█████████▋| 1107355/1139171 [1:26:04<03:02, 174.20it/s][2020-09-19 21:36:28] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 4 more threads
[2020-09-19 21:36:28] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 3 more threads
[2020-09-19 21:36:28] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 2 more threads
[2020-09-19 21:36:28] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 1 more threads
[2020-09-19 21:36:28] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 0 more threads
[2020-09-19 21:36:28] - base_any2vec.py[line:1332] - INFO: EPOCH - 1 : training on 92062788 raw words (20977075 effective words) took 37.9s, 553339 effective wor

 97%|█████████▋| 1107299/1139171 [1:26:18<03:03, 173.83it/s][2020-09-19 21:36:40] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 20.70% examples, 213525 words/s, in_qsize 8, out_qsize 1
 97%|█████████▋| 1109932/1139171 [1:26:16<02:10, 224.83it/s][2020-09-19 21:36:40] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 31.36% examples, 542697 words/s, in_qsize 7, out_qsize 2
 98%|█████████▊| 1112322/1139171 [1:26:19<02:17, 194.90it/s][2020-09-19 21:36:40] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 84.58% examples, 349654 words/s, in_qsize 9, out_qsize 0
 97%|█████████▋| 1107532/1139171 [1:26:19<02:11, 240.38it/s][2020-09-19 21:36:41] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 23.70% examples, 213880 words/s, in_qsize 8, out_qsize 1
 98%|█████████▊| 1112461/1139171 [1:26:20<02:14, 198.98it/s][2020-09-19 21:36:41] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 34.01% examples, 543832 words/s, in_qsize 9, out_qsize 0
 97%|

 98%|█████████▊| 1112864/1139171 [1:26:29<01:41, 258.02it/s][2020-09-19 21:36:53] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 19.55% examples, 345885 words/s, in_qsize 7, out_qsize 2
 98%|█████████▊| 1114779/1139171 [1:26:32<02:04, 196.01it/s][2020-09-19 21:36:53] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 59.92% examples, 216480 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:36:53] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 65.68% examples, 546915 words/s, in_qsize 8, out_qsize 1
 98%|█████████▊| 1114910/1139171 [1:26:32<02:11, 184.82it/s][2020-09-19 21:36:54] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 22.38% examples, 345722 words/s, in_qsize 9, out_qsize 0
 97%|█████████▋| 1110332/1139171 [1:26:32<02:38, 182.22it/s][2020-09-19 21:36:54] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 62.85% examples, 216192 words/s, in_qsize 9, out_qsize 0
 98%|█████████▊| 1114968/1139171 [1:26:33<02:15, 178.63it/s][202

 98%|█████████▊| 1115524/1139171 [1:26:42<02:04, 190.25it/s][2020-09-19 21:37:06] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 4 more threads
[2020-09-19 21:37:06] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 3 more threads
[2020-09-19 21:37:06] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 2 more threads
[2020-09-19 21:37:06] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 1 more threads
[2020-09-19 21:37:06] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 0 more threads
[2020-09-19 21:37:06] - base_any2vec.py[line:1332] - INFO: EPOCH - 2 : training on 92062788 raw words (20970370 effective words) took 38.2s, 548689 effective words/s
 98%|█████████▊| 1117589/1139171 [1:26:45<01:56, 184.60it/s][2020-09-19 21:37:07] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 4 more threads
[2020-09-19 21:37:

 98%|█████████▊| 1119957/1139171 [1:26:57<01:23, 230.02it/s][2020-09-19 21:37:18] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 89.61% examples, 347320 words/s, in_qsize 9, out_qsize 0
 98%|█████████▊| 1120035/1139171 [1:26:57<01:19, 240.58it/s][2020-09-19 21:37:18] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 31.29% examples, 540796 words/s, in_qsize 9, out_qsize 0
 98%|█████████▊| 1115353/1139171 [1:26:56<02:10, 182.40it/s][2020-09-19 21:37:19] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 35.42% examples, 213179 words/s, in_qsize 9, out_qsize 0
 98%|█████████▊| 1118088/1139171 [1:26:55<01:51, 189.91it/s][2020-09-19 21:37:19] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 92.52% examples, 347770 words/s, in_qsize 7, out_qsize 2
 98%|█████████▊| 1115472/1139171 [1:26:57<02:11, 180.08it/s][2020-09-19 21:37:19] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 33.84% examples, 541125 words/s, in_qsize 9, out_qsize 0
 98%|

 98%|█████████▊| 1120473/1139171 [1:27:07<01:25, 218.09it/s][2020-09-19 21:37:31] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 25.11% examples, 345360 words/s, in_qsize 9, out_qsize 0
 98%|█████████▊| 1117827/1139171 [1:27:08<01:49, 194.12it/s][2020-09-19 21:37:31] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 71.06% examples, 213940 words/s, in_qsize 8, out_qsize 1
 98%|█████████▊| 1120614/1139171 [1:27:07<01:38, 187.87it/s][2020-09-19 21:37:31] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 65.34% examples, 544646 words/s, in_qsize 8, out_qsize 1
 98%|█████████▊| 1120658/1139171 [1:27:07<01:37, 189.35it/s][2020-09-19 21:37:32] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 27.97% examples, 346725 words/s, in_qsize 7, out_qsize 2
 98%|█████████▊| 1120683/1139171 [1:27:08<01:30, 204.09it/s][2020-09-19 21:37:32] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 74.01% examples, 213870 words/s, in_qsize 9, out_qsize 0
 99%|

 98%|█████████▊| 1120659/1139171 [1:27:21<01:37, 189.91it/s][2020-09-19 21:37:44] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 61.87% examples, 348527 words/s, in_qsize 6, out_qsize 3
 98%|█████████▊| 1120806/1139171 [1:27:22<01:36, 191.23it/s][2020-09-19 21:37:44] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 11.94% examples, 214819 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:37:44] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 99.66% examples, 546896 words/s, in_qsize 8, out_qsize 1
 98%|█████████▊| 1120831/1139171 [1:27:22<01:29, 204.91it/s][2020-09-19 21:37:45] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 4 more threads
[2020-09-19 21:37:45] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 3 more threads
[2020-09-19 21:37:45] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 2 more threads
[2020-09-19 21:37:45] - base_any2vec.py[line:348] - INF

 99%|█████████▊| 1123297/1139171 [1:27:34<01:22, 192.02it/s][2020-09-19 21:37:57] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 31.62% examples, 548489 words/s, in_qsize 10, out_qsize 0
 99%|█████████▉| 1125816/1139171 [1:27:33<01:03, 211.29it/s][2020-09-19 21:37:57] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 98.47% examples, 349370 words/s, in_qsize 9, out_qsize 0
 99%|█████████▉| 1127831/1139171 [1:27:36<00:58, 193.77it/s][2020-09-19 21:37:57] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 4 more threads
[2020-09-19 21:37:57] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 3 more threads
[2020-09-19 21:37:57] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 2 more threads
[2020-09-19 21:37:57] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 1 more threads
[2020-09-19 21:37:57] - base_any2vec.py[line:348] - INFO: worker thread finis

 99%|█████████▉| 1128467/1139171 [1:27:45<00:54, 196.52it/s][2020-09-19 21:38:09] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 33.68% examples, 347591 words/s, in_qsize 10, out_qsize 2
 99%|█████████▉| 1130521/1139171 [1:27:48<00:36, 234.68it/s][2020-09-19 21:38:10] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 86.12% examples, 214863 words/s, in_qsize 8, out_qsize 1
 99%|█████████▉| 1126033/1139171 [1:27:47<00:55, 234.78it/s][2020-09-19 21:38:10] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 66.09% examples, 551378 words/s, in_qsize 8, out_qsize 1
 99%|█████████▉| 1128684/1139171 [1:27:46<00:43, 238.78it/s][2020-09-19 21:38:10] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 36.59% examples, 348276 words/s, in_qsize 7, out_qsize 2
 99%|█████████▉| 1128733/1139171 [1:27:46<00:45, 228.03it/s][2020-09-19 21:38:11] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 89.13% examples, 214883 words/s, in_qsize 9, out_qsize 0
 99%

 99%|█████████▉| 1131391/1139171 [1:27:59<00:34, 223.77it/s][2020-09-19 21:38:23] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 73.53% examples, 351008 words/s, in_qsize 8, out_qsize 1
 99%|█████████▉| 1128962/1139171 [1:28:01<00:55, 184.10it/s][2020-09-19 21:38:24] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 2.65% examples, 539541 words/s, in_qsize 9, out_qsize 0
100%|█████████▉| 1133640/1139171 [1:28:03<00:29, 189.98it/s][2020-09-19 21:38:24] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 76.40% examples, 350904 words/s, in_qsize 8, out_qsize 1
100%|█████████▉| 1133662/1139171 [1:28:03<00:28, 195.80it/s][2020-09-19 21:38:25] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 5.29% examples, 540084 words/s, in_qsize 9, out_qsize 0
 99%|█████████▉| 1131822/1139171 [1:28:01<00:32, 227.65it/s][2020-09-19 21:38:25] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 79.31% examples, 350854 words/s, in_qsize 8, out_qsize 1
 99%|██

100%|██████████| 1139171/1139171 [1:28:29<00:00, 214.56it/s]
100%|█████████▉| 1137079/1139171 [1:28:26<00:10, 192.26it/s][2020-09-19 21:38:51] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 74.87% examples, 557837 words/s, in_qsize 9, out_qsize 0
100%|█████████▉| 1135029/1139171 [1:28:29<00:20, 204.35it/s][2020-09-19 21:38:52] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 77.54% examples, 557509 words/s, in_qsize 6, out_qsize 3
100%|█████████▉| 1137481/1139171 [1:28:28<00:07, 230.75it/s][2020-09-19 21:38:53] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 80.29% examples, 557915 words/s, in_qsize 9, out_qsize 2
100%|█████████▉| 1137701/1139171 [1:28:30<00:08, 181.40it/s][2020-09-19 21:38:54] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 83.04% examples, 558859 words/s, in_qsize 9, out_qsize 0
100%|█████████▉| 1135669/1139171 [1:28:32<00:14, 242.68it/s][2020-09-19 21:38:55] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 8

creat_type_cd done!


100%|█████████▉| 1138752/1139171 [1:28:46<00:01, 214.77it/s]

w2v start!


[2020-09-19 21:39:09] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-19 21:39:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-19 21:39:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #10000, processed 105381 words, keeping 3 word types
[2020-09-19 21:39:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #20000, processed 202690 words, keeping 3 word types
[2020-09-19 21:39:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #30000, processed 292708 words, keeping 3 word types
100%|█████████▉| 1138775/1139171 [1:28:47<00:02, 195.16it/s][2020-09-19 21:39:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #40000, processed 396895 words, keeping 3 word types
[2020-09-19 21:39:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #50000, processed 491839 words, keeping 3 word types
[2020-09-19 21:39:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence

[2020-09-19 21:39:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #590000, processed 5586995 words, keeping 3 word types
[2020-09-19 21:39:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #600000, processed 5694194 words, keeping 3 word types
100%|█████████▉| 1138947/1139171 [1:28:47<00:01, 203.43it/s][2020-09-19 21:39:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #610000, processed 5795005 words, keeping 3 word types
[2020-09-19 21:39:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #620000, processed 5897821 words, keeping 3 word types
[2020-09-19 21:39:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #630000, processed 5995358 words, keeping 3 word types
[2020-09-19 21:39:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #640000, processed 6087760 words, keeping 3 word types
[2020-09-19 21:39:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #650000, processed 6194494 words, keeping 3 word types
[2020-09-19 21:3

[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1180000, processed 11413035 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1190000, processed 11512748 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1200000, processed 11605665 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1210000, processed 11696693 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1220000, processed 11784561 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1230000, processed 11875830 words, keeping 3 word types
100%|█████████▉| 1139107/1139171 [1:28:48<00:00, 162.00it/s][2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1240000, processed 11968977 words, keeping 3 word types
[2

[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1770000, processed 17077591 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1780000, processed 17177244 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1790000, processed 17274518 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1800000, processed 17371184 words, keeping 3 word types
[2020-09-19 21:39:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1810000, processed 17465996 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1820000, processed 17562370 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1830000, processed 17651623 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2390000, processed 23125149 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2400000, processed 23215863 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2410000, processed 23323738 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2420000, processed 23419669 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2430000, processed 23511710 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2440000, processed 23606967 words, keeping 3 word types
[2020-09-19 21:39:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2450000, processed 23708249 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3010000, processed 29169531 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3020000, processed 29268396 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3030000, processed 29360789 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3040000, processed 29457863 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3050000, processed 29553422 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3060000, processed 29649503 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3070000, processed 29735129 words, keeping 3 word types
[2020-09-19 21:39:13] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3630000, processed 35151660 words, keeping 3 word types
[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3640000, processed 35249457 words, keeping 3 word types
[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3650000, processed 35348466 words, keeping 3 word types
[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3660000, processed 35442613 words, keeping 3 word types
[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3670000, processed 35550050 words, keeping 3 word types
[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3680000, processed 35640908 words, keeping 3 word types
[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3690000, processed 35740050 words, keeping 3 word types
[2020-09-19 21:39:14] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4250000, processed 41084806 words, keeping 3 word types
[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4260000, processed 41193844 words, keeping 3 word types
[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4270000, processed 41291771 words, keeping 3 word types
[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4280000, processed 41403949 words, keeping 3 word types
[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4290000, processed 41510254 words, keeping 3 word types
[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4300000, processed 41603163 words, keeping 3 word types
[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4310000, processed 41695605 words, keeping 3 word types
[2020-09-19 21:39:15] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4870000, processed 47159173 words, keeping 3 word types
[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4880000, processed 47258528 words, keeping 3 word types
[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4890000, processed 47374912 words, keeping 3 word types
[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4900000, processed 47480589 words, keeping 3 word types
[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4910000, processed 47576816 words, keeping 3 word types
[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4920000, processed 47668152 words, keeping 3 word types
[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4930000, processed 47760479 words, keeping 3 word types
[2020-09-19 21:39:16] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5490000, processed 53259740 words, keeping 3 word types
[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5500000, processed 53357366 words, keeping 3 word types
[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5510000, processed 53463747 words, keeping 3 word types
[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5520000, processed 53564670 words, keeping 3 word types
[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5530000, processed 53649202 words, keeping 3 word types
[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5540000, processed 53743121 words, keeping 3 word types
[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5550000, processed 53835710 words, keeping 3 word types
[2020-09-19 21:39:17] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6110000, processed 59322177 words, keeping 3 word types
[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6120000, processed 59425412 words, keeping 3 word types
[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6130000, processed 59525270 words, keeping 3 word types
[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6140000, processed 59630300 words, keeping 3 word types
[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6150000, processed 59734604 words, keeping 3 word types
[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6160000, processed 59832527 words, keeping 3 word types
[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6170000, processed 59936064 words, keeping 3 word types
[2020-09-19 21:39:18] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6730000, processed 65247022 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6740000, processed 65339118 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6750000, processed 65425787 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6760000, processed 65514121 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6770000, processed 65609153 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6780000, processed 65709761 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6790000, processed 65821256 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7350000, processed 71367506 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7360000, processed 71462764 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7370000, processed 71555803 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7380000, processed 71658970 words, keeping 3 word types
[2020-09-19 21:39:19] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7390000, processed 71758148 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7400000, processed 71858561 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7410000, processed 71958601 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7970000, processed 77380448 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7980000, processed 77465129 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7990000, processed 77539365 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8000000, processed 77647117 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8010000, processed 77740257 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8020000, processed 77835276 words, keeping 3 word types
[2020-09-19 21:39:20] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8030000, processed 77918196 words, keeping 3 word types
[2020-09-19 21:39:21] - word2vec.py[line:1384] - INFO: PROGRES

[2020-09-19 21:39:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8590000, processed 83327329 words, keeping 3 word types
[2020-09-19 21:39:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8600000, processed 83451558 words, keeping 3 word types
[2020-09-19 21:39:21] - word2vec.py[line:1407] - INFO: collected 3 word types from a corpus of 83461490 raw words and 8601298 sentences
[2020-09-19 21:39:21] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-19 21:39:21] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 3 unique words (100% of original 3, drops 0)
[2020-09-19 21:39:21] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 83461490 word corpus (100% of original 83461490, drops 0)
[2020-09-19 21:39:21] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 3 items
[2020-09-19 21:39:21] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 3 most-common words
[2020-09-19 21:39:21] - word2vec.py[line:

tags done!


[2020-09-19 21:39:27] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 20.44% examples, 141758 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:39:28] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 23.57% examples, 140167 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:39:29] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 26.61% examples, 138656 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:39:30] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 29.69% examples, 137218 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:39:31] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 32.68% examples, 136249 words/s, in_qsize 7, out_qsize 2
[2020-09-19 21:39:32] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 35.73% examples, 135270 words/s, in_qsize 9, out_qsize 4
[2020-09-19 21:39:33] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 38.60% examples, 134041 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:39:34] - bas

w2v start!


[2020-09-19 21:39:49] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-19 21:39:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-19 21:39:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #10000, processed 115381 words, keeping 1713 word types
[2020-09-19 21:39:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #20000, processed 222690 words, keeping 2272 word types
[2020-09-19 21:39:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #30000, processed 322708 words, keeping 2566 word types
[2020-09-19 21:39:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #40000, processed 436895 words, keeping 2817 word types
[2020-09-19 21:39:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #50000, processed 541839 words, keeping 2931 word types
[2020-09-19 21:39:49] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #60000, processed 641207 words, keeping 3052

[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #600000, processed 6294194 words, keeping 4298 word types
[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #610000, processed 6405005 words, keeping 4310 word types
[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #620000, processed 6517821 words, keeping 4316 word types
[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #630000, processed 6625358 words, keeping 4319 word types
[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #640000, processed 6727760 words, keeping 4328 word types
[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #650000, processed 6844494 words, keeping 4340 word types
[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #660000, processed 6952074 words, keeping 4345 word types
[2020-09-19 21:39:50] - word2vec.py[line:1384] - INFO: 

[2020-09-19 21:39:51] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1200000, processed 12805665 words, keeping 4615 word types
[2020-09-19 21:39:51] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1210000, processed 12906693 words, keeping 4618 word types
[2020-09-19 21:39:51] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1220000, processed 13004561 words, keeping 4620 word types
[2020-09-19 21:39:51] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1230000, processed 13105830 words, keeping 4623 word types
[2020-09-19 21:39:51] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1240000, processed 13208977 words, keeping 4627 word types
[2020-09-19 21:39:51] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1250000, processed 13322515 words, keeping 4628 word types
[2020-09-19 21:39:51] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1260000, processed 13436829 words, keeping 4631 word types
[2020-09-19 21:39:51] - word2vec.py[line:

[2020-09-19 21:39:52] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1790000, processed 19064518 words, keeping 4794 word types
[2020-09-19 21:39:52] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1800000, processed 19171184 words, keeping 4794 word types
[2020-09-19 21:39:52] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1810000, processed 19275996 words, keeping 4796 word types
[2020-09-19 21:39:52] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1820000, processed 19382370 words, keeping 4799 word types
[2020-09-19 21:39:52] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1830000, processed 19481623 words, keeping 4801 word types
[2020-09-19 21:39:52] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1840000, processed 19579200 words, keeping 4804 word types
[2020-09-19 21:39:52] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1850000, processed 19686001 words, keeping 4808 word types
[2020-09-19 21:39:52] - word2vec.py[line:

spread_app_id done!


[2020-09-19 21:39:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2250000, processed 23993972 words, keeping 4881 word types
[2020-09-19 21:39:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2260000, processed 24108934 words, keeping 4882 word types
[2020-09-19 21:39:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2270000, processed 24228464 words, keeping 4884 word types
[2020-09-19 21:39:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2280000, processed 24330363 words, keeping 4887 word types
[2020-09-19 21:39:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2290000, processed 24443457 words, keeping 4890 word types
[2020-09-19 21:39:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2300000, processed 24548313 words, keeping 4892 word types
[2020-09-19 21:39:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2310000, processed 24658500 words, keeping 4892 word types
[2020-09-19 21:39:53] - word2vec.py[line:

[2020-09-19 21:39:54] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2790000, processed 29818722 words, keeping 4961 word types
[2020-09-19 21:39:54] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2800000, processed 29928703 words, keeping 4962 word types
[2020-09-19 21:39:54] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2810000, processed 30033010 words, keeping 4963 word types
[2020-09-19 21:39:54] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2820000, processed 30147966 words, keeping 4963 word types
[2020-09-19 21:39:54] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2830000, processed 30259209 words, keeping 4963 word types
[2020-09-19 21:39:54] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2840000, processed 30355239 words, keeping 4964 word types
[2020-09-19 21:39:54] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2850000, processed 30470882 words, keeping 4965 word types
[2020-09-19 21:39:54] - word2vec.py[line:

[2020-09-19 21:39:55] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3380000, processed 36119374 words, keeping 5019 word types
[2020-09-19 21:39:55] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3390000, processed 36227386 words, keeping 5021 word types
[2020-09-19 21:39:55] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3400000, processed 36334215 words, keeping 5024 word types
[2020-09-19 21:39:55] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3410000, processed 36440106 words, keeping 5027 word types
[2020-09-19 21:39:55] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3420000, processed 36553863 words, keeping 5029 word types
[2020-09-19 21:39:55] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3430000, processed 36676744 words, keeping 5030 word types
[2020-09-19 21:39:55] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3440000, processed 36783395 words, keeping 5030 word types
[2020-09-19 21:39:55] - word2vec.py[line:

[2020-09-19 21:39:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3970000, processed 42384655 words, keeping 5081 word types
[2020-09-19 21:39:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3980000, processed 42490051 words, keeping 5082 word types
[2020-09-19 21:39:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3990000, processed 42596673 words, keeping 5083 word types
[2020-09-19 21:39:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4000000, processed 42701463 words, keeping 5085 word types
[2020-09-19 21:39:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4010000, processed 42814411 words, keeping 5085 word types
[2020-09-19 21:39:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4020000, processed 42915063 words, keeping 5085 word types
[2020-09-19 21:39:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4030000, processed 43018852 words, keeping 5085 word types
[2020-09-19 21:39:56] - word2vec.py[line:

[2020-09-19 21:39:57] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4560000, processed 48652327 words, keeping 5132 word types
[2020-09-19 21:39:57] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4570000, processed 48762451 words, keeping 5132 word types
[2020-09-19 21:39:57] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4580000, processed 48879638 words, keeping 5133 word types
[2020-09-19 21:39:57] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4590000, processed 48997815 words, keeping 5133 word types
[2020-09-19 21:39:57] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4600000, processed 49106240 words, keeping 5134 word types
[2020-09-19 21:39:57] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4610000, processed 49217394 words, keeping 5135 word types
[2020-09-19 21:39:57] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4620000, processed 49314044 words, keeping 5135 word types
[2020-09-19 21:39:57] - word2vec.py[line:

[2020-09-19 21:39:58] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5150000, processed 55127858 words, keeping 5171 word types
[2020-09-19 21:39:58] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5160000, processed 55229857 words, keeping 5172 word types
[2020-09-19 21:39:58] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5170000, processed 55337807 words, keeping 5172 word types
[2020-09-19 21:39:58] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5180000, processed 55435595 words, keeping 5173 word types
[2020-09-19 21:39:58] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5190000, processed 55548529 words, keeping 5173 word types
[2020-09-19 21:39:58] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5200000, processed 55654966 words, keeping 5174 word types
[2020-09-19 21:39:58] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5210000, processed 55769265 words, keeping 5174 word types
[2020-09-19 21:39:58] - word2vec.py[line:

[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5740000, processed 61440170 words, keeping 5213 word types
[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5750000, processed 61535545 words, keeping 5213 word types
[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5760000, processed 61638883 words, keeping 5214 word types
[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5770000, processed 61749809 words, keeping 5215 word types


w2v start!


[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5780000, processed 61872829 words, keeping 5215 word types
[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5790000, processed 61977243 words, keeping 5216 word types
[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5800000, processed 62087795 words, keeping 5217 word types
[2020-09-19 21:39:59] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5810000, processed 62197748 words, keeping 5217 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5820000, processed 62302906 words, keeping 5217 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5830000, processed 62399929 words, keeping 5217 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5840000, processed 62505585 words, keeping 5217 word types
[2020-09-19 21:40:00] - word2vec.py[line:

[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6080000, processed 65094365 words, keeping 5229 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #290000, processed 3048037 words, keeping 3496 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6090000, processed 65199600 words, keeping 5229 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #300000, processed 3149852 words, keeping 3508 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6100000, processed 65326562 words, keeping 5229 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #310000, processed 3241969 words, keeping 3524 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6110000, processed 65432177 words, keeping 5229 word types
[2020-09-19 21:40:00] - word2vec.py[line:1384] 

[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #600000, processed 6294194 words, keeping 3741 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6380000, processed 68312159 words, keeping 5238 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #610000, processed 6405005 words, keeping 3749 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6390000, processed 68414666 words, keeping 5238 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #620000, processed 6517821 words, keeping 3754 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6400000, processed 68519235 words, keeping 5238 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #630000, processed 6625358 words, keeping 3756 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - 

[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6680000, processed 71461470 words, keeping 5253 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #900000, processed 9502464 words, keeping 3870 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6690000, processed 71563411 words, keeping 5253 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #910000, processed 9608073 words, keeping 3873 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6700000, processed 71675821 words, keeping 5253 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #920000, processed 9723209 words, keeping 3878 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #930000, processed 9831099 words, keeping 3881 word types
[2020-09-19 21:40:01] - word2vec.py[line:1384] - 

[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6970000, processed 74557066 words, keeping 5265 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1210000, processed 12906693 words, keeping 3944 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6980000, processed 74664660 words, keeping 5265 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1220000, processed 13004561 words, keeping 3945 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6990000, processed 74762848 words, keeping 5265 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1230000, processed 13105830 words, keeping 3947 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7000000, processed 74866760 words, keeping 5265 word types
[2020-09-19 21:40:02] - word2vec.py[line:

[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1500000, processed 15974779 words, keeping 4000 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7270000, processed 77856364 words, keeping 5279 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1510000, processed 16071928 words, keeping 4001 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7280000, processed 77966121 words, keeping 5280 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1520000, processed 16178505 words, keeping 4003 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7290000, processed 78084435 words, keeping 5280 word types
[2020-09-19 21:40:02] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1530000, processed 16283476 words, keeping 4004 word types
[2020-09-19 21:40:02] - word2vec.py[line:

[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1790000, processed 19064518 words, keeping 4049 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7580000, processed 81177308 words, keeping 5293 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1800000, processed 19171184 words, keeping 4049 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7590000, processed 81275807 words, keeping 5294 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1810000, processed 19275996 words, keeping 4049 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7600000, processed 81374347 words, keeping 5294 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #1820000, processed 19382370 words, keeping 4050 word types
[2020-09-19 21:40:03] - word2vec.py[line:

[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2090000, processed 22302410 words, keeping 4093 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7870000, processed 84283034 words, keeping 5302 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2100000, processed 22409690 words, keeping 4095 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7880000, processed 84398105 words, keeping 5302 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2110000, processed 22511849 words, keeping 4095 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7890000, processed 84501077 words, keeping 5302 word types
[2020-09-19 21:40:03] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2120000, processed 22623159 words, keeping 4097 word types
[2020-09-19 21:40:03] - word2vec.py[line:

[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2400000, processed 25615863 words, keeping 4130 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8160000, processed 87327702 words, keeping 5310 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2410000, processed 25733738 words, keeping 4130 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8170000, processed 87439263 words, keeping 5311 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8180000, processed 87541749 words, keeping 5312 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2420000, processed 25839669 words, keeping 4131 word types
[2020-09-19 21:40:04] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 32.47% examples, 135586 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:04] - word2vec.py[lin

[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2690000, processed 28700982 words, keeping 4151 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8460000, processed 90555650 words, keeping 5319 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2700000, processed 28820938 words, keeping 4151 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8470000, processed 90658475 words, keeping 5319 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2710000, processed 28924613 words, keeping 4152 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8480000, processed 90768698 words, keeping 5319 word types
[2020-09-19 21:40:04] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #2720000, processed 29042515 words, keeping 4153 word types
[2020-09-19 21:40:04] - word2vec.py[line:

[2020-09-19 21:40:05] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 76140333 word corpus (82.7% of prior 92062788)
[2020-09-19 21:40:05] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3080000, processed 32911269 words, keeping 4172 word types
[2020-09-19 21:40:05] - base_any2vec.py[line:1008] - INFO: estimated required memory for 5322 words and 32 dimensions: 4023432 bytes
[2020-09-19 21:40:05] - word2vec.py[line:1699] - INFO: resetting layer weights
[2020-09-19 21:40:05] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3090000, processed 33016163 words, keeping 4172 word types
[2020-09-19 21:40:05] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3100000, processed 33122139 words, keeping 4172 word types
[2020-09-19 21:40:05] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3110000, processed 33216865 words, keeping 4172 word types
[2020-09-19 21:40:05] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3120000, processed 33326189 wo

[2020-09-19 21:40:06] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3650000, processed 38998466 words, keeping 4201 word types
[2020-09-19 21:40:06] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3660000, processed 39102613 words, keeping 4202 word types
[2020-09-19 21:40:06] - base_any2vec.py[line:1196] - INFO: training model with 5 workers on 5322 vocabulary and 32 features, using sg=0 hs=0 sample=0.001 negative=5 window=5
[2020-09-19 21:40:06] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3670000, processed 39220050 words, keeping 4204 word types
[2020-09-19 21:40:06] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3680000, processed 39320908 words, keeping 4204 word types
[2020-09-19 21:40:06] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3690000, processed 39430050 words, keeping 4204 word types
[2020-09-19 21:40:06] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #3700000, processed 39521597 words, keeping 4204 word types
[202

[2020-09-19 21:40:07] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4220000, processed 45027207 words, keeping 4230 word types
[2020-09-19 21:40:07] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4230000, processed 45125600 words, keeping 4231 word types
[2020-09-19 21:40:07] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4240000, processed 45225944 words, keeping 4231 word types
[2020-09-19 21:40:07] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4250000, processed 45334806 words, keeping 4232 word types
[2020-09-19 21:40:07] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4260000, processed 45453844 words, keeping 4233 word types
[2020-09-19 21:40:07] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4270000, processed 45561771 words, keeping 4233 word types
[2020-09-19 21:40:07] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4280000, processed 45683949 words, keeping 4233 word types
[2020-09-19 21:40:07] - word2vec.py[line:

[2020-09-19 21:40:08] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4800000, processed 51289476 words, keeping 4257 word types
[2020-09-19 21:40:08] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4810000, processed 51393966 words, keeping 4257 word types
[2020-09-19 21:40:08] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4820000, processed 51494390 words, keeping 4259 word types
[2020-09-19 21:40:08] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4830000, processed 51606106 words, keeping 4259 word types
[2020-09-19 21:40:08] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4840000, processed 51707792 words, keeping 4259 word types
[2020-09-19 21:40:08] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4850000, processed 51808616 words, keeping 4259 word types
[2020-09-19 21:40:08] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #4860000, processed 51916349 words, keeping 4259 word types
[2020-09-19 21:40:08] - word2vec.py[line:

[2020-09-19 21:40:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5380000, processed 57547801 words, keeping 4275 word types
[2020-09-19 21:40:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5390000, processed 57664224 words, keeping 4275 word types
[2020-09-19 21:40:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5400000, processed 57766964 words, keeping 4275 word types
[2020-09-19 21:40:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5410000, processed 57888856 words, keeping 4278 word types
[2020-09-19 21:40:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5420000, processed 58003026 words, keeping 4278 word types
[2020-09-19 21:40:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5430000, processed 58120431 words, keeping 4278 word types
[2020-09-19 21:40:09] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5440000, processed 58214528 words, keeping 4278 word types
[2020-09-19 21:40:09] - word2vec.py[line:

[2020-09-19 21:40:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5960000, processed 63822602 words, keeping 4297 word types
[2020-09-19 21:40:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5970000, processed 63930107 words, keeping 4297 word types
[2020-09-19 21:40:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5980000, processed 64029244 words, keeping 4298 word types
[2020-09-19 21:40:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #5990000, processed 64127947 words, keeping 4298 word types
[2020-09-19 21:40:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6000000, processed 64238267 words, keeping 4298 word types
[2020-09-19 21:40:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6010000, processed 64342080 words, keeping 4299 word types
[2020-09-19 21:40:10] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6020000, processed 64445164 words, keeping 4300 word types
[2020-09-19 21:40:10] - word2vec.py[line:

[2020-09-19 21:40:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6540000, processed 70005981 words, keeping 4317 word types
[2020-09-19 21:40:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6550000, processed 70111236 words, keeping 4317 word types
[2020-09-19 21:40:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6560000, processed 70207482 words, keeping 4318 word types
[2020-09-19 21:40:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6570000, processed 70310373 words, keeping 4318 word types
[2020-09-19 21:40:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6580000, processed 70415645 words, keeping 4318 word types
[2020-09-19 21:40:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6590000, processed 70524208 words, keeping 4318 word types
[2020-09-19 21:40:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #6600000, processed 70621601 words, keeping 4318 word types
[2020-09-19 21:40:11] - word2vec.py[line:

[2020-09-19 21:40:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7120000, processed 76212871 words, keeping 4333 word types
[2020-09-19 21:40:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7130000, processed 76314205 words, keeping 4333 word types
[2020-09-19 21:40:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7140000, processed 76424836 words, keeping 4333 word types
[2020-09-19 21:40:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7150000, processed 76533722 words, keeping 4333 word types
[2020-09-19 21:40:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7160000, processed 76648221 words, keeping 4333 word types
[2020-09-19 21:40:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7170000, processed 76762388 words, keeping 4333 word types
[2020-09-19 21:40:12] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7180000, processed 76871184 words, keeping 4334 word types
[2020-09-19 21:40:13] - word2vec.py[line:

[2020-09-19 21:40:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7700000, processed 82453784 words, keeping 4350 word types
[2020-09-19 21:40:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7710000, processed 82570428 words, keeping 4350 word types
[2020-09-19 21:40:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7720000, processed 82687657 words, keeping 4350 word types
[2020-09-19 21:40:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7730000, processed 82789826 words, keeping 4351 word types
[2020-09-19 21:40:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7740000, processed 82897070 words, keeping 4351 word types
[2020-09-19 21:40:13] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7750000, processed 83010350 words, keeping 4351 word types
[2020-09-19 21:40:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #7760000, processed 83124664 words, keeping 4351 word types
[2020-09-19 21:40:14] - word2vec.py[line:

[2020-09-19 21:40:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8280000, processed 88611073 words, keeping 4364 word types
[2020-09-19 21:40:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8290000, processed 88725314 words, keeping 4364 word types
[2020-09-19 21:40:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8300000, processed 88831390 words, keeping 4364 word types
[2020-09-19 21:40:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8310000, processed 88941186 words, keeping 4364 word types
[2020-09-19 21:40:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8320000, processed 89062029 words, keeping 4364 word types
[2020-09-19 21:40:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8330000, processed 89177944 words, keeping 4364 word types
[2020-09-19 21:40:15] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #8340000, processed 89278414 words, keeping 4364 word types
[2020-09-19 21:40:15] - word2vec.py[line:

[2020-09-19 21:40:21] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 9.98% examples, 1490297 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:22] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 89.40% examples, 133437 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:22] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 31.60% examples, 1491672 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:40:22] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 11.94% examples, 1494884 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:23] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 92.57% examples, 133414 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:23] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 33.55% examples, 1493259 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:23] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 13.92% examples, 1502325 words/s, in_qsize 7, out_qsize 2
[2020-09-19 21:40:24] 

[2020-09-19 21:40:39] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 46.07% examples, 1512545 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:40:40] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 67.21% examples, 1497438 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:40:40] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 48.91% examples, 135811 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:40] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 48.16% examples, 1515557 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:41] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 69.14% examples, 1497392 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:41] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 52.15% examples, 135698 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:41] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 50.20% examples, 1516245 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:42] 

[2020-09-19 21:40:57] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 2 more threads
[2020-09-19 21:40:57] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 1 more threads
[2020-09-19 21:40:57] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 0 more threads
[2020-09-19 21:40:57] - base_any2vec.py[line:1332] - INFO: EPOCH - 1 : training on 92062788 raw words (76143221 effective words) took 50.8s, 1499888 effective words/s
[2020-09-19 21:40:57] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 3.31% examples, 136298 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:40:58] - base_any2vec.py[line:1291] - INFO: EPOCH 1 - PROGRESS: at 82.25% examples, 1519130 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:58] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 1.81% examples, 1345696 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:40:58] - base_any2vec.py[line:1291] - INFO: EPOCH 4 -

[2020-09-19 21:41:14] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 59.21% examples, 137234 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:14] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 15.95% examples, 1500447 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:15] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 35.74% examples, 1499871 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:15] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 62.43% examples, 137003 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:15] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 18.00% examples, 1503310 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:16] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 37.75% examples, 1501138 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:16] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 65.65% examples, 136938 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:16] -

[2020-09-19 21:41:33] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 51.97% examples, 1508318 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:33] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 71.65% examples, 1509005 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:33] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 19.84% examples, 137535 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:41:34] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 53.91% examples, 1507840 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:34] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 73.70% examples, 1509727 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:34] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 23.14% examples, 137679 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:35] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 55.88% examples, 1508666 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:35] 

[2020-09-19 21:41:51] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 87.82% examples, 1513218 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:41:51] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 79.69% examples, 138549 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:51] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 8.03% examples, 1492770 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:52] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 89.85% examples, 1513862 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:41:52] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 82.97% examples, 138616 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:41:52] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 10.05% examples, 1499230 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:41:53] - base_any2vec.py[line:1291] - INFO: EPOCH 2 - PROGRESS: at 91.84% examples, 1513709 words/s, in_qsize 7, out_qsize 2
[2020-09-19 21:41:53]

[2020-09-19 21:42:11] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 47.93% examples, 1509228 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:12] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 29.78% examples, 1498878 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:12] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 49.92% examples, 1509230 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:13] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 31.76% examples, 1501006 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:13] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 51.96% examples, 1509271 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:14] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 33.76% examples, 1503548 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:14] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 53.95% examples, 1510266 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:15

[2020-09-19 21:42:39] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 1.93% examples, 1432371 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:39] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 84.37% examples, 1521648 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:42:40] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 3.82% examples, 1408194 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:40] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 86.39% examples, 1522151 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:41] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 5.81% examples, 1430343 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:41] - base_any2vec.py[line:1291] - INFO: EPOCH 3 - PROGRESS: at 88.49% examples, 1523354 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:42] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 7.77% examples, 1443347 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:42:42] 

label done!


[2020-09-19 21:42:56] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 35.75% examples, 1503107 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:56] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 17.81% examples, 1485944 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:57] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 37.75% examples, 1502958 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:42:57] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 19.85% examples, 1492586 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:42:58] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 39.79% examples, 1504735 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:58] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 21.84% examples, 1492539 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:59] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 41.80% examples, 1504559 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:42:5

[2020-09-19 21:43:25] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 94.58% examples, 1524193 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:26] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 78.41% examples, 1521523 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:26] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 96.57% examples, 1523803 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:27] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 80.39% examples, 1520947 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:27] - base_any2vec.py[line:1291] - INFO: EPOCH 4 - PROGRESS: at 98.65% examples, 1525079 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:27] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 4 more threads
[2020-09-19 21:43:27] - base_any2vec.py[line:348] - INFO: worker thread finished; awaiting finish of 3 more threads
[2020-09-19 21:43:27] - base_any2vec.py[line:348] - INFO: worker thr

[2020-09-19 21:43:50] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 44.73% examples, 1535476 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:50] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 27.58% examples, 1490422 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:43:51] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 46.76% examples, 1535731 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:51] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 29.67% examples, 1494679 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:52] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 48.92% examples, 1538054 words/s, in_qsize 10, out_qsize 0
[2020-09-19 21:43:52] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 31.65% examples, 1496914 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:53] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 50.92% examples, 1536958 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:43:5

[2020-09-19 21:44:17] - base_any2vec.py[line:1332] - INFO: EPOCH - 5 : training on 92062788 raw words (76143050 effective words) took 49.7s, 1532163 effective words/s
[2020-09-19 21:44:17] - base_any2vec.py[line:1368] - INFO: training on a 460313940 raw words (380709836 effective words) took 251.2s, 1515541 effective words/s
[2020-09-19 21:44:18] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 82.10% examples, 1517474 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:44:19] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 84.20% examples, 1520910 words/s, in_qsize 8, out_qsize 1
[2020-09-19 21:44:20] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 86.26% examples, 1521954 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:44:21] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 88.24% examples, 1521174 words/s, in_qsize 9, out_qsize 0
[2020-09-19 21:44:22] - base_any2vec.py[line:1291] - INFO: EPOCH 5 - PROGRESS: at 90.28% examples, 1522435 words/

adv_id done!


[2020-09-19 21:45:21] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_EMB_INPUTSEQ_V2_task_id.pkl


task_id done!


In [2]:
import pandas as pd
import numpy as np
import gc
from base import Cache
from tqdm import tqdm
from gensim.models import Word2Vec
data = Cache.reload_cache('CACHE_data_step_1_feature_0917_r5.pkl')
seq_emb = Cache.reload_cache('CACHE_EMB_INPUTSEQ_V2_adv_id.pkl')

[2020-09-19 21:45:28] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_data_step_1_feature_0917_r5.pkl
[2020-09-19 21:45:30] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/cached_data/CACHE_EMB_INPUTSEQ_V2_adv_id.pkl


In [3]:
data[['index','uid','pt_d','adv_id']].head()

Unnamed: 0,index,uid,pt_d,adv_id
0,17073310,2237673,5,6340
1,17636486,2237673,5,4501
2,35175266,2237672,6,3701
3,30784519,2237672,4,5389
4,11939625,2237672,4,4506


In [4]:
data.query('uid==2237673')[['index','uid','pt_d','adv_id']]

Unnamed: 0,index,uid,pt_d,adv_id
0,17073310,2237673,5,6340
1,17636486,2237673,5,4501


In [5]:
seq_emb.keys()

dict_keys(['id_list', 'key2index', 'emb'])

In [6]:
seq_emb['id_list'][:5,:]

array([[1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [4, 5, 6, 7, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [5, 7, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [6, 7, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
      dtype=int32)

In [7]:
seq_emb['key2index']['6340']

1

In [8]:
seq_emb['key2index']['4501']

3

### 检查过了，seq 和 dense 可以对上！