In [1]:
import pandas as pd 
import numpy as np
import gc
from base import Cache
from tqdm import tqdm


In [2]:
def reduce_mem(df, use_float16=False):
    start_mem = df.memory_usage().sum() / 1024**2
    tm_cols = df.select_dtypes('datetime').columns
    for col in df.columns:
        if col in tm_cols:
            continue
        col_type = df[col].dtypes
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(
                        np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(
                        np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(
                        np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(
                        np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if use_float16 and c_min > np.finfo(
                        np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(
                        np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    print('{:.2f} Mb, {:.2f} Mb ({:.2f} %)'.format(
        start_mem, end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

# list_df

In [3]:

def gen_list_df(data,feature1,feature2):
    try:
        data_group = data.groupby([feature1])
        del data
        gc.collect()
        feature2_name_list = []
        for name,group in data_group:
            feature2_name_list.append(name)    
        list_feature2 = []
        for i in feature2_name_list:
            list_feature2_ = []
            index_get_group = data_group.get_group(i)
            buf = []
            for j in index_get_group[[feature2,'label']].values:
                if j[1] == 1:
                    buf.append(j[0])
            list_feature2_.append(buf)
            list_feature2_.append(i)
            list_feature2.append(list_feature2_)
        list_df = pd.DataFrame(list_feature2)
        Cache.cache_data(list_df, nm_marker='list_df_adv_userseq_'+feature1+'_'+feature2)
        del list_df,data_group,feature2_name_list,list_feature2_,index_get_group,list_feature2
        gc.collect()
        return True
    except:
        return False

In [4]:

train = Cache.reload_cache('CACHE_train_raw.pkl').drop(columns = ['communication_onlinerate']).astype(int)
train = reduce_mem(train, use_float16=True)
test = Cache.reload_cache('CACHE_test_B_raw.pkl').drop(columns = ['id','communication_onlinerate']).astype(int)
test = reduce_mem(test, use_float16=True)
data = pd.concat([train,test],axis=0,ignore_index=True)
data = reduce_mem(data, use_float16=True)
del train,test
gc.collect()
poc_feature1_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],
                  ['task_id','gender'],['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],
                  ['adv_id','device_name'],['adv_id','career'],['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],
                  ['creat_type_cd','city'],['creat_type_cd','city_rank'],['creat_type_cd','device_name'],['creat_type_cd','career'],
                  ['creat_type_cd','gender'],['creat_type_cd','residence'],['indu_name','age'],['indu_name','city'],['indu_name','city_rank'],
                  ['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],['indu_name','residence'],['adv_prim_id','age'],
                  ['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],['adv_prim_id','career'],['adv_prim_id','gender'],
                  ['adv_prim_id','residence']]
for i in tqdm(poc_feature1_list):
    if gen_list_df(data,i[0],i[1]):
        print(i,' Done')
    else:
        print(i,' Err')

[2020-09-26 17:22:36] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_train_raw.pkl


11190.41 Mb, 1718.53 Mb (84.64 %)


[2020-09-26 17:26:16] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_test_B_raw.pkl


267.03 Mb, 41.01 Mb (84.64 %)


  0%|          | 0/35 [00:00<?, ?it/s]

1759.54 Mb, 1759.54 Mb (0.00 %)


[2020-09-26 17:28:27] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_age.pkl
  3%|▎         | 1/35 [01:58<1:07:08, 118.48s/it]

['task_id', 'age']  Done


[2020-09-26 17:30:29] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_city.pkl
  6%|▌         | 2/35 [03:59<1:05:39, 119.37s/it]

['task_id', 'city']  Done


[2020-09-26 17:32:26] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_city_rank.pkl
  9%|▊         | 3/35 [05:57<1:03:22, 118.84s/it]

['task_id', 'city_rank']  Done


[2020-09-26 17:34:28] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_device_name.pkl
 11%|█▏        | 4/35 [07:58<1:01:47, 119.60s/it]

['task_id', 'device_name']  Done


[2020-09-26 17:36:31] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_career.pkl
 14%|█▍        | 5/35 [10:01<1:00:16, 120.54s/it]

['task_id', 'career']  Done


[2020-09-26 17:38:31] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_gender.pkl
 17%|█▋        | 6/35 [12:01<58:12, 120.42s/it]  

['task_id', 'gender']  Done


[2020-09-26 17:40:36] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_residence.pkl
 20%|██        | 7/35 [14:06<56:51, 121.83s/it]

['task_id', 'residence']  Done


[2020-09-26 17:42:36] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_age.pkl
 23%|██▎       | 8/35 [16:07<54:39, 121.45s/it]

['adv_id', 'age']  Done


[2020-09-26 17:44:39] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_city.pkl
 26%|██▌       | 9/35 [18:10<52:46, 121.80s/it]

['adv_id', 'city']  Done


[2020-09-26 17:46:42] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_city_rank.pkl
 29%|██▊       | 10/35 [20:12<50:51, 122.05s/it]

['adv_id', 'city_rank']  Done


[2020-09-26 17:48:43] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_device_name.pkl
 31%|███▏      | 11/35 [22:13<48:43, 121.81s/it]

['adv_id', 'device_name']  Done


[2020-09-26 17:50:44] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_career.pkl
 34%|███▍      | 12/35 [24:15<46:37, 121.64s/it]

['adv_id', 'career']  Done


[2020-09-26 17:52:46] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_gender.pkl
 37%|███▋      | 13/35 [26:16<44:35, 121.63s/it]

['adv_id', 'gender']  Done


[2020-09-26 17:54:45] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_residence.pkl
 40%|████      | 14/35 [28:16<42:22, 121.07s/it]

['adv_id', 'residence']  Done


[2020-09-26 17:56:15] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_creat_type_cd_age.pkl
 43%|████▎     | 15/35 [29:46<37:14, 111.73s/it]

['creat_type_cd', 'age']  Done


[2020-09-26 17:57:40] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_creat_type_cd_city.pkl
 46%|████▌     | 16/35 [31:11<32:48, 103.62s/it]

['creat_type_cd', 'city']  Done


[2020-09-26 17:59:06] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_creat_type_cd_city_rank.pkl
 49%|████▊     | 17/35 [32:37<29:29, 98.32s/it] 

['creat_type_cd', 'city_rank']  Done


[2020-09-26 18:00:32] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_creat_type_cd_device_name.pkl
 51%|█████▏    | 18/35 [34:03<26:47, 94.59s/it]

['creat_type_cd', 'device_name']  Done


[2020-09-26 18:01:55] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_creat_type_cd_career.pkl
 54%|█████▍    | 19/35 [35:26<24:19, 91.20s/it]

['creat_type_cd', 'career']  Done


[2020-09-26 18:03:21] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_creat_type_cd_gender.pkl
 57%|█████▋    | 20/35 [36:51<22:22, 89.50s/it]

['creat_type_cd', 'gender']  Done


[2020-09-26 18:04:44] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_creat_type_cd_residence.pkl
 60%|██████    | 21/35 [38:15<20:26, 87.63s/it]

['creat_type_cd', 'residence']  Done


[2020-09-26 18:06:13] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_age.pkl
 63%|██████▎   | 22/35 [39:43<19:03, 87.92s/it]

['indu_name', 'age']  Done


[2020-09-26 18:07:39] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_city.pkl
 66%|██████▌   | 23/35 [41:10<17:29, 87.47s/it]

['indu_name', 'city']  Done


[2020-09-26 18:09:08] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_city_rank.pkl
 69%|██████▊   | 24/35 [42:38<16:06, 87.84s/it]

['indu_name', 'city_rank']  Done


[2020-09-26 18:10:39] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_device_name.pkl
 71%|███████▏  | 25/35 [44:10<14:49, 88.91s/it]

['indu_name', 'device_name']  Done


[2020-09-26 18:12:10] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_career.pkl
 74%|███████▍  | 26/35 [45:41<13:25, 89.54s/it]

['indu_name', 'career']  Done


[2020-09-26 18:13:38] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_gender.pkl
 77%|███████▋  | 27/35 [47:08<11:51, 88.97s/it]

['indu_name', 'gender']  Done


[2020-09-26 18:15:06] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_residence.pkl
 80%|████████  | 28/35 [48:36<10:20, 88.64s/it]

['indu_name', 'residence']  Done


[2020-09-26 18:16:41] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_age.pkl
 83%|████████▎ | 29/35 [50:11<09:03, 90.51s/it]

['adv_prim_id', 'age']  Done


[2020-09-26 18:18:16] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_city.pkl
 86%|████████▌ | 30/35 [51:47<07:40, 92.10s/it]

['adv_prim_id', 'city']  Done


[2020-09-26 18:19:52] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_city_rank.pkl
 89%|████████▊ | 31/35 [53:23<06:12, 93.22s/it]

['adv_prim_id', 'city_rank']  Done


[2020-09-26 18:21:28] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_device_name.pkl
 91%|█████████▏| 32/35 [54:59<04:42, 94.02s/it]

['adv_prim_id', 'device_name']  Done


[2020-09-26 18:23:05] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_career.pkl
 94%|█████████▍| 33/35 [56:36<03:09, 94.97s/it]

['adv_prim_id', 'career']  Done


[2020-09-26 18:24:39] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_gender.pkl
 97%|█████████▋| 34/35 [58:10<01:34, 94.70s/it]

['adv_prim_id', 'gender']  Done


[2020-09-26 18:26:16] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_residence.pkl
100%|██████████| 35/35 [59:46<00:00, 102.48s/it]

['adv_prim_id', 'residence']  Done





# list_txt

In [5]:
def write(feature1_feature2):
    list_df = Cache.reload_cache('CACHE_list_df_adv_userseq_'+feature1_feature2+'.pkl')[0].values.tolist()
    f = open('adv_userseq_'+feature1_feature2+'.txt', 'w')
    for i in list_df:
        if i:
            for j in i:
                f.write(str(j))
                f.write(' ')
            f.write('\n')
        else:
            f.write(str(-2))
            f.write(' ')
            f.write('\n')
    f.close()

In [6]:
f1_f2_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],
              ['task_id','gender'],['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],
              ['adv_id','device_name'],['adv_id','career'],['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],
              ['creat_type_cd','city'],['creat_type_cd','city_rank'],['creat_type_cd','device_name'],['creat_type_cd','career'],
              ['creat_type_cd','gender'],['creat_type_cd','residence'],['indu_name','age'],['indu_name','city'],['indu_name','city_rank'],
              ['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],['indu_name','residence'],['adv_prim_id','age'],
              ['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],['adv_prim_id','career'],['adv_prim_id','gender'],
              ['adv_prim_id','residence']]
for i in tqdm(f1_f2_list):
    write(str(i[0])+'_'+str(i[1]))

  0%|          | 0/35 [00:00<?, ?it/s][2020-09-26 18:26:16] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_age.pkl
  3%|▎         | 1/35 [00:01<01:03,  1.88s/it][2020-09-26 18:26:18] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_city.pkl
  6%|▌         | 2/35 [00:03<01:02,  1.89s/it][2020-09-26 18:26:20] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_city_rank.pkl
  9%|▊         | 3/35 [00:05<00:55,  1.74s/it][2020-09-26 18:26:21] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_device_name.pkl
 11%|█▏        | 4/35 [00:06<00:52,  1.68s/it][2020-09-26 18:26:23] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot

# w2v

In [7]:
from gensim.models import word2vec
from gensim.models.callbacks import CallbackAny2Vec
from tqdm import tqdm

In [8]:
def f1_f2_w2v(f1_f2):
    print('LineSentence start')
    sentences = word2vec.LineSentence('adv_userseq_'+f1_f2+'.txt') 
    print('Word2Vec start')
    # model = word2vec.Word2Vec(sentences , size=128, window=35, sg=0, hs=1, min_count=1, iter=5, callbacks=[bsz_loss])
    model = word2vec.Word2Vec(sentences , size=64, window=10000, sg=0, hs=1, min_count=1, iter=10, workers=-1)
    print('save start')
    model.save('adv_userseq_'+f1_f2+'_word2vec.model')
    model.wv.save('adv_userseq_'+f1_f2+'_word2vec.kv')
    print('Done')

In [9]:
f1_f2_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],['task_id','gender'],
              ['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],['adv_id','device_name'],['adv_id','career'],
              ['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],['creat_type_cd','city'],['creat_type_cd','city_rank'],
              ['creat_type_cd','device_name'],['creat_type_cd','career'],['creat_type_cd','gender'],['creat_type_cd','residence'],
              ['indu_name','age'],['indu_name','city'],['indu_name','city_rank'],['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],
              ['indu_name','residence'],['adv_prim_id','age'],['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],
              ['adv_prim_id','career'],['adv_prim_id','gender'],['adv_prim_id','residence']]
for i in f1_f2_list:
    f1_f2_w2v(str(i[0])+'_'+str(i[1]))

[2020-09-26 18:27:11] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:11] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:11] - word2vec.py[line:1407] - INFO: collected 9 word types from a corpus of 1446773 raw words and 4941 sentences


LineSentence start
Word2Vec start


[2020-09-26 18:27:11] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:11] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 9 unique words (100% of original 9, drops 0)
[2020-09-26 18:27:11] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1446773 word corpus (100% of original 1446773, drops 0)
[2020-09-26 18:27:11] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 9 items
[2020-09-26 18:27:11] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 8 most-common words
[2020-09-26 18:27:11] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 129968 word corpus (9.0% of prior 1446773)
[2020-09-26 18:27:11] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 9 words
[2020-09-26 18:27:11] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node depth 6
[2020-09-26 18:27:11] - base_any2vec.py[line:1008] - INFO: estimated required memory for 9 words and 64 dimensions: 13212 b

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:12] - word2vec.py[line:1407] - INFO: collected 345 word types from a corpus of 1446773 raw words and 4941 sentences
[2020-09-26 18:27:12] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:12] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 345 unique words (100% of original 345, drops 0)
[2020-09-26 18:27:12] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1446773 word corpus (100% of original 1446773, drops 0)
[2020-09-26 18:27:12] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 345 items
[2020-09-26 18:27:12] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 98 most-common words
[2020-09-26 18:27:12] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 912919 word corpus (63.1% of prior 1446773)
[2020-09-26 18:27:12] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 345 words
[2020-09-26 18:27:12] - word2vec.py[line:1673] - INFO: built huffman tree with 

save start


[2020-09-26 18:27:14] - utils.py[line:565] - INFO: saved adv_userseq_task_id_city_word2vec.kv
[2020-09-26 18:27:14] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:14] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:14] - word2vec.py[line:1407] - INFO: collected 5 word types from a corpus of 1446773 raw words and 4941 sentences
[2020-09-26 18:27:14] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:14] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 5 unique words (100% of original 5, drops 0)
[2020-09-26 18:27:14] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1446773 word corpus (100% of original 1446773, drops 0)
[2020-09-26 18:27:14] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 5 items
[2020-09-26 18:27:14] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 4 most-common words
[2020-0

Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:14] - base_any2vec.py[line:1332] - INFO: EPOCH - 2 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:14] - base_any2vec.py[line:1332] - INFO: EPOCH - 3 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:14] - base_any2vec.py[line:1332] - INFO: EPOCH - 4 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:14] - base_any2vec.py[line:1332] - INFO: EPOCH - 5 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:14] - base_any2vec.py[line:1332] - INFO: EPOCH - 6 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:14] - base_any2vec.py[line:1332] - INFO: EPOCH - 7 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:14] - base_any2vec.py[line:1332] - INFO: EPOCH - 8 : training on 0 raw words (0 effective words) took 0.0s

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:15] - word2vec.py[line:1407] - INFO: collected 91 word types from a corpus of 1446773 raw words and 4941 sentences
[2020-09-26 18:27:15] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:15] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 91 unique words (100% of original 91, drops 0)
[2020-09-26 18:27:15] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1446773 word corpus (100% of original 1446773, drops 0)
[2020-09-26 18:27:15] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 91 items
[2020-09-26 18:27:15] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 51 most-common words
[2020-09-26 18:27:15] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 405191 word corpus (28.0% of prior 1446773)
[2020-09-26 18:27:15] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 91 words
[2020-09-26 18:27:15] - word2vec.py[line:1673] - INFO: built huffman tree with maxim

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:17] - word2vec.py[line:1407] - INFO: collected 10 word types from a corpus of 1446773 raw words and 4941 sentences
[2020-09-26 18:27:17] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:17] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 10 unique words (100% of original 10, drops 0)
[2020-09-26 18:27:17] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1446773 word corpus (100% of original 1446773, drops 0)
[2020-09-26 18:27:17] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 10 items
[2020-09-26 18:27:17] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 8 most-common words
[2020-09-26 18:27:17] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 117287 word corpus (8.1% of prior 1446773)
[2020-09-26 18:27:17] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 10 words
[2020-09-26 18:27:17] - word2vec.py[line:1673] - INFO: built huffman tree with maximum

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:18] - word2vec.py[line:1407] - INFO: collected 4 word types from a corpus of 1446773 raw words and 4941 sentences
[2020-09-26 18:27:18] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:18] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 4 unique words (100% of original 4, drops 0)
[2020-09-26 18:27:18] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1446773 word corpus (100% of original 1446773, drops 0)
[2020-09-26 18:27:18] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 4 items
[2020-09-26 18:27:18] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 3 most-common words
[2020-09-26 18:27:18] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 74575 word corpus (5.2% of prior 1446773)
[2020-09-26 18:27:18] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 4 words
[2020-09-26 18:27:18] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node 

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:19] - word2vec.py[line:1407] - INFO: collected 37 word types from a corpus of 1446773 raw words and 4941 sentences
[2020-09-26 18:27:19] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:19] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 37 unique words (100% of original 37, drops 0)
[2020-09-26 18:27:19] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1446773 word corpus (100% of original 1446773, drops 0)
[2020-09-26 18:27:19] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 37 items
[2020-09-26 18:27:19] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 31 most-common words
[2020-09-26 18:27:19] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 289605 word corpus (20.0% of prior 1446773)
[2020-09-26 18:27:19] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 37 words
[2020-09-26 18:27:19] - word2vec.py[line:1673] - INFO: built huffman tree with maxim

save start


[2020-09-26 18:27:21] - utils.py[line:565] - INFO: saved adv_userseq_task_id_residence_word2vec.kv
[2020-09-26 18:27:21] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:21] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types


Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:22] - word2vec.py[line:1407] - INFO: collected 9 word types from a corpus of 1447271 raw words and 5965 sentences
[2020-09-26 18:27:22] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:22] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 9 unique words (100% of original 9, drops 0)
[2020-09-26 18:27:22] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1447271 word corpus (100% of original 1447271, drops 0)
[2020-09-26 18:27:22] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 9 items
[2020-09-26 18:27:22] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 8 most-common words
[2020-09-26 18:27:22] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 130490 word corpus (9.0% of prior 1447271)
[2020-09-26 18:27:22] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 9 words
[2020-09-26 18:27:22] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:23] - word2vec.py[line:1407] - INFO: collected 345 word types from a corpus of 1447271 raw words and 5965 sentences
[2020-09-26 18:27:23] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:23] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 345 unique words (100% of original 345, drops 0)
[2020-09-26 18:27:23] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1447271 word corpus (100% of original 1447271, drops 0)
[2020-09-26 18:27:23] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 345 items
[2020-09-26 18:27:23] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 98 most-common words
[2020-09-26 18:27:23] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 913527 word corpus (63.1% of prior 1447271)
[2020-09-26 18:27:23] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 345 words
[2020-09-26 18:27:23] - word2vec.py[line:1673] - INFO: built huffman tree with 

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:25] - word2vec.py[line:1407] - INFO: collected 5 word types from a corpus of 1447271 raw words and 5965 sentences
[2020-09-26 18:27:25] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:25] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 5 unique words (100% of original 5, drops 0)
[2020-09-26 18:27:25] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1447271 word corpus (100% of original 1447271, drops 0)
[2020-09-26 18:27:25] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 5 items
[2020-09-26 18:27:25] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 4 most-common words
[2020-09-26 18:27:25] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 90814 word corpus (6.3% of prior 1447271)
[2020-09-26 18:27:25] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 5 words
[2020-09-26 18:27:25] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node 

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:26] - word2vec.py[line:1407] - INFO: collected 91 word types from a corpus of 1447271 raw words and 5965 sentences
[2020-09-26 18:27:26] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:26] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 91 unique words (100% of original 91, drops 0)
[2020-09-26 18:27:26] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1447271 word corpus (100% of original 1447271, drops 0)
[2020-09-26 18:27:26] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 91 items
[2020-09-26 18:27:26] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 51 most-common words
[2020-09-26 18:27:26] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 405765 word corpus (28.0% of prior 1447271)
[2020-09-26 18:27:26] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 91 words
[2020-09-26 18:27:26] - word2vec.py[line:1673] - INFO: built huffman tree with maxim

save start


[2020-09-26 18:27:28] - utils.py[line:551] - INFO: saving Word2VecKeyedVectors object under adv_userseq_adv_id_device_name_word2vec.kv, separately None
[2020-09-26 18:27:28] - utils.py[line:657] - INFO: not storing attribute vectors_norm
[2020-09-26 18:27:28] - utils.py[line:565] - INFO: saved adv_userseq_adv_id_device_name_word2vec.kv
[2020-09-26 18:27:28] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:28] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types


Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:29] - word2vec.py[line:1407] - INFO: collected 10 word types from a corpus of 1447271 raw words and 5965 sentences
[2020-09-26 18:27:29] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:29] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 10 unique words (100% of original 10, drops 0)
[2020-09-26 18:27:29] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1447271 word corpus (100% of original 1447271, drops 0)
[2020-09-26 18:27:29] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 10 items
[2020-09-26 18:27:29] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 8 most-common words
[2020-09-26 18:27:29] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 117806 word corpus (8.1% of prior 1447271)
[2020-09-26 18:27:29] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 10 words
[2020-09-26 18:27:29] - word2vec.py[line:1673] - INFO: built huffman tree with maximum

[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 2 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s


save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 3 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 4 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 5 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 6 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 7 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 8 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:30] - base_any2vec.py[line:1332] - INFO: EPOCH - 9 : training on 0 raw words (0 effective words) took 0.0s

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:31] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:31] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:31] - word2vec.py[line:1407] - INFO: collected 37 word types from a corpus of 1447271 raw words and 5965 sentences
[2020-09-26 18:27:31] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:31] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 37 unique words (100% of original 37, drops 0)
[2020-09-26 18:27:31] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1447271 word corpus (100% of original 1447271, drops 0)
[2020-09-26 18:27:31] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 37 items
[2020-09-26 18:27:31] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 31 most-common words
[2020-09-26 18:27:31] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 290159 word

save start


[2020-09-26 18:27:33] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:33] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types


Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:33] - word2vec.py[line:1407] - INFO: collected 8 word types from a corpus of 1445488 raw words and 148 sentences
[2020-09-26 18:27:33] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:33] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 8 unique words (100% of original 8, drops 0)
[2020-09-26 18:27:33] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445488 word corpus (100% of original 1445488, drops 0)
[2020-09-26 18:27:33] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 8 items
[2020-09-26 18:27:33] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 8 most-common words
[2020-09-26 18:27:33] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 128620 word corpus (8.9% of prior 1445488)
[2020-09-26 18:27:33] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 8 words
[2020-09-26 18:27:33] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node 

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:34] - word2vec.py[line:1407] - INFO: collected 344 word types from a corpus of 1445488 raw words and 148 sentences
[2020-09-26 18:27:34] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:34] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 344 unique words (100% of original 344, drops 0)
[2020-09-26 18:27:34] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445488 word corpus (100% of original 1445488, drops 0)
[2020-09-26 18:27:34] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 344 items
[2020-09-26 18:27:34] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 98 most-common words
[2020-09-26 18:27:34] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 911350 word corpus (63.0% of prior 1445488)
[2020-09-26 18:27:34] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 344 words
[2020-09-26 18:27:34] - word2vec.py[line:1673] - INFO: built huffman tree with m

save start


[2020-09-26 18:27:36] - utils.py[line:565] - INFO: saved adv_userseq_creat_type_cd_city_word2vec.kv
[2020-09-26 18:27:36] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:36] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:36] - word2vec.py[line:1407] - INFO: collected 4 word types from a corpus of 1445488 raw words and 148 sentences
[2020-09-26 18:27:36] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:36] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 4 unique words (100% of original 4, drops 0)
[2020-09-26 18:27:36] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445488 word corpus (100% of original 1445488, drops 0)
[2020-09-26 18:27:36] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 4 items
[2020-09-26 18:27:36] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 4 most-common words
[2

Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:36] - base_any2vec.py[line:1332] - INFO: EPOCH - 4 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:36] - base_any2vec.py[line:1332] - INFO: EPOCH - 5 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:36] - base_any2vec.py[line:1332] - INFO: EPOCH - 6 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:36] - base_any2vec.py[line:1332] - INFO: EPOCH - 7 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:36] - base_any2vec.py[line:1332] - INFO: EPOCH - 8 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:36] - base_any2vec.py[line:1332] - INFO: EPOCH - 9 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:36] - base_any2vec.py[line:1332] - INFO: EPOCH - 10 : training on 0 raw words (0 effective words) took 0.0

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:37] - word2vec.py[line:1407] - INFO: collected 90 word types from a corpus of 1445488 raw words and 148 sentences
[2020-09-26 18:27:37] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:37] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 90 unique words (100% of original 90, drops 0)
[2020-09-26 18:27:37] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445488 word corpus (100% of original 1445488, drops 0)
[2020-09-26 18:27:37] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 90 items
[2020-09-26 18:27:37] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 51 most-common words
[2020-09-26 18:27:37] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 403710 word corpus (27.9% of prior 1445488)
[2020-09-26 18:27:37] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 90 words
[2020-09-26 18:27:37] - word2vec.py[line:1673] - INFO: built huffman tree with maximu

save start


[2020-09-26 18:27:38] - utils.py[line:657] - INFO: not storing attribute vectors_norm
[2020-09-26 18:27:38] - utils.py[line:565] - INFO: saved adv_userseq_creat_type_cd_device_name_word2vec.kv
[2020-09-26 18:27:38] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:38] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:39] - word2vec.py[line:1407] - INFO: collected 9 word types from a corpus of 1445488 raw words and 148 sentences
[2020-09-26 18:27:39] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:39] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 9 unique words (100% of original 9, drops 0)
[2020-09-26 18:27:39] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445488 word corpus (100% of original 1445488, drops 0)
[2020-09-26 18:27:39] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 9 items
[2020-09-

Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:39] - base_any2vec.py[line:1332] - INFO: EPOCH - 2 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:39] - base_any2vec.py[line:1332] - INFO: EPOCH - 3 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:39] - base_any2vec.py[line:1332] - INFO: EPOCH - 4 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:39] - base_any2vec.py[line:1332] - INFO: EPOCH - 5 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:39] - base_any2vec.py[line:1332] - INFO: EPOCH - 6 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:39] - base_any2vec.py[line:1332] - INFO: EPOCH - 7 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:39] - base_any2vec.py[line:1332] - INFO: EPOCH - 8 : training on 0 raw words (0 effective words) took 0.0s

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:39] - word2vec.py[line:1407] - INFO: collected 3 word types from a corpus of 1445488 raw words and 148 sentences
[2020-09-26 18:27:39] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:39] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 3 unique words (100% of original 3, drops 0)
[2020-09-26 18:27:39] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445488 word corpus (100% of original 1445488, drops 0)
[2020-09-26 18:27:39] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 3 items
[2020-09-26 18:27:39] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 3 most-common words
[2020-09-26 18:27:39] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 73255 word corpus (5.1% of prior 1445488)
[2020-09-26 18:27:39] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 3 words
[2020-09-26 18:27:39] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node d

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:40] - word2vec.py[line:1407] - INFO: collected 36 word types from a corpus of 1445488 raw words and 148 sentences
[2020-09-26 18:27:40] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:40] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 36 unique words (100% of original 36, drops 0)
[2020-09-26 18:27:40] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445488 word corpus (100% of original 1445488, drops 0)
[2020-09-26 18:27:40] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 36 items
[2020-09-26 18:27:40] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 31 most-common words
[2020-09-26 18:27:40] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 288175 word corpus (19.9% of prior 1445488)
[2020-09-26 18:27:40] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 36 words
[2020-09-26 18:27:40] - word2vec.py[line:1673] - INFO: built huffman tree with maximu

save start


[2020-09-26 18:27:42] - utils.py[line:551] - INFO: saving Word2VecKeyedVectors object under adv_userseq_creat_type_cd_residence_word2vec.kv, separately None
[2020-09-26 18:27:42] - utils.py[line:657] - INFO: not storing attribute vectors_norm
[2020-09-26 18:27:42] - utils.py[line:565] - INFO: saved adv_userseq_creat_type_cd_residence_word2vec.kv
[2020-09-26 18:27:42] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:42] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:42] - word2vec.py[line:1407] - INFO: collected 9 word types from a corpus of 1445492 raw words and 174 sentences
[2020-09-26 18:27:42] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:42] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 9 unique words (100% of original 9, drops 0)
[2020-09-26 18:27:42] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445492 wor

Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:42] - base_any2vec.py[line:1332] - INFO: EPOCH - 3 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:42] - base_any2vec.py[line:1332] - INFO: EPOCH - 4 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:42] - base_any2vec.py[line:1332] - INFO: EPOCH - 5 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:42] - base_any2vec.py[line:1332] - INFO: EPOCH - 6 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:42] - base_any2vec.py[line:1332] - INFO: EPOCH - 7 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:42] - base_any2vec.py[line:1332] - INFO: EPOCH - 8 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:42] - base_any2vec.py[line:1332] - INFO: EPOCH - 9 : training on 0 raw words (0 effective words) took 0.0s

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:43] - word2vec.py[line:1407] - INFO: collected 345 word types from a corpus of 1445492 raw words and 174 sentences
[2020-09-26 18:27:43] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:43] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 345 unique words (100% of original 345, drops 0)
[2020-09-26 18:27:43] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445492 word corpus (100% of original 1445492, drops 0)
[2020-09-26 18:27:43] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 345 items
[2020-09-26 18:27:43] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 98 most-common words
[2020-09-26 18:27:43] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 911355 word corpus (63.0% of prior 1445492)
[2020-09-26 18:27:43] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 345 words
[2020-09-26 18:27:43] - word2vec.py[line:1673] - INFO: built huffman tree with m

save start


[2020-09-26 18:27:44] - utils.py[line:551] - INFO: saving Word2VecKeyedVectors object under adv_userseq_indu_name_city_word2vec.kv, separately None
[2020-09-26 18:27:44] - utils.py[line:657] - INFO: not storing attribute vectors_norm
[2020-09-26 18:27:44] - utils.py[line:565] - INFO: saved adv_userseq_indu_name_city_word2vec.kv
[2020-09-26 18:27:44] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:44] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:45] - word2vec.py[line:1407] - INFO: collected 5 word types from a corpus of 1445492 raw words and 174 sentences
[2020-09-26 18:27:45] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:45] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 5 unique words (100% of original 5, drops 0)
[2020-09-26 18:27:45] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445492 word corpus (100% of 

Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:45] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node depth 4
[2020-09-26 18:27:45] - base_any2vec.py[line:1008] - INFO: estimated required memory for 5 words and 64 dimensions: 7340 bytes
[2020-09-26 18:27:45] - word2vec.py[line:1699] - INFO: resetting layer weights
[2020-09-26 18:27:45] - base_any2vec.py[line:1196] - INFO: training model with -1 workers on 5 vocabulary and 64 features, using sg=0 hs=1 sample=0.001 negative=5 window=10000
[2020-09-26 18:27:45] - base_any2vec.py[line:1332] - INFO: EPOCH - 1 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:45] - base_any2vec.py[line:1332] - INFO: EPOCH - 2 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:45] - base_any2vec.py[line:1332] - INFO: EPOCH - 3 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:45] - base_any2vec.py[line:1332] - INFO: EPOCH - 4 : training 

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:45] - word2vec.py[line:1407] - INFO: collected 91 word types from a corpus of 1445492 raw words and 174 sentences
[2020-09-26 18:27:45] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:45] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 91 unique words (100% of original 91, drops 0)
[2020-09-26 18:27:45] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445492 word corpus (100% of original 1445492, drops 0)
[2020-09-26 18:27:45] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 91 items
[2020-09-26 18:27:45] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 51 most-common words
[2020-09-26 18:27:45] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 403715 word corpus (27.9% of prior 1445492)
[2020-09-26 18:27:45] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 91 words
[2020-09-26 18:27:45] - word2vec.py[line:1673] - INFO: built huffman tree with maximu

save start


[2020-09-26 18:27:47] - utils.py[line:551] - INFO: saving Word2VecKeyedVectors object under adv_userseq_indu_name_device_name_word2vec.kv, separately None
[2020-09-26 18:27:47] - utils.py[line:657] - INFO: not storing attribute vectors_norm
[2020-09-26 18:27:47] - utils.py[line:565] - INFO: saved adv_userseq_indu_name_device_name_word2vec.kv
[2020-09-26 18:27:47] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:47] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:48] - word2vec.py[line:1407] - INFO: collected 10 word types from a corpus of 1445492 raw words and 174 sentences
[2020-09-26 18:27:48] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:48] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 10 unique words (100% of original 10, drops 0)
[2020-09-26 18:27:48] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445492 word

Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:48] - base_any2vec.py[line:1332] - INFO: EPOCH - 4 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:48] - base_any2vec.py[line:1332] - INFO: EPOCH - 5 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:48] - base_any2vec.py[line:1332] - INFO: EPOCH - 6 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:48] - base_any2vec.py[line:1332] - INFO: EPOCH - 7 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:48] - base_any2vec.py[line:1332] - INFO: EPOCH - 8 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:48] - base_any2vec.py[line:1332] - INFO: EPOCH - 9 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:48] - base_any2vec.py[line:1332] - INFO: EPOCH - 10 : training on 0 raw words (0 effective words) took 0.0

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:48] - word2vec.py[line:1407] - INFO: collected 4 word types from a corpus of 1445492 raw words and 174 sentences
[2020-09-26 18:27:48] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:48] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 4 unique words (100% of original 4, drops 0)
[2020-09-26 18:27:48] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445492 word corpus (100% of original 1445492, drops 0)
[2020-09-26 18:27:48] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 4 items
[2020-09-26 18:27:48] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 3 most-common words
[2020-09-26 18:27:48] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 73260 word corpus (5.1% of prior 1445492)
[2020-09-26 18:27:48] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 4 words
[2020-09-26 18:27:48] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node d

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:49] - word2vec.py[line:1407] - INFO: collected 37 word types from a corpus of 1445492 raw words and 174 sentences
[2020-09-26 18:27:49] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:49] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 37 unique words (100% of original 37, drops 0)
[2020-09-26 18:27:49] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445492 word corpus (100% of original 1445492, drops 0)
[2020-09-26 18:27:49] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 37 items
[2020-09-26 18:27:49] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 31 most-common words
[2020-09-26 18:27:49] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 288179 word corpus (19.9% of prior 1445492)
[2020-09-26 18:27:49] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 37 words
[2020-09-26 18:27:49] - word2vec.py[line:1673] - INFO: built huffman tree with maximu

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:51] - word2vec.py[line:1407] - INFO: collected 9 word types from a corpus of 1445501 raw words and 232 sentences
[2020-09-26 18:27:51] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:51] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 9 unique words (100% of original 9, drops 0)
[2020-09-26 18:27:51] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445501 word corpus (100% of original 1445501, drops 0)
[2020-09-26 18:27:51] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 9 items
[2020-09-26 18:27:51] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 8 most-common words
[2020-09-26 18:27:51] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 128634 word corpus (8.9% of prior 1445501)
[2020-09-26 18:27:51] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 9 words
[2020-09-26 18:27:51] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node 

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:51] - word2vec.py[line:1407] - INFO: collected 345 word types from a corpus of 1445501 raw words and 232 sentences
[2020-09-26 18:27:51] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:51] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 345 unique words (100% of original 345, drops 0)
[2020-09-26 18:27:51] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445501 word corpus (100% of original 1445501, drops 0)
[2020-09-26 18:27:51] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 345 items
[2020-09-26 18:27:51] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 98 most-common words
[2020-09-26 18:27:51] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 911366 word corpus (63.0% of prior 1445501)
[2020-09-26 18:27:51] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 345 words
[2020-09-26 18:27:51] - word2vec.py[line:1673] - INFO: built huffman tree with m

save start


[2020-09-26 18:27:53] - utils.py[line:657] - INFO: not storing attribute vectors_norm
[2020-09-26 18:27:53] - utils.py[line:565] - INFO: saved adv_userseq_adv_prim_id_city_word2vec.kv
[2020-09-26 18:27:53] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:53] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types


Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:53] - word2vec.py[line:1407] - INFO: collected 5 word types from a corpus of 1445501 raw words and 232 sentences
[2020-09-26 18:27:53] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:53] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 5 unique words (100% of original 5, drops 0)
[2020-09-26 18:27:53] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445501 word corpus (100% of original 1445501, drops 0)
[2020-09-26 18:27:53] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 5 items
[2020-09-26 18:27:53] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 4 most-common words
[2020-09-26 18:27:53] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 88986 word corpus (6.2% of prior 1445501)
[2020-09-26 18:27:53] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 5 words
[2020-09-26 18:27:53] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node d

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:54] - word2vec.py[line:1407] - INFO: collected 91 word types from a corpus of 1445501 raw words and 232 sentences
[2020-09-26 18:27:54] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:54] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 91 unique words (100% of original 91, drops 0)
[2020-09-26 18:27:54] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445501 word corpus (100% of original 1445501, drops 0)
[2020-09-26 18:27:54] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 91 items
[2020-09-26 18:27:54] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 51 most-common words
[2020-09-26 18:27:54] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 403725 word corpus (27.9% of prior 1445501)
[2020-09-26 18:27:54] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 91 words
[2020-09-26 18:27:54] - word2vec.py[line:1673] - INFO: built huffman tree with maximu

save start


[2020-09-26 18:27:56] - utils.py[line:565] - INFO: saved adv_userseq_adv_prim_id_device_name_word2vec.model
[2020-09-26 18:27:56] - utils.py[line:551] - INFO: saving Word2VecKeyedVectors object under adv_userseq_adv_prim_id_device_name_word2vec.kv, separately None
[2020-09-26 18:27:56] - utils.py[line:657] - INFO: not storing attribute vectors_norm
[2020-09-26 18:27:56] - utils.py[line:565] - INFO: saved adv_userseq_adv_prim_id_device_name_word2vec.kv
[2020-09-26 18:27:56] - word2vec.py[line:1399] - INFO: collecting all words and their counts
[2020-09-26 18:27:56] - word2vec.py[line:1384] - INFO: PROGRESS: at sentence #0, processed 0 words, keeping 0 word types
[2020-09-26 18:27:56] - word2vec.py[line:1407] - INFO: collected 10 word types from a corpus of 1445501 raw words and 232 sentences
[2020-09-26 18:27:56] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:56] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 10 unique words (100% of origin

Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:56] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 10 words
[2020-09-26 18:27:56] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node depth 7
[2020-09-26 18:27:56] - base_any2vec.py[line:1008] - INFO: estimated required memory for 10 words and 64 dimensions: 14680 bytes
[2020-09-26 18:27:56] - word2vec.py[line:1699] - INFO: resetting layer weights
[2020-09-26 18:27:56] - base_any2vec.py[line:1196] - INFO: training model with -1 workers on 10 vocabulary and 64 features, using sg=0 hs=1 sample=0.001 negative=5 window=10000
[2020-09-26 18:27:56] - base_any2vec.py[line:1332] - INFO: EPOCH - 1 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:56] - base_any2vec.py[line:1332] - INFO: EPOCH - 2 : training on 0 raw words (0 effective words) took 0.0s, 0 effective words/s
[2020-09-26 18:27:56] - base_any2vec.py[line:1332] - INFO: EPOCH - 3 : training on 0 raw words (0 effective words) took 0.0s, 

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:57] - word2vec.py[line:1407] - INFO: collected 4 word types from a corpus of 1445501 raw words and 232 sentences
[2020-09-26 18:27:57] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:57] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 4 unique words (100% of original 4, drops 0)
[2020-09-26 18:27:57] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445501 word corpus (100% of original 1445501, drops 0)
[2020-09-26 18:27:57] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 4 items
[2020-09-26 18:27:57] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 3 most-common words
[2020-09-26 18:27:57] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 73269 word corpus (5.1% of prior 1445501)
[2020-09-26 18:27:57] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 4 words
[2020-09-26 18:27:57] - word2vec.py[line:1673] - INFO: built huffman tree with maximum node d

save start
Done
LineSentence start
Word2Vec start


[2020-09-26 18:27:58] - word2vec.py[line:1407] - INFO: collected 37 word types from a corpus of 1445501 raw words and 232 sentences
[2020-09-26 18:27:58] - word2vec.py[line:1458] - INFO: Loading a fresh vocabulary
[2020-09-26 18:27:58] - word2vec.py[line:1482] - INFO: effective_min_count=1 retains 37 unique words (100% of original 37, drops 0)
[2020-09-26 18:27:58] - word2vec.py[line:1488] - INFO: effective_min_count=1 leaves 1445501 word corpus (100% of original 1445501, drops 0)
[2020-09-26 18:27:58] - word2vec.py[line:1547] - INFO: deleting the raw counts dictionary of 37 items
[2020-09-26 18:27:58] - word2vec.py[line:1550] - INFO: sample=0.001 downsamples 31 most-common words
[2020-09-26 18:27:58] - word2vec.py[line:1553] - INFO: downsampling leaves estimated 288189 word corpus (19.9% of prior 1445501)
[2020-09-26 18:27:58] - word2vec.py[line:1648] - INFO: constructing a huffman tree from 37 words
[2020-09-26 18:27:58] - word2vec.py[line:1673] - INFO: built huffman tree with maximu

save start
Done


# avg

In [10]:
from gensim.models import KeyedVectors
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
from base import Cache
from tqdm import tqdm
import pandas as pd 


In [11]:
def get_embedding(f1_f2,f1):
    path = 'adv_userseq_'+f1_f2+'_word2vec.kv'
    wv = KeyedVectors.load(path, mmap='r')
    list_df = Cache.reload_cache('CACHE_list_df_adv_userseq_'+f1_f2+'.pkl')
    list_df.columns=['list',f1] 
    f = open('adv_userseq_'+f1_f2+'.txt','r')
    ind = 0
    buf = []
    for i in f:
        buf_ = np.zeros(64)
        for j in i.strip().split(' '):
            buf_ = buf_+wv[j]
        buf_ = buf_/len(i) # 求平均
        buf_f1 = list_df.at[ind, f1]
        buf__ = []
        buf_ = buf_.tolist()
        buf__.append(buf_)
        buf__.append(buf_f1)
        buf.append(buf__)
        ind = ind+1
    df_f1_list = pd.DataFrame(buf) 
    Cache.cache_data(df_f1_list, nm_marker='list_df_avg_adv_userseq_'+f1_f2)
    return 0

In [12]:
f1_f2_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],['task_id','gender'],
              ['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],['adv_id','device_name'],['adv_id','career'],
              ['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],['creat_type_cd','city'],['creat_type_cd','city_rank'],
              ['creat_type_cd','device_name'],['creat_type_cd','career'],['creat_type_cd','gender'],['creat_type_cd','residence'],['indu_name','age'],
              ['indu_name','city'],['indu_name','city_rank'],['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],['indu_name','residence'],
              ['adv_prim_id','age'],['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],['adv_prim_id','career'],
              ['adv_prim_id','gender'],['adv_prim_id','residence']]
for i in tqdm(f1_f2_list):
    get_embedding(str(i[0])+'_'+str(i[1]),i[0])
    

  0%|          | 0/35 [00:00<?, ?it/s][2020-09-26 18:28:01] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_age_word2vec.kv
[2020-09-26 18:28:01] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:28:01] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_age_word2vec.kv
[2020-09-26 18:28:01] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_task_id_age.pkl
[2020-09-26 18:28:04] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_age.pkl
  3%|▎         | 1/35 [00:03<01:45,  3.11s/it][2020-09-26 18:28:04] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_city_word2vec.kv
[2020-09-26 18:28:04] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:28:0

[2020-09-26 18:28:40] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:28:40] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_career_word2vec.kv
[2020-09-26 18:28:40] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_id_career.pkl
[2020-09-26 18:28:43] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_career.pkl
 34%|███▍      | 12/35 [00:42<01:19,  3.45s/it][2020-09-26 18:28:43] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_gender_word2vec.kv
[2020-09-26 18:28:43] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:28:43] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_gender_word2vec.kv
[2020-09-26 18:28:43] - __init__.py[line:126] - INFO: Successfully Reload: /home/

[2020-09-26 18:29:17] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:29:17] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_city_word2vec.kv
[2020-09-26 18:29:17] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_indu_name_city.pkl
[2020-09-26 18:29:21] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_city.pkl
 66%|██████▌   | 23/35 [01:20<00:42,  3.58s/it][2020-09-26 18:29:21] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_city_rank_word2vec.kv
[2020-09-26 18:29:21] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:29:21] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_city_rank_word2vec.kv
[2020-09-26 18:29:21] - __init__.py[line:126] - INFO: Successfully

[2020-09-26 18:29:55] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:29:55] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_gender_word2vec.kv
[2020-09-26 18:29:55] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_adv_userseq_adv_prim_id_gender.pkl
[2020-09-26 18:29:58] - __init__.py[line:111] - INFO: Cache Successfully! File name: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_gender.pkl
 97%|█████████▋| 34/35 [01:57<00:03,  3.23s/it][2020-09-26 18:29:58] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_residence_word2vec.kv
[2020-09-26 18:29:58] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:29:58] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_residence_word2vec.kv
[2020-09-26 18:29:58] - __init__.py[line:126] - IN

# adv_emb_mtx

In [13]:
from gensim.models import KeyedVectors
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
from base import Cache
from tqdm import tqdm
import pandas as pd 


In [14]:
def get_embedding(f1_f2,f1):
    avg_f1 = Cache.reload_cache('CACHE_list_df_avg_adv_userseq_'+f1_f2+'.pkl')
    feature_tokens = avg_f1[[1]].values.flatten().astype(str).tolist()
    tokenizer = Tokenizer(num_words=len(feature_tokens)+1)
    tokenizer.fit_on_texts(feature_tokens)
    embedding_dim = 64
    embedding_matrix = np.random.randn(
        len(feature_tokens)+1, embedding_dim)
    avg_f1_copy = avg_f1.copy()
    avg_f1_copy = avg_f1_copy.set_index(1)
    
    for feature in feature_tokens:
        embedding_vector = np.array(avg_f1_copy.loc[int(feature),:].values[0])
        if embedding_vector is not None:
            index = tokenizer.texts_to_sequences([feature])[0][0]
            embedding_matrix[index] = embedding_vector
    return embedding_matrix



In [15]:
if __name__ == '__main__': 
    f1_f2_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],['task_id','gender'],
                  ['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],['adv_id','device_name'],['adv_id','career'],
                  ['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],['creat_type_cd','city'],['creat_type_cd','city_rank'],
                  ['creat_type_cd','device_name'],['creat_type_cd','career'],['creat_type_cd','gender'],['creat_type_cd','residence'],['indu_name','age'],
                  ['indu_name','city'],['indu_name','city_rank'],['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],
                  ['indu_name','residence'],['adv_prim_id','age'],['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],
                  ['adv_prim_id','career'],['adv_prim_id','gender'],['adv_prim_id','residence']]
    for i in tqdm(f1_f2_list):
        mtx = get_embedding(str(i[0])+'_'+str(i[1]),i[0])
        np.save(str(i[0])+'_'+str(i[1])+'_emb_mtx_adv_userseq_adv.npy',mtx)

  0%|          | 0/35 [00:00<?, ?it/s][2020-09-26 18:30:02] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_age.pkl
  3%|▎         | 1/35 [00:00<00:20,  1.65it/s][2020-09-26 18:30:02] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_city.pkl
  6%|▌         | 2/35 [00:01<00:21,  1.54it/s][2020-09-26 18:30:03] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_city_rank.pkl
  9%|▊         | 3/35 [00:02<00:21,  1.49it/s][2020-09-26 18:30:04] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_device_name.pkl
 11%|█▏        | 4/35 [00:02<00:20,  1.52it/s][2020-09-26 18:30:04] - __init__.py[line:126] - INFO: Successfully Reload: 

# user_emb_mtx

In [16]:
from gensim.models import KeyedVectors
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
from tqdm import tqdm


In [17]:
def get_embedding(path):
    path_kv = 'adv_userseq_'+path+'_word2vec.kv'
    wv = KeyedVectors.load(path_kv, mmap='r')
    feature_tokens = list(wv.vocab.keys())
    tokenizer = Tokenizer(num_words=len(feature_tokens)+1)
    tokenizer.fit_on_texts(feature_tokens)
    embedding_dim = 64
    embedding_matrix = np.random.randn(
        len(feature_tokens)+1, embedding_dim)
    for feature in feature_tokens:
        embedding_vector = wv[feature]
        if embedding_vector is not None:
            index = tokenizer.texts_to_sequences([feature])[0][0]
            embedding_matrix[index] = embedding_vector
    print(embedding_matrix.shape)
    return embedding_matrix



In [18]:
f1_f2_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],['task_id','gender'],
              ['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],['adv_id','device_name'],['adv_id','career'],
              ['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],['creat_type_cd','city'],['creat_type_cd','city_rank'],
              ['creat_type_cd','device_name'],['creat_type_cd','career'],['creat_type_cd','gender'],['creat_type_cd','residence'],['indu_name','age'],
              ['indu_name','city'],['indu_name','city_rank'],['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],
              ['indu_name','residence'],['adv_prim_id','age'],['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],
              ['adv_prim_id','career'],['adv_prim_id','gender'],['adv_prim_id','residence']]
for i in tqdm(f1_f2_list):
    embedding_matrix = get_embedding(str(i[0])+'_'+str(i[1]))
    np.save(str(i[0])+'_'+str(i[1])+'_emb_mtx_adv_userseq_user.npy',embedding_matrix)

  0%|          | 0/35 [00:00<?, ?it/s][2020-09-26 18:30:12] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_age_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:30:12] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_age_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_city_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:30:12] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_city_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_city_rank_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:30:12] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_city_rank_

[2020-09-26 18:30:12] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_device_name_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_career_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:30:12] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_career_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_gender_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 18:30:12] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_gender_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_residence_word2vec.kv
[2020-09-26 18:30:12] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2

(10, 64)
(346, 64)
(6, 64)
(92, 64)
(11, 64)
(5, 64)
(38, 64)
(10, 64)
(346, 64)
(6, 64)
(92, 64)
(11, 64)
(5, 64)
(38, 64)
(9, 64)
(345, 64)
(5, 64)
(91, 64)
(10, 64)
(4, 64)
(37, 64)
(10, 64)
(346, 64)
(6, 64)
(92, 64)
(11, 64)
(5, 64)
(38, 64)
(10, 64)
(346, 64)
(6, 64)
(92, 64)
(11, 64)
(5, 64)
(38, 64)





# input_adv

In [19]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from gensim.models import Word2Vec, KeyedVectors
import numpy as np
from tqdm import tqdm
from base import Cache


In [20]:
def input_w2v(f1_f2,all_data,f1):
    feature_seq = all_data[[f1]].values.flatten().astype(str).tolist()

    avg_f1 = Cache.reload_cache('CACHE_list_df_avg_adv_userseq_'+f1_f2+'.pkl')
    feature_tokens = avg_f1[[1]].values.flatten().astype(str).tolist()
    tokenizer = Tokenizer(num_words=len(feature_tokens)+1)
    tokenizer.fit_on_texts(feature_tokens)
    
    npy_path = f1_f2
    sequences = tokenizer.texts_to_sequences(feature_seq[:8672928])
    x_train = pad_sequences(
        sequences, maxlen=1, padding='post')
    print(x_train.shape)
    np.save(npy_path+'_adv_userseq_adv_train.npy', x_train)

    sequences = tokenizer.texts_to_sequences(feature_seq[8672928:])
    x_test = pad_sequences(
        sequences, maxlen=1, padding='post') 
    print(x_test.shape)
    np.save(npy_path+'_adv_userseq_adv_test.npy', x_test)


In [21]:

f1_f2_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],['task_id','gender'],
              ['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],['adv_id','device_name'],['adv_id','career'],
              ['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],['creat_type_cd','city'],['creat_type_cd','city_rank'],
              ['creat_type_cd','device_name'],['creat_type_cd','career'],['creat_type_cd','gender'],['creat_type_cd','residence'],['indu_name','age'],
              ['indu_name','city'],['indu_name','city_rank'],['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],
              ['indu_name','residence'],['adv_prim_id','age'],['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],
              ['adv_prim_id','career'],['adv_prim_id','gender'],['adv_prim_id','residence']]
data = Cache.reload_cache('CACHE_data_sampling_pos1_neg5.pkl')
for i in tqdm(f1_f2_list):
    input_w2v(str(i[0])+'_'+str(i[1]),data,str(i[0]))

[2020-09-26 18:30:13] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_data_sampling_pos1_neg5.pkl
  0%|          | 0/35 [00:00<?, ?it/s][2020-09-26 18:30:20] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_age.pkl


(8672928, 1)
(1000000, 1)


  3%|▎         | 1/35 [01:39<56:10, 99.14s/it][2020-09-26 18:31:57] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_city.pkl


(8672928, 1)


  6%|▌         | 2/35 [03:16<54:18, 98.73s/it]

(1000000, 1)


[2020-09-26 18:33:36] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_city_rank.pkl


(8672928, 1)


  9%|▊         | 3/35 [04:57<52:55, 99.24s/it]

(1000000, 1)


[2020-09-26 18:35:16] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_device_name.pkl


(8672928, 1)


 11%|█▏        | 4/35 [06:34<50:53, 98.49s/it]

(1000000, 1)


[2020-09-26 18:36:52] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_career.pkl


(8672928, 1)
(1000000, 1)


 14%|█▍        | 5/35 [08:12<49:14, 98.49s/it][2020-09-26 18:38:31] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_gender.pkl


(8672928, 1)
(1000000, 1)


 17%|█▋        | 6/35 [09:53<47:55, 99.15s/it][2020-09-26 18:40:13] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_task_id_residence.pkl


(8672928, 1)
(1000000, 1)


 20%|██        | 7/35 [11:32<46:20, 99.31s/it][2020-09-26 18:41:53] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_age.pkl


(8672928, 1)
(1000000, 1)


 23%|██▎       | 8/35 [13:14<44:58, 99.94s/it][2020-09-26 18:43:33] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_city.pkl


(8672928, 1)
(1000000, 1)


 26%|██▌       | 9/35 [14:49<42:43, 98.60s/it][2020-09-26 18:45:08] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_city_rank.pkl


(8672928, 1)
(1000000, 1)


 29%|██▊       | 10/35 [16:26<40:54, 98.17s/it][2020-09-26 18:46:45] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_device_name.pkl


(8672928, 1)
(1000000, 1)


 31%|███▏      | 11/35 [18:05<39:21, 98.39s/it][2020-09-26 18:48:24] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_career.pkl


(8672928, 1)
(1000000, 1)


 34%|███▍      | 12/35 [19:43<37:39, 98.25s/it][2020-09-26 18:50:03] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_gender.pkl


(8672928, 1)
(1000000, 1)


 37%|███▋      | 13/35 [21:22<36:05, 98.45s/it][2020-09-26 18:51:41] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_id_residence.pkl


(8672928, 1)
(1000000, 1)


 40%|████      | 14/35 [23:08<35:11, 100.53s/it][2020-09-26 18:53:25] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_creat_type_cd_age.pkl


(8672928, 1)


 43%|████▎     | 15/35 [24:35<32:10, 96.54s/it] 

(1000000, 1)


[2020-09-26 18:54:53] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_creat_type_cd_city.pkl


(8672928, 1)


 46%|████▌     | 16/35 [26:08<30:13, 95.42s/it]

(1000000, 1)


[2020-09-26 18:56:25] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_creat_type_cd_city_rank.pkl


(8672928, 1)


 49%|████▊     | 17/35 [27:42<28:30, 95.01s/it]

(1000000, 1)


[2020-09-26 18:57:59] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_creat_type_cd_device_name.pkl


(8672928, 1)


 51%|█████▏    | 18/35 [29:10<26:19, 92.90s/it]

(1000000, 1)


[2020-09-26 18:59:27] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_creat_type_cd_career.pkl


(8672928, 1)


 54%|█████▍    | 19/35 [30:40<24:33, 92.07s/it]

(1000000, 1)


[2020-09-26 19:00:57] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_creat_type_cd_gender.pkl


(8672928, 1)


 57%|█████▋    | 20/35 [32:10<22:52, 91.52s/it]

(1000000, 1)


[2020-09-26 19:02:28] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_creat_type_cd_residence.pkl


(8672928, 1)


 60%|██████    | 21/35 [33:42<21:23, 91.66s/it]

(1000000, 1)


[2020-09-26 19:04:00] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_age.pkl


(8672928, 1)
(1000000, 1)


 63%|██████▎   | 22/35 [35:16<19:59, 92.25s/it][2020-09-26 19:05:34] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_city.pkl


(8672928, 1)
(1000000, 1)


 66%|██████▌   | 23/35 [36:49<18:32, 92.67s/it][2020-09-26 19:07:07] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_city_rank.pkl


(8672928, 1)
(1000000, 1)


 69%|██████▊   | 24/35 [38:27<17:16, 94.20s/it][2020-09-26 19:08:45] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_device_name.pkl


(8672928, 1)
(1000000, 1)


 71%|███████▏  | 25/35 [40:07<15:58, 95.88s/it][2020-09-26 19:10:25] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_career.pkl


(8672928, 1)
(1000000, 1)


 74%|███████▍  | 26/35 [41:43<14:22, 95.83s/it][2020-09-26 19:12:01] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_gender.pkl


(8672928, 1)
(1000000, 1)


 77%|███████▋  | 27/35 [43:18<12:45, 95.69s/it][2020-09-26 19:13:36] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_indu_name_residence.pkl


(8672928, 1)
(1000000, 1)


 80%|████████  | 28/35 [44:57<11:16, 96.64s/it][2020-09-26 19:15:15] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_age.pkl


(8672928, 1)
(1000000, 1)


 83%|████████▎ | 29/35 [46:31<09:34, 95.83s/it][2020-09-26 19:16:49] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_city.pkl


(8672928, 1)
(1000000, 1)


 86%|████████▌ | 30/35 [48:09<08:02, 96.53s/it][2020-09-26 19:18:27] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_city_rank.pkl


(8672928, 1)
(1000000, 1)


 89%|████████▊ | 31/35 [49:42<06:22, 95.64s/it][2020-09-26 19:20:00] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_device_name.pkl


(8672928, 1)
(1000000, 1)


 91%|█████████▏| 32/35 [51:16<04:44, 94.89s/it][2020-09-26 19:21:33] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_career.pkl


(8672928, 1)
(1000000, 1)


 94%|█████████▍| 33/35 [52:50<03:09, 94.88s/it][2020-09-26 19:23:08] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_gender.pkl


(8672928, 1)
(1000000, 1)


 97%|█████████▋| 34/35 [54:21<01:33, 93.61s/it][2020-09-26 19:24:39] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_list_df_avg_adv_userseq_adv_prim_id_residence.pkl


(8672928, 1)
(1000000, 1)


100%|██████████| 35/35 [55:52<00:00, 95.79s/it]


# input_user

In [22]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from gensim.models import Word2Vec, KeyedVectors
import numpy as np
from tqdm import tqdm
from base import Cache


In [23]:
def input_w2v(f1_f2,all_data,f2):
    feature_seq = all_data[[f2]].values.flatten().astype(str).tolist()

    path_kv = 'adv_userseq_'+f1_f2+'_word2vec.kv'
    wv = KeyedVectors.load(path_kv, mmap='r')
    feature_tokens = list(wv.vocab.keys())
    
    tokenizer = Tokenizer(num_words=len(feature_tokens)+1)
    tokenizer.fit_on_texts(feature_tokens)
    
    npy_path = f1_f2
    sequences = tokenizer.texts_to_sequences(feature_seq[:8672928])
    x_train = pad_sequences(
        sequences, maxlen=1, padding='post')
    print(x_train.shape)
    np.save(npy_path+'_adv_userseq_user_train.npy', x_train)

    sequences = tokenizer.texts_to_sequences(feature_seq[8672928:])
    x_test = pad_sequences(
        sequences, maxlen=1, padding='post') 
    print(x_test.shape)
    np.save(npy_path+'_adv_userseq_user_test.npy', x_test)


In [24]:
f1_f2_list = [['task_id','age'],['task_id','city'],['task_id','city_rank'],['task_id','device_name'],['task_id','career'],
              ['task_id','gender'],['task_id','residence'],['adv_id','age'],['adv_id','city'],['adv_id','city_rank'],['adv_id','device_name'],
              ['adv_id','career'],['adv_id','gender'],['adv_id','residence'],['creat_type_cd','age'],['creat_type_cd','city'],['creat_type_cd','city_rank'],
              ['creat_type_cd','device_name'],['creat_type_cd','career'],['creat_type_cd','gender'],['creat_type_cd','residence'],['indu_name','age'],
              ['indu_name','city'],['indu_name','city_rank'],['indu_name','device_name'],['indu_name','career'],['indu_name','gender'],
              ['indu_name','residence'],['adv_prim_id','age'],['adv_prim_id','city'],['adv_prim_id','city_rank'],['adv_prim_id','device_name'],
              ['adv_prim_id','career'],['adv_prim_id','gender'],['adv_prim_id','residence']]
data = Cache.reload_cache('CACHE_data_sampling_pos1_neg5.pkl')
for i in tqdm(f1_f2_list):
    input_w2v(str(i[0])+'_'+str(i[1]),data,str(i[1]))
    

[2020-09-26 19:26:06] - __init__.py[line:126] - INFO: Successfully Reload: /home/zhangqibot/proj/digix/zlh/stage2/cached_data/CACHE_data_sampling_pos1_neg5.pkl
  0%|          | 0/35 [00:00<?, ?it/s][2020-09-26 19:26:11] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_age_word2vec.kv
[2020-09-26 19:26:11] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:26:11] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_age_word2vec.kv


(8672928, 1)


  3%|▎         | 1/35 [01:25<48:40, 85.90s/it]

(1000000, 1)


[2020-09-26 19:27:37] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_city_word2vec.kv
[2020-09-26 19:27:37] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:27:37] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_city_word2vec.kv


(8672928, 1)
(1000000, 1)


  6%|▌         | 2/35 [03:01<48:50, 88.80s/it][2020-09-26 19:29:13] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_city_rank_word2vec.kv
[2020-09-26 19:29:13] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:29:13] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_city_rank_word2vec.kv


(8672928, 1)


  9%|▊         | 3/35 [04:37<48:27, 90.86s/it]

(1000000, 1)


[2020-09-26 19:30:48] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_device_name_word2vec.kv
[2020-09-26 19:30:48] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:30:48] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_device_name_word2vec.kv


(8672928, 1)
(1000000, 1)


 11%|█▏        | 4/35 [06:20<48:50, 94.53s/it][2020-09-26 19:32:31] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_career_word2vec.kv
[2020-09-26 19:32:31] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:32:31] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_career_word2vec.kv


(8672928, 1)


 14%|█▍        | 5/35 [07:57<47:37, 95.25s/it]

(1000000, 1)


[2020-09-26 19:34:08] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_gender_word2vec.kv
[2020-09-26 19:34:08] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:34:08] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_gender_word2vec.kv


(8672928, 1)


 17%|█▋        | 6/35 [09:36<46:39, 96.55s/it]

(1000000, 1)


[2020-09-26 19:35:48] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_task_id_residence_word2vec.kv
[2020-09-26 19:35:48] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:35:48] - utils.py[line:437] - INFO: loaded adv_userseq_task_id_residence_word2vec.kv


(8672928, 1)
(1000000, 1)


 20%|██        | 7/35 [11:19<45:54, 98.39s/it][2020-09-26 19:37:30] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_age_word2vec.kv
[2020-09-26 19:37:30] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:37:30] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_age_word2vec.kv


(8672928, 1)


 23%|██▎       | 8/35 [12:57<44:13, 98.27s/it]

(1000000, 1)


[2020-09-26 19:39:09] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_city_word2vec.kv
[2020-09-26 19:39:10] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:39:10] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_city_word2vec.kv


(8672928, 1)
(1000000, 1)


 26%|██▌       | 9/35 [14:41<43:21, 100.08s/it][2020-09-26 19:40:52] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_city_rank_word2vec.kv
[2020-09-26 19:40:52] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:40:52] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_city_rank_word2vec.kv


(8672928, 1)


 29%|██▊       | 10/35 [16:17<41:09, 98.76s/it]

(1000000, 1)


[2020-09-26 19:42:29] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_device_name_word2vec.kv
[2020-09-26 19:42:29] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:42:29] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_device_name_word2vec.kv


(8672928, 1)
(1000000, 1)


 31%|███▏      | 11/35 [17:57<39:43, 99.31s/it][2020-09-26 19:44:09] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_career_word2vec.kv
[2020-09-26 19:44:09] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:44:09] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_career_word2vec.kv


(8672928, 1)


 34%|███▍      | 12/35 [19:32<37:28, 97.75s/it]

(1000000, 1)


[2020-09-26 19:45:43] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_gender_word2vec.kv
[2020-09-26 19:45:43] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:45:43] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_gender_word2vec.kv


(8672928, 1)


 37%|███▋      | 13/35 [21:14<36:24, 99.29s/it]

(1000000, 1)


[2020-09-26 19:47:26] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_id_residence_word2vec.kv
[2020-09-26 19:47:26] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:47:26] - utils.py[line:437] - INFO: loaded adv_userseq_adv_id_residence_word2vec.kv


(8672928, 1)
(1000000, 1)


 40%|████      | 14/35 [23:02<35:38, 101.84s/it][2020-09-26 19:49:15] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_creat_type_cd_age_word2vec.kv
[2020-09-26 19:49:15] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:49:15] - utils.py[line:437] - INFO: loaded adv_userseq_creat_type_cd_age_word2vec.kv


(8672928, 1)


 43%|████▎     | 15/35 [24:39<33:27, 100.36s/it]

(1000000, 1)


[2020-09-26 19:50:51] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_creat_type_cd_city_word2vec.kv
[2020-09-26 19:50:52] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:50:52] - utils.py[line:437] - INFO: loaded adv_userseq_creat_type_cd_city_word2vec.kv


(8672928, 1)
(1000000, 1)


 46%|████▌     | 16/35 [26:21<31:54, 100.74s/it][2020-09-26 19:52:32] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_creat_type_cd_city_rank_word2vec.kv
[2020-09-26 19:52:32] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:52:32] - utils.py[line:437] - INFO: loaded adv_userseq_creat_type_cd_city_rank_word2vec.kv


(8672928, 1)


 49%|████▊     | 17/35 [27:50<29:08, 97.15s/it] 

(1000000, 1)


[2020-09-26 19:54:01] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_creat_type_cd_device_name_word2vec.kv
[2020-09-26 19:54:01] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:54:01] - utils.py[line:437] - INFO: loaded adv_userseq_creat_type_cd_device_name_word2vec.kv


(8672928, 1)
(1000000, 1)


 51%|█████▏    | 18/35 [29:28<27:36, 97.43s/it][2020-09-26 19:55:39] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_creat_type_cd_career_word2vec.kv
[2020-09-26 19:55:39] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:55:39] - utils.py[line:437] - INFO: loaded adv_userseq_creat_type_cd_career_word2vec.kv


(8672928, 1)


 54%|█████▍    | 19/35 [30:59<25:27, 95.47s/it]

(1000000, 1)


[2020-09-26 19:57:10] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_creat_type_cd_gender_word2vec.kv
[2020-09-26 19:57:10] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:57:10] - utils.py[line:437] - INFO: loaded adv_userseq_creat_type_cd_gender_word2vec.kv


(8672928, 1)


 57%|█████▋    | 20/35 [32:28<23:25, 93.72s/it]

(1000000, 1)


[2020-09-26 19:58:40] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_creat_type_cd_residence_word2vec.kv
[2020-09-26 19:58:40] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 19:58:40] - utils.py[line:437] - INFO: loaded adv_userseq_creat_type_cd_residence_word2vec.kv


(8672928, 1)
(1000000, 1)


 60%|██████    | 21/35 [34:07<22:14, 95.34s/it][2020-09-26 20:00:19] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_age_word2vec.kv
[2020-09-26 20:00:19] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:00:19] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_age_word2vec.kv


(8672928, 1)


 63%|██████▎   | 22/35 [35:41<20:32, 94.83s/it]

(1000000, 1)


[2020-09-26 20:01:53] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_city_word2vec.kv
[2020-09-26 20:01:53] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:01:53] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_city_word2vec.kv


(8672928, 1)
(1000000, 1)


 66%|██████▌   | 23/35 [37:16<18:57, 94.76s/it][2020-09-26 20:03:27] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_city_rank_word2vec.kv
[2020-09-26 20:03:27] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:03:27] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_city_rank_word2vec.kv


(8672928, 1)


 69%|██████▊   | 24/35 [38:45<17:05, 93.25s/it]

(1000000, 1)


[2020-09-26 20:04:57] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_device_name_word2vec.kv
[2020-09-26 20:04:57] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:04:57] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_device_name_word2vec.kv


(8672928, 1)
(1000000, 1)


 71%|███████▏  | 25/35 [40:19<15:34, 93.43s/it][2020-09-26 20:06:31] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_career_word2vec.kv
[2020-09-26 20:06:31] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:06:31] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_career_word2vec.kv


(8672928, 1)


 74%|███████▍  | 26/35 [41:48<13:48, 92.09s/it]

(1000000, 1)


[2020-09-26 20:07:59] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_gender_word2vec.kv
[2020-09-26 20:07:59] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:07:59] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_gender_word2vec.kv


(8672928, 1)


 77%|███████▋  | 27/35 [43:16<12:06, 90.87s/it]

(1000000, 1)


[2020-09-26 20:09:28] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_indu_name_residence_word2vec.kv
[2020-09-26 20:09:28] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:09:28] - utils.py[line:437] - INFO: loaded adv_userseq_indu_name_residence_word2vec.kv


(8672928, 1)
(1000000, 1)


 80%|████████  | 28/35 [44:50<10:43, 91.92s/it][2020-09-26 20:11:02] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_age_word2vec.kv
[2020-09-26 20:11:02] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:11:02] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_age_word2vec.kv


(8672928, 1)


 83%|████████▎ | 29/35 [46:21<09:09, 91.59s/it]

(1000000, 1)


[2020-09-26 20:12:33] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_city_word2vec.kv
[2020-09-26 20:12:34] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:12:34] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_city_word2vec.kv


(8672928, 1)
(1000000, 1)


 86%|████████▌ | 30/35 [47:57<07:44, 92.81s/it][2020-09-26 20:14:08] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_city_rank_word2vec.kv
[2020-09-26 20:14:08] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:14:08] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_city_rank_word2vec.kv


(8672928, 1)


 89%|████████▊ | 31/35 [49:28<06:09, 92.39s/it]

(1000000, 1)


[2020-09-26 20:15:40] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_device_name_word2vec.kv
[2020-09-26 20:15:40] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:15:40] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_device_name_word2vec.kv


(8672928, 1)
(1000000, 1)


 91%|█████████▏| 32/35 [51:05<04:41, 93.81s/it][2020-09-26 20:17:16] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_career_word2vec.kv
[2020-09-26 20:17:16] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:17:16] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_career_word2vec.kv


(8672928, 1)


 94%|█████████▍| 33/35 [52:35<03:05, 92.65s/it]

(1000000, 1)


[2020-09-26 20:18:47] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_gender_word2vec.kv
[2020-09-26 20:18:47] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:18:47] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_gender_word2vec.kv


(8672928, 1)


 97%|█████████▋| 34/35 [54:05<01:31, 91.74s/it]

(1000000, 1)


[2020-09-26 20:20:17] - utils.py[line:431] - INFO: loading Word2VecKeyedVectors object from adv_userseq_adv_prim_id_residence_word2vec.kv
[2020-09-26 20:20:17] - utils.py[line:503] - INFO: setting ignored attribute vectors_norm to None
[2020-09-26 20:20:17] - utils.py[line:437] - INFO: loaded adv_userseq_adv_prim_id_residence_word2vec.kv


(8672928, 1)
(1000000, 1)


100%|██████████| 35/35 [55:39<00:00, 95.41s/it]
