# Попробуем строить эмбеддинги по айтемам

In [1]:
import os
import csv
import json
import random
import pickle
import glob
from datetime import datetime
from collections import defaultdict

import gensim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

plt.style.use('seaborn-poster')

%matplotlib inline

## Word2vec по всем чанкам

In [2]:
# Train/validation cut-off handed to the corpus iterators in this notebook:
# they keep only transactions whose 'datetime' is strictly earlier than this.
valid_time = datetime.strptime(
    '2019-03-01 00:00:00',
    '%Y-%m-%d %H:%M:%S',
)

class TransactionDocCorpus:
    """
    Restartable iterator over all chunks: 1 transaction = 1 document.

    Every non-blank line of a chunk file must hold two tab-separated JSON
    values: the client id (ignored here) and the client's transaction history,
    a JSON object whose values are dicts with a ``'datetime'`` string
    (``DATETIME_FORMAT``) and a ``'products'`` list of dicts that each carry a
    ``'product_id'``.

    Iterating yields, client by client and oldest transaction first, the list
    of product ids of every transaction that happened strictly before
    ``valid_time``. ``__iter__`` is a generator function, so the corpus can be
    traversed repeatedly (Word2Vec makes several passes over it).

    Args:
        chunks_path_list: paths of the chunk files, read in the given order.
        valid_time: ``datetime`` cut-off; transactions at or after it are dropped.
    """

    # Format of the 'datetime' field stored in the chunk files.
    DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, chunks_path_list, valid_time):
        self.chunks_path_list = chunks_path_list
        self.valid_time = valid_time

    def _sorted_train_transactions(self, transaction_history):
        """Return the transactions dated before ``self.valid_time``, oldest first."""
        # Parse every timestamp exactly once and reuse it for filtering and sorting.
        dated = [
            (datetime.strptime(tr['datetime'], self.DATETIME_FORMAT), tr)
            for tr in transaction_history.values()
        ]
        # Use the cut-off given to the constructor, not a module-level global,
        # so the instance behaves the same wherever it is created.
        dated = [pair for pair in dated if pair[0] < self.valid_time]
        # Stable sort: transactions with equal timestamps keep their file order.
        dated.sort(key=lambda pair: pair[0])
        return [tr for _, tr in dated]

    def __iter__(self):
        for chunk_file in self.chunks_path_list:
            print(chunk_file)
            with open(chunk_file, 'r', encoding='utf-8') as chunk:
                for row in tqdm(chunk):
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    if not row.strip():
                        continue
                    # The client id is not needed to build documents, so it is
                    # split off but not decoded.
                    _client_id, raw_history = row.split('\t')
                    transaction_history = json.loads(raw_history)
                    for transaction in self._sorted_train_transactions(transaction_history):
                        yield [pr['product_id'] for pr in transaction['products']]

class ClientDocCorpus:
    """
    Restartable iterator over all chunks: 1 client transaction history = 1 document.

    Every non-blank line of a chunk file must hold two tab-separated JSON
    values: the client id (ignored here) and the client's transaction history,
    a JSON object whose values are dicts with a ``'datetime'`` string
    (``DATETIME_FORMAT``) and a ``'products'`` list of dicts that each carry a
    ``'product_id'``.

    Iterating yields one list per client: the product ids of all transactions
    that happened strictly before ``valid_time``, concatenated from the oldest
    transaction to the newest. A client with no such transaction yields an
    empty list. ``__iter__`` is a generator function, so the corpus can be
    traversed repeatedly (Word2Vec makes several passes over it).

    Args:
        chunks_path_list: paths of the chunk files, read in the given order.
        valid_time: ``datetime`` cut-off; transactions at or after it are dropped.
    """

    # Format of the 'datetime' field stored in the chunk files.
    DATETIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, chunks_path_list, valid_time):
        self.chunks_path_list = chunks_path_list
        self.valid_time = valid_time

    def _sorted_train_transactions(self, transaction_history):
        """Return the transactions dated before ``self.valid_time``, oldest first."""
        # Parse every timestamp exactly once and reuse it for filtering and sorting.
        dated = [
            (datetime.strptime(tr['datetime'], self.DATETIME_FORMAT), tr)
            for tr in transaction_history.values()
        ]
        # Use the cut-off given to the constructor, not a module-level global,
        # so the instance behaves the same wherever it is created.
        dated = [pair for pair in dated if pair[0] < self.valid_time]
        # Stable sort: transactions with equal timestamps keep their file order.
        dated.sort(key=lambda pair: pair[0])
        return [tr for _, tr in dated]

    def __iter__(self):
        for chunk_file in self.chunks_path_list:
            print(chunk_file)
            with open(chunk_file, 'r', encoding='utf-8') as chunk:
                for row in tqdm(chunk):
                    # Tolerate blank lines (e.g. a trailing newline at EOF).
                    if not row.strip():
                        continue
                    # The client id is not needed to build documents, so it is
                    # split off but not decoded.
                    _client_id, raw_history = row.split('\t')
                    transaction_history = json.loads(raw_history)
                    yield [
                        pr['product_id']
                        for transaction in self._sorted_train_transactions(transaction_history)
                        for pr in transaction['products']
                    ]

In [3]:
# Chunk files held out for validation; they never enter the training corpus.
valid_chunks = [
    '../_processed_data/client_tr_history_4.tsv',
    '../_processed_data/client_tr_history_0.tsv',
    '../_processed_data/client_tr_history_15.tsv',
    '../_processed_data/client_tr_history_16.tsv',
    '../_processed_data/client_tr_history_6.tsv',
]

# Training set = every chunk found on disk (in lexicographic path order) minus
# the validation chunks and chunk 20.
# NOTE(review): chunk 20 is presumably reserved as a final test set - confirm.
train_chunks = [
    chunk_path
    for chunk_path in sorted(glob.glob('../_processed_data/client_tr_history_*'))
    if chunk_path != '../_processed_data/client_tr_history_20.tsv'
    and chunk_path not in valid_chunks
]

In [4]:
%%time
# Transaction-level embeddings: every "sentence" fed to Word2Vec is the list of
# product ids bought in one transaction before valid_time.
transaction_level = TransactionDocCorpus(train_chunks, valid_time)

# The corpus is streamed from disk; the captured log shows six passes over the
# chunk list for iter=5.
# NOTE(review): per the gensim docs, seed alone does not make training
# reproducible when workers > 1 - confirm whether determinism matters here.
model1 = gensim.models.Word2Vec(
    sentences=transaction_level,
    size=50,
    window=10,
    min_count=2,
    iter=5,
    workers=8,
    seed=42,
)

280it [00:00, 2781.53it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:06, 3048.44it/s]
575it [00:00, 2903.53it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:06, 3069.40it/s]
595it [00:00, 2935.84it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:06, 3091.73it/s]
281it [00:00, 2807.75it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:06, 2958.38it/s]
296it [00:00, 2946.22it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:06, 2951.69it/s]
278it [00:00, 2777.04it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:06, 2988.85it/s]
315it [00:00, 3148.21it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:06, 3021.84it/s]
616it [00:00, 3075.40it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:06, 3035.32it/s]
315it [00:00, 3136.57it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:06, 3032.06it/s]
637it [00:00, 3083.74it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:06, 3027.86it/s]
301it [00:00, 3007.53it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:06, 3014.24it/s]
597it [00:00, 2840.51it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:06, 3012.25it/s]
314it [00:00, 3139.79it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:06, 3017.94it/s]
598it [00:00, 3008.11it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:06, 3096.09it/s]
332it [00:00, 3318.87it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:06, 3147.49it/s]
280it [00:00, 2724.12it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:07, 2660.73it/s]
288it [00:00, 2879.60it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:07, 2608.20it/s]
242it [00:00, 2068.88it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:07, 2644.33it/s]
520it [00:00, 2601.64it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:07, 2628.34it/s]
258it [00:00, 2575.72it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:07, 2621.69it/s]
307it [00:00, 3063.14it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:07, 2690.61it/s]
320it [00:00, 3194.42it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:07, 2666.84it/s]
292it [00:00, 2906.12it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:07, 2638.51it/s]
525it [00:00, 2698.83it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:07, 2617.57it/s]
274it [00:00, 2731.48it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:07, 2609.64it/s]
260it [00:00, 2596.78it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:07, 2598.96it/s]
251it [00:00, 2508.83it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:07, 2574.90it/s]
279it [00:00, 2784.86it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:07, 2579.01it/s]
262it [00:00, 2618.43it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:07, 2565.83it/s]
300it [00:00, 2998.63it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:07, 2612.45it/s]
281it [00:00, 2797.36it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:07, 2621.40it/s]
277it [00:00, 2767.42it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:07, 2579.59it/s]
265it [00:00, 2618.24it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:07, 2600.12it/s]
261it [00:00, 2598.10it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:07, 2578.53it/s]
229it [00:00, 2026.99it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:07, 2602.36it/s]
302it [00:00, 3011.77it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:07, 2618.98it/s]
281it [00:00, 2805.53it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:07, 2632.03it/s]
299it [00:00, 2968.53it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:07, 2653.65it/s]
297it [00:00, 2966.77it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:07, 2652.49it/s]
283it [00:00, 2820.94it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:07, 2635.01it/s]
499it [00:00, 2406.82it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:07, 2573.57it/s]
225it [00:00, 2240.55it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:07, 2560.31it/s]
273it [00:00, 2728.88it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:07, 2525.30it/s]
278it [00:00, 2772.14it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:07, 2595.47it/s]
277it [00:00, 2766.43it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:07, 2659.72it/s]
508it [00:00, 2421.76it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:07, 2619.84it/s]
264it [00:00, 2638.74it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:07, 2577.16it/s]
252it [00:00, 2518.17it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:07, 2503.19it/s]
259it [00:00, 2586.23it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:07, 2543.85it/s]
296it [00:00, 2944.17it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:07, 2542.28it/s]
250it [00:00, 2494.65it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:07, 2616.91it/s]
297it [00:00, 2955.81it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:07, 2543.16it/s]
294it [00:00, 2933.06it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:07, 2575.18it/s]
294it [00:00, 2935.79it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:07, 2517.19it/s]
263it [00:00, 2583.06it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:07, 2563.18it/s]
283it [00:00, 2827.51it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:07, 2588.17it/s]
228it [00:00, 2276.65it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:07, 2595.65it/s]
303it [00:00, 3028.50it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:07, 2595.37it/s]
266it [00:00, 2657.58it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:07, 2596.35it/s]
311it [00:00, 3102.69it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:07, 2663.51it/s]
257it [00:00, 2560.05it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:07, 2619.09it/s]
241it [00:00, 2406.31it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:07, 2567.48it/s]
251it [00:00, 2501.65it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:07, 2609.87it/s]
245it [00:00, 2443.46it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:07, 2544.00it/s]
287it [00:00, 2822.31it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:08, 2497.96it/s]
292it [00:00, 2912.63it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:07, 2548.56it/s]
200it [00:00, 1992.75it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:07, 2517.84it/s]
233it [00:00, 2321.31it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:07, 2554.90it/s]
256it [00:00, 2554.96it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:07, 2522.04it/s]
265it [00:00, 2629.96it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:07, 2528.89it/s]
284it [00:00, 2834.02it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:07, 2541.12it/s]
227it [00:00, 2268.36it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:07, 2564.81it/s]
275it [00:00, 2692.62it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:07, 2587.42it/s]
254it [00:00, 2533.11it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:07, 2505.93it/s]
283it [00:00, 2823.39it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:07, 2580.34it/s]
499it [00:00, 2554.84it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:07, 2609.76it/s]
295it [00:00, 2944.65it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:07, 2545.74it/s]
255it [00:00, 2521.78it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:07, 2579.03it/s]
264it [00:00, 2637.51it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:07, 2569.13it/s]
524it [00:00, 2572.91it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:07, 2531.37it/s]
287it [00:00, 2861.61it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:07, 2561.84it/s]
283it [00:00, 2828.36it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:07, 2551.29it/s]
206it [00:00, 2058.17it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:07, 2575.27it/s]
265it [00:00, 2649.08it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:07, 2544.29it/s]
283it [00:00, 2803.86it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:07, 2574.83it/s]
197it [00:00, 1965.61it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:07, 2572.17it/s]
235it [00:00, 2346.25it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:07, 2567.50it/s]
266it [00:00, 2652.78it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:07, 2616.77it/s]
279it [00:00, 2774.24it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:07, 2625.67it/s]
595it [00:00, 2970.84it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:07, 2650.38it/s]

CPU times: user 14min 39s, sys: 8.79 s, total: 14min 48s
Wall time: 11min 24s





In [5]:
# Persist the transaction-level model. save() is called on the Word2Vec object
# itself (model1), not on model1.wv.
# NOTE(review): despite the '.wv' suffix this is presumably a full-model file to
# be reloaded with gensim.models.Word2Vec.load - confirm against the consumer code.
model1.save('../_model_files/w2v_01_26_1.wv')

In [6]:
%%time
# Client-level embeddings: every "sentence" fed to Word2Vec is the concatenated,
# chronologically ordered product ids of one client's transactions before valid_time.
# NOTE(review): the variable is still called `transaction_level` although it now
# holds a ClientDocCorpus; the name is kept because other cells may refer to it.
transaction_level = ClientDocCorpus(train_chunks, valid_time)

# Same hyper-parameters as the transaction-level model, so the two differ only
# in how documents are formed.
model2 = gensim.models.Word2Vec(
    sentences=transaction_level,
    size=50,
    window=10,
    min_count=2,
    iter=5,
    workers=8,
    seed=42,
)

273it [00:00, 2721.11it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:06, 3041.85it/s]
303it [00:00, 3015.09it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:06, 3035.45it/s]
588it [00:00, 2913.85it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:06, 3062.34it/s]
592it [00:00, 2924.08it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:06, 3032.24it/s]
317it [00:00, 3169.48it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:06, 3031.93it/s]
623it [00:00, 3129.90it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:06, 3077.35it/s]
632it [00:00, 3176.49it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:06, 3053.80it/s]
319it [00:00, 3183.51it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:06, 3106.28it/s]
632it [00:00, 3131.50it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:06, 3106.07it/s]
653it [00:00, 3198.81it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:06, 3088.24it/s]
624it [00:00, 3095.37it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:06, 3085.36it/s]
593it [00:00, 2854.07it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:06, 3053.93it/s]
638it [00:00, 3201.88it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:06, 3068.24it/s]
587it [00:00, 2936.26it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:06, 3058.45it/s]
653it [00:00, 3266.15it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:06, 3075.42it/s]
268it [00:00, 2662.90it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:07, 2769.84it/s]
261it [00:00, 2608.52it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:07, 2775.38it/s]
248it [00:00, 2478.53it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:07, 2821.70it/s]
280it [00:00, 2789.22it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:07, 2804.87it/s]
265it [00:00, 2640.17it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:07, 2764.72it/s]
250it [00:00, 2479.74it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:07, 2834.55it/s]
286it [00:00, 2846.48it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:07, 2785.66it/s]
296it [00:00, 2956.86it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:07, 2792.45it/s]
290it [00:00, 2894.95it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:07, 2722.18it/s]
283it [00:00, 2804.98it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:07, 2733.54it/s]
283it [00:00, 2829.30it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:07, 2720.67it/s]
253it [00:00, 2511.89it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:07, 2706.60it/s]
308it [00:00, 3070.61it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:07, 2786.54it/s]
200it [00:00, 1998.62it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:07, 2650.39it/s]
303it [00:00, 3022.91it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:09, 2166.24it/s]
181it [00:00, 1807.90it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:11, 1794.38it/s]
203it [00:00, 2016.43it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:10, 1859.35it/s]
195it [00:00, 1940.96it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:10, 1842.27it/s]
178it [00:00, 1763.76it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:07, 2599.64it/s]
284it [00:00, 2820.88it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:07, 2742.09it/s]
329it [00:00, 3283.45it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:07, 2771.05it/s]
310it [00:00, 3099.00it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:07, 2659.96it/s]
300it [00:00, 2983.47it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:07, 2756.21it/s]
291it [00:00, 2908.73it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:07, 2759.37it/s]
289it [00:00, 2882.77it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:07, 2760.62it/s]
284it [00:00, 2822.77it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:07, 2745.50it/s]
253it [00:00, 2490.69it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:10, 1975.07it/s]
188it [00:00, 1857.69it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:10, 1865.45it/s]
173it [00:00, 1728.83it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:10, 1866.57it/s]
208it [00:00, 2079.90it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:10, 1910.22it/s]
569it [00:00, 2905.62it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:09, 2034.73it/s]
294it [00:00, 2937.15it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:10, 1871.15it/s]
134it [00:00, 1300.00it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:10, 1962.30it/s]
206it [00:00, 2047.32it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:10, 1920.27it/s]
293it [00:00, 2924.49it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:11, 1713.49it/s]
217it [00:00, 2163.17it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:10, 1886.86it/s]
200it [00:00, 1998.61it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:10, 1884.37it/s]
202it [00:00, 1997.10it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:10, 1870.29it/s]
206it [00:00, 2056.36it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:10, 1867.20it/s]
160it [00:00, 1594.83it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:11, 1798.89it/s]
143it [00:00, 1427.23it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:11, 1726.63it/s]
183it [00:00, 1827.81it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:09, 2138.75it/s]
185it [00:00, 1841.65it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:12, 1591.77it/s]
203it [00:00, 2026.32it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:10, 1923.91it/s]
132it [00:00, 1317.97it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:11, 1769.38it/s]
203it [00:00, 2019.52it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:09, 2080.05it/s]
154it [00:00, 1538.55it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:13, 1470.85it/s]
141it [00:00, 1388.36it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:08, 2292.95it/s]
274it [00:00, 2737.00it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:10, 1885.26it/s]
186it [00:00, 1853.18it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:10, 1852.86it/s]
173it [00:00, 1729.49it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:11, 1784.10it/s]
174it [00:00, 1719.44it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:10, 1980.58it/s]
300it [00:00, 2992.89it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:10, 1907.89it/s]
140it [00:00, 1394.22it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:11, 1727.53it/s]
147it [00:00, 1459.86it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:10, 1843.44it/s]
277it [00:00, 2760.37it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:11, 1773.78it/s]
115it [00:00, 1148.47it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:12, 1641.11it/s]
163it [00:00, 1626.70it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:11, 1774.56it/s]
195it [00:00, 1945.65it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:11, 1802.20it/s]
207it [00:00, 2068.14it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:11, 1798.92it/s]
370it [00:00, 1814.46it/s]

../_processed_data/client_tr_history_1.tsv


20000it [00:11, 1761.02it/s]
234it [00:00, 2337.87it/s]

../_processed_data/client_tr_history_10.tsv


20000it [00:14, 1367.25it/s]
134it [00:00, 1326.80it/s]

../_processed_data/client_tr_history_11.tsv


20000it [00:12, 1561.86it/s]
245it [00:00, 2418.49it/s]

../_processed_data/client_tr_history_12.tsv


20000it [00:15, 1291.74it/s]
150it [00:00, 1497.52it/s]

../_processed_data/client_tr_history_13.tsv


20000it [00:12, 1562.42it/s]
122it [00:00, 1203.83it/s]

../_processed_data/client_tr_history_14.tsv


20000it [00:17, 1173.93it/s]
140it [00:00, 1392.17it/s]

../_processed_data/client_tr_history_17.tsv


20000it [00:12, 1580.44it/s]
139it [00:00, 1389.32it/s]

../_processed_data/client_tr_history_18.tsv


20000it [00:14, 1383.44it/s]
105it [00:00, 1048.82it/s]

../_processed_data/client_tr_history_19.tsv


20000it [00:08, 2306.54it/s]
281it [00:00, 2800.39it/s]

../_processed_data/client_tr_history_2.tsv


20000it [00:11, 1779.83it/s]
172it [00:00, 1697.83it/s]

../_processed_data/client_tr_history_3.tsv


20000it [00:11, 1763.22it/s]
172it [00:00, 1716.87it/s]

../_processed_data/client_tr_history_5.tsv


20000it [00:11, 1803.19it/s]
160it [00:00, 1594.44it/s]

../_processed_data/client_tr_history_7.tsv


20000it [00:10, 1994.63it/s]
117it [00:00, 1162.30it/s]

../_processed_data/client_tr_history_8.tsv


20000it [00:12, 1623.18it/s]
261it [00:00, 2603.35it/s]

../_processed_data/client_tr_history_9.tsv


20000it [00:09, 2005.64it/s]

CPU times: user 18min 10s, sys: 9.69 s, total: 18min 20s
Wall time: 14min 19s





In [7]:
# Persist the client-level model. save() is called on the Word2Vec object
# itself (model2), not on model2.wv.
# NOTE(review): despite the '.wv' suffix this is presumably a full-model file to
# be reloaded with gensim.models.Word2Vec.load - confirm against the consumer code.
model2.save('../_model_files/w2v_01_26_2.wv')