# Transaction Level Models

While it was expected that aggregation of features in a somewhat arbitrary manner would result in the loss of information, it was not expected that such an approach would not be able to beat the baseline score of guessing only zeros.    

In [1]:
import pandas as pd
import sqlalchemy
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler

from lightgbm import LGBMRegressor
import catboost as cb

from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
from keras.callbacks import EarlyStopping

%matplotlib inline

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
engine = sqlalchemy.create_engine('postgresql://romandtse:duckthewut@localhost:5432/training')

In [3]:
with open('../pickles/field_dict.pkl', 'rb') as f:
    field_dict = pickle.load(f)
    
with open('../pickles/useless_fields.pkl', 'rb') as f:
    useless_fields = pickle.load(f)

with open('../pickles/adwordsClickInfo_keys.pkl', 'rb') as f:
    adwordsClickInfo_keys = pickle.load(f)
    
with open('../pickles/channel_groups.pkl', 'rb') as f:
    channel_groups = pickle.load(f)
    
with open('../pickles/field_vals.pkl', 'rb') as f:
    field_vals = pickle.load(f)

In [4]:
with open('../pickles/train_customer_ids.pkl', 'rb') as f:
    train_customer_ids = pickle.load(f)

There is the question of scaling the revenue.  By coincidence, or maybe by design, the target is actually scaled by the natural log.  When we fit a regressor, there will necessarily be negative values.  Since we will send these values through another model, we should consider scaling everything by the natural log here; the argument is not that we pretend a single session should imitate a 

In [5]:
def revTemplate(key, name, num_type = 'FLOAT'):
    return f"""COALESCE(CAST({key} ->> '{name}' AS {num_type}), 0)/10^6 AS {name}"""

def jnumTemplate(key, name, num_type = 'INT'):
    return f"""COALESCE(CAST({key} ->> '{name}' AS {num_type}), 0) AS {name}"""

def numTemplate(name):
    return f"""COALESCE({name}, 0) AS {name}"""

def jstrTemplate(key, name):
    return f"{key} ->> '{name}' AS {name}"

def strTemplate(name):
    return f"{name}"

def adwordsTemplate(name):
    return f"CAST(trafficSource ->> 'adwordsClickInfo' AS JSONB) ->> '{name}' AS {name}"

In [6]:
def getQuery(dataset = 'train'):
    numeric_cols = ['visitNumber', 'newVisits', 'bounces', 'pageviews', 'visits', 'hits', 'transactionRevenue']
    if dataset != 'train':
        numeric_cols.pop()
    selects = []
    for cat, subcats in field_dict.items():
        for subcat in subcats:
            if subcat not in useless_fields[dataset]:
                if subcat == 'transactionRevenue':
                    selects.append(revTemplate(cat, subcat, 'NUMERIC'))
                elif subcat in numeric_cols:
                    selects.append(jnumTemplate(cat, subcat))
                elif subcat == 'adwordsClickInfo':
                    for key in adwordsClickInfo_keys:
                        selects.append(adwordsTemplate(key))
                else:
                    selects.append(jstrTemplate(cat, subcat))
    selects.extend([numTemplate('visitNumber'), 
                    strTemplate('channelGrouping'),
                    strTemplate('fullVisitorId'),
                    numTemplate('visitStartTime'),
                   ])
    return ', '.join(selects)

In [7]:
qstring = getQuery()

In [8]:
with open('../pickles/top_order.pkl', 'rb') as f:
    top_order = pickle.load(f)

In [9]:
#create user_list by
def getUserData(user_list):
    users = "\', \'".join(user_list)
    query = f"""
    SELECT {qstring}
    FROM train_data
    WHERE fullVisitorId IN (\'{users}\')
    """

    return pd.read_sql_query(query, engine, parse_dates=['visitstarttime'])

In [10]:
with open('../pickles/objects.pkl', 'rb') as f:
    objects = pickle.load(f)

By introducing all possible values of fields ahead of time for dummying, independent of whether they show up in the training set or not, we fail to simulate the fact that we have no idea whether we have captured all the features.  The categories included here, though are fairly set in stone; there probably are not many sub continents that have yet to appear in the store's history.

In [58]:
def adjustCols(df, drop_ids = True):
    back_looking = ['bounces', 'hits', 'newvisits', 'pageviews']
    
    df = df.sort_values(['fullvisitorid','visitstarttime'])
    #turns out the for loop checks col_order dynamically, temporary list needed to avoid infinite loop
    for col in back_looking:
        df[f'{col}last'] = df.groupby('fullvisitorid')[col].shift(1)
        df[f'{col}two'] = df.groupby('fullvisitorid')[col].shift(2)
    df['sincelast'] = df.groupby('fullvisitorid').visitstarttime.diff().map(lambda x: x.days + x.seconds/86400)
    df['sincetwo'] = df.groupby('fullvisitorid').visitstarttime.diff(2).map(lambda x: x.days + x.seconds/86400)
    df['hour'] = df.visitstarttime.map(lambda x: x.hour)
    df['weekday'] = df.visitstarttime.map(lambda x: x.dayofweek)
    
    if drop_ids:
        return df.drop(['fullvisitorid', 'visitstarttime'], axis=1)
    else:
        return df.drop('visitstarttime', axis=1)

In [14]:
with open('../pickles/train_customer_ids.pkl', 'rb') as f:
    train_customer_ids = pickle.load(f)

In [15]:
with open('../pickles/train_looker_ids.pkl', 'rb') as f:
    train_looker_ids = pickle.load(f)

In [16]:
#shuffle now so we can just iterate through lists
from random import shuffle

def stratifiedIdSplit(test_size=0.5):
    customer_size = int(len(train_customer_ids)*test_size)
    looker_size = int(len(train_looker_ids)*test_size)
    
    test_customers = list(np.random.choice(train_customer_ids.T.values[0], replace=False, size=customer_size))
    test_lookers = list(np.random.choice(train_looker_ids.T.values[0], replace=False, size=looker_size))
    
    train_customers = list(set(train_customer_ids.T.values[0]).difference(set(test_customers)))
    train_lookers = list(set(train_looker_ids.T.values[0]).difference(set(test_lookers)))
    
    test_customers.extend(test_lookers)
    train_customers.extend(train_lookers)
    
    shuffle(test_customers)
    shuffle(train_customers)
    
    return train_customers, test_customers

In [17]:
train_ids, test_ids = stratifiedIdSplit(0.3)

While we can hope a machine can learn what is unique about this outlier, the fact is that there is no one else like this user.  In our ensemble later, considering it is a system of gradient boosted trees, there is a very good chance that one of the forests will be awful at guessing because it was fit to minimize the error it would get from this outlier point.  With all its activity, though, perhaps the behavior could still fall in line with the other points of data.  For the fear of overfitting to this point, and because our validation can tell us nothing about how well it does on similar outliers (because there are none like it), we remove it from our model for now.

In [18]:
try:
    train_ids.remove(top_order.iloc[0,0])
except:
    test_ids.remove(top_order.iloc[0,0])

In [19]:
getUserData([test_ids[0]]).columns

Index(['devicecategory', 'ismobile', 'browser', 'operatingsystem', 'city',
       'continent', 'country', 'metro', 'networkdomain', 'region',
       'subcontinent', 'bounces', 'hits', 'newvisits', 'pageviews',
       'transactionrevenue', 'adcontent', 'adnetworktype',
       'criteriaparameters', 'gclid', 'isvideoad', 'page', 'slot',
       'targetingcriteria', 'campaign', 'campaigncode', 'istruedirect',
       'keyword', 'medium', 'referralpath', 'source', 'visitnumber',
       'channelgrouping', 'fullvisitorid', 'visitstarttime'],
      dtype='object')

In [59]:
def createChunk(ids_list, size, drop_ids = True):
    new_size = len(ids_list)
    if  new_size > size:
        new_size = size
    someppl = ids_list
    shuffle(someppl)
    someppl = someppl[:new_size]
    chunk = getUserData(someppl)
    chunk = adjustCols(chunk, drop_ids)
    
    return chunk.fillna(0), ids_list[new_size:]

In [21]:
trial_df = adjustCols(getUserData([top_order.iloc[0][0]])).drop('transactionrevenue', axis=1)

In [22]:
trial_vals = adjustCols(getUserData([top_order.iloc[0][0]])).transactionrevenue

In [23]:
cat_feets = np.where(trial_df.dtypes == object)[0]

In [24]:
trial_df.columns[cat_feets]

Index(['devicecategory', 'ismobile', 'browser', 'operatingsystem', 'city',
       'continent', 'country', 'metro', 'networkdomain', 'region',
       'subcontinent', 'adcontent', 'adnetworktype', 'criteriaparameters',
       'gclid', 'isvideoad', 'page', 'slot', 'targetingcriteria', 'campaign',
       'campaigncode', 'istruedirect', 'keyword', 'medium', 'referralpath',
       'source', 'channelgrouping'],
      dtype='object')

Instead of training a set of trees and losing most of the trees, let's make an ensemble.  We can blend it and fit to the actual target with a single ensemble in two stages; we fit by session while still preventing user leakage before using these to predict the sum of all sessions and fitting on a separate validation set.

In [25]:
id_holder = train_ids
fold = 8
holdouts = 2
fold_size = len(id_holder)//fold

models = []
first_ids = id_holder[:fold_size].copy()


def trainCB(train_set, eval_pool, model_list):
    model_list.append(cb.CatBoostRegressor(iterations = 500,
                                           learning_rate  = .1,
                                           l2_leaf_reg = 100,
                                           cat_features = cat_feets,
                                           verbose = True))
    model_list[-1].fit(X = train_set.drop('transactionrevenue', axis=1),
                       y = train_set.transactionrevenue,
                       use_best_model = True, 
                       eval_set = eval_pool, 
                       early_stopping_rounds = 10, 
                       metric_period = 50)

eval_chunk, id_holder = createChunk(id_holder, fold_size)
eval_chunk = cb.Pool(eval_chunk.drop('transactionrevenue', axis=1), 
                 eval_chunk.transactionrevenue, 
                 cat_features=cat_feets)
chunk, id_holder = createChunk(id_holder, fold_size)
trainCB(chunk, eval_chunk, models)


for n in range(fold - holdouts - 2):
    eval_chunk = chunk
    eval_chunk = cb.Pool(eval_chunk.drop('transactionrevenue', axis=1), 
                     eval_chunk.transactionrevenue, 
                     cat_features=cat_feets)
    if n < 3:
        chunk, id_holder = createChunk(id_holder, fold_size)
    else:
        chunk = createChunk(first_ids, fold_size)[0]
    trainCB(chunk, eval_chunk, models)




0:	learn: 35.6141297	test: 67.0926702	best: 67.0926702 (0)	total: 459ms	remaining: 3m 48s
50:	learn: 34.0242495	test: 66.6397450	best: 66.6397450 (50)	total: 17.6s	remaining: 2m 34s
100:	learn: 33.5665814	test: 66.5808375	best: 66.5808375 (100)	total: 34.6s	remaining: 2m 16s
Stopped by overfitting detector  (10 iterations wait)

bestTest = 66.57089931
bestIteration = 107

Shrink model to first 108 iterations.




0:	learn: 29.4909781	test: 35.6619093	best: 35.6619093 (0)	total: 457ms	remaining: 3m 48s
50:	learn: 27.8292836	test: 34.6339357	best: 34.6339357 (50)	total: 19.2s	remaining: 2m 48s
100:	learn: 27.2804282	test: 34.4388677	best: 34.4388677 (100)	total: 36.7s	remaining: 2m 24s
150:	learn: 26.9199866	test: 34.3520542	best: 34.3520418 (149)	total: 54.6s	remaining: 2m 6s
200:	learn: 26.6876952	test: 34.2950701	best: 34.2950701 (200)	total: 1m 11s	remaining: 1m 46s
250:	learn: 26.4680033	test: 34.2521019	best: 34.2506934 (246)	total: 1m 29s	remaining: 1m 28s
Stopped by overfitting detector  (10 iterations wait)

bestTest = 34.23564526
bestIteration = 270

Shrink model to first 271 iterations.




0:	learn: 46.7135529	test: 29.4882553	best: 29.4882553 (0)	total: 458ms	remaining: 3m 48s
50:	learn: 45.1500831	test: 28.5862330	best: 28.5862330 (50)	total: 17.9s	remaining: 2m 37s
100:	learn: 44.5401197	test: 28.4358089	best: 28.4346498 (96)	total: 34.5s	remaining: 2m 16s
Stopped by overfitting detector  (10 iterations wait)

bestTest = 28.42968001
bestIteration = 106

Shrink model to first 107 iterations.




0:	learn: 33.7786868	test: 46.7572169	best: 46.7572169 (0)	total: 385ms	remaining: 3m 11s
50:	learn: 31.6877456	test: 45.7699874	best: 45.7699874 (50)	total: 17.5s	remaining: 2m 34s
100:	learn: 31.1521536	test: 45.6285858	best: 45.6285858 (100)	total: 34.6s	remaining: 2m 16s
150:	learn: 30.7269877	test: 45.5599185	best: 45.5597168 (147)	total: 52.2s	remaining: 2m
Stopped by overfitting detector  (10 iterations wait)

bestTest = 45.54300345
bestIteration = 160

Shrink model to first 161 iterations.




0:	learn: 36.6152465	test: 33.9221094	best: 33.9221094 (0)	total: 443ms	remaining: 3m 40s
50:	learn: 35.6539505	test: 32.8664661	best: 32.8664661 (50)	total: 18.4s	remaining: 2m 41s
100:	learn: 35.4313658	test: 32.6865116	best: 32.6864966 (99)	total: 36.2s	remaining: 2m 22s
150:	learn: 35.2936361	test: 32.5881407	best: 32.5881407 (150)	total: 53.8s	remaining: 2m 4s
200:	learn: 35.1402468	test: 32.5117200	best: 32.5117200 (200)	total: 1m 11s	remaining: 1m 46s
Stopped by overfitting detector  (10 iterations wait)

bestTest = 32.49291973
bestIteration = 209

Shrink model to first 210 iterations.


At the arbitrary fold size of 8, we still get over sixty-two thousand users, and expect a good number of users to be paying customers.  We reserve 3 of these folds for validation and predicting the log of the sums.  We will also use the previous fold for validation, mostly to save space and time to be honest, though the first and last rounds must be treated separately.

At a L2 coefficient of 100, trees tend to stop around 100 iterations.  At a coefficient of 1, they stop $O(10)$ iterations, with the RMSE still increasing only 1 unit by the end.  At 1000, the training runs for more iterations, but the gains are just as modest.  There is still the question of whether it is better to let the ensemble components overfit, but we might run into the problem we usually do with random forests.  At this rate, though, the system will do not much better than a small collection of independent decision trees.  We'll go ahead and try anyways.

In [30]:
del chunk, eval_chunk

In [26]:
with open('../models/cb_stage1.pkl', 'wb') as f:
    pickle.dump(models, f)

Effectively doing a weighted average here.  I wonder if tree regressors have something like predict proba to asses how confident it is in each prediction.

In [28]:
from sklearn.linear_model import LinearRegression

In [29]:
len(id_holder)

187473

In [60]:
val_df = createChunk(id_holder, len(id_holder), False)[0]

oh my god it's only 93MB.  All this effort to avoid loading the entire dataframe and good fraction of it is just 93MB.

In [61]:
val_x = val_df.drop('transactionrevenue', axis=1)
val_y = val_df[['fullvisitorid', 'transactionrevenue']]
del val_df

In [62]:
predicts = [model.predict(val_x) for model in models]

In [64]:
for i in range(len(models)):
    val_x[f'predicted_{i}'] = predicts[i]

In [67]:
import matplotlib.pyplot as plt
%matplotlib inline

In [109]:
col_mask = [column=='fullvisitorid' or 'predicted_' in column for column in val_x.columns.values]
x = val_x.loc[:,col_mask].groupby('fullvisitorid').sum()

In [110]:
x.describe()

Unnamed: 0,predicted_0,predicted_1,predicted_2,predicted_3,predicted_4
count,187473.0,187473.0,187473.0,187473.0,187473.0
mean,2.024744,2.015175,1.999806,1.968603,1.834993
std,12.670578,18.926023,13.953792,17.662588,12.120747
min,-0.731466,-17.012426,-1.291338,-6.361899,-3.322797
25%,-0.037,-0.386212,-0.073637,-0.204398,-0.240298
50%,0.157122,-0.255843,0.041073,-0.021647,-0.131213
75%,0.613433,-0.022711,0.378796,0.398186,0.161302
max,1456.764486,2082.520218,1130.844994,2751.167519,908.566214


Just realized some of these will be negative.  We need to transform them with a function that runs from zero to infinity and accepts negative infinity to infinity, or else use a different model.  We try the former here.  Why not, headed down a bad way at this point.

In [106]:
y = val_y.groupby('fullvisitorid').sum()

In [108]:
y.describe()

Unnamed: 0,transactionrevenue
count,187473.0
mean,2.081995
std,53.622774
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,8951.97


In [116]:
cbagg = cb.CatBoostRegressor()

In [117]:
cbagg.fit(x, y)
cbagg.score(x, y)

0:	learn: 53.5166450	total: 43.9ms	remaining: 43.8s
1:	learn: 53.3762209	total: 68.7ms	remaining: 34.3s
2:	learn: 53.2484818	total: 92.4ms	remaining: 30.7s
3:	learn: 53.1234203	total: 119ms	remaining: 29.7s
4:	learn: 53.0053379	total: 162ms	remaining: 32.3s
5:	learn: 52.8932759	total: 189ms	remaining: 31.3s
6:	learn: 52.7880458	total: 216ms	remaining: 30.6s
7:	learn: 52.6881363	total: 240ms	remaining: 29.7s
8:	learn: 52.5938717	total: 286ms	remaining: 31.5s
9:	learn: 52.5048306	total: 316ms	remaining: 31.2s
10:	learn: 52.4232247	total: 351ms	remaining: 31.5s
11:	learn: 52.3447308	total: 382ms	remaining: 31.4s
12:	learn: 52.2722240	total: 406ms	remaining: 30.8s
13:	learn: 52.2010987	total: 436ms	remaining: 30.7s
14:	learn: 52.1341082	total: 461ms	remaining: 30.3s
15:	learn: 52.0715408	total: 487ms	remaining: 30s
16:	learn: 52.0132958	total: 543ms	remaining: 31.4s
17:	learn: 51.9573958	total: 577ms	remaining: 31.5s
18:	learn: 51.9040579	total: 608ms	remaining: 31.4s
19:	learn: 51.8532553

159:	learn: 50.8648513	total: 6.77s	remaining: 35.6s
160:	learn: 50.8633589	total: 6.82s	remaining: 35.5s
161:	learn: 50.8626486	total: 6.87s	remaining: 35.5s
162:	learn: 50.8615895	total: 6.92s	remaining: 35.5s
163:	learn: 50.8614325	total: 6.98s	remaining: 35.6s
164:	learn: 50.8607427	total: 7.04s	remaining: 35.6s
165:	learn: 50.8601069	total: 7.11s	remaining: 35.7s
166:	learn: 50.8594434	total: 7.13s	remaining: 35.6s
167:	learn: 50.8587930	total: 7.17s	remaining: 35.5s
168:	learn: 50.8580807	total: 7.24s	remaining: 35.6s
169:	learn: 50.8578593	total: 7.29s	remaining: 35.6s
170:	learn: 50.8569681	total: 7.34s	remaining: 35.6s
171:	learn: 50.8563770	total: 7.39s	remaining: 35.6s
172:	learn: 50.8557029	total: 7.42s	remaining: 35.5s
173:	learn: 50.8550962	total: 7.45s	remaining: 35.4s
174:	learn: 50.8544869	total: 7.5s	remaining: 35.3s
175:	learn: 50.8531021	total: 7.53s	remaining: 35.2s
176:	learn: 50.8520618	total: 7.57s	remaining: 35.2s
177:	learn: 50.8518063	total: 7.6s	remaining: 3

316:	learn: 50.7911853	total: 13.2s	remaining: 28.4s
317:	learn: 50.7909764	total: 13.2s	remaining: 28.4s
318:	learn: 50.7903925	total: 13.3s	remaining: 28.4s
319:	learn: 50.7902034	total: 13.3s	remaining: 28.3s
320:	learn: 50.7901359	total: 13.4s	remaining: 28.3s
321:	learn: 50.7900703	total: 13.4s	remaining: 28.3s
322:	learn: 50.7898679	total: 13.5s	remaining: 28.4s
323:	learn: 50.7896688	total: 13.6s	remaining: 28.3s
324:	learn: 50.7896417	total: 13.6s	remaining: 28.3s
325:	learn: 50.7895876	total: 13.7s	remaining: 28.3s
326:	learn: 50.7895242	total: 13.7s	remaining: 28.2s
327:	learn: 50.7893282	total: 13.7s	remaining: 28.2s
328:	learn: 50.7892762	total: 13.8s	remaining: 28.1s
329:	learn: 50.7891085	total: 13.8s	remaining: 28s
330:	learn: 50.7890856	total: 13.8s	remaining: 28s
331:	learn: 50.7885139	total: 13.9s	remaining: 27.9s
332:	learn: 50.7884921	total: 13.9s	remaining: 27.9s
333:	learn: 50.7883144	total: 13.9s	remaining: 27.8s
334:	learn: 50.7881241	total: 14s	remaining: 27.7s

476:	learn: 50.7546527	total: 19.5s	remaining: 21.4s
477:	learn: 50.7545636	total: 19.6s	remaining: 21.4s
478:	learn: 50.7540616	total: 19.6s	remaining: 21.3s
479:	learn: 50.7534135	total: 19.7s	remaining: 21.3s
480:	learn: 50.7534001	total: 19.7s	remaining: 21.3s
481:	learn: 50.7533814	total: 19.8s	remaining: 21.2s
482:	learn: 50.7533546	total: 19.8s	remaining: 21.2s
483:	learn: 50.7533290	total: 19.9s	remaining: 21.2s
484:	learn: 50.7532146	total: 19.9s	remaining: 21.2s
485:	learn: 50.7530861	total: 20s	remaining: 21.1s
486:	learn: 50.7529898	total: 20s	remaining: 21.1s
487:	learn: 50.7528953	total: 20s	remaining: 21s
488:	learn: 50.7526679	total: 20.1s	remaining: 21s
489:	learn: 50.7526327	total: 20.1s	remaining: 20.9s
490:	learn: 50.7525378	total: 20.1s	remaining: 20.9s
491:	learn: 50.7521274	total: 20.2s	remaining: 20.8s
492:	learn: 50.7521095	total: 20.2s	remaining: 20.8s
493:	learn: 50.7520173	total: 20.2s	remaining: 20.7s
494:	learn: 50.7519267	total: 20.3s	remaining: 20.7s
495

637:	learn: 50.7353785	total: 26.8s	remaining: 15.2s
638:	learn: 50.7353243	total: 26.9s	remaining: 15.2s
639:	learn: 50.7353153	total: 26.9s	remaining: 15.2s
640:	learn: 50.7348945	total: 27s	remaining: 15.1s
641:	learn: 50.7348858	total: 27s	remaining: 15.1s
642:	learn: 50.7346574	total: 27.1s	remaining: 15s
643:	learn: 50.7346490	total: 27.1s	remaining: 15s
644:	learn: 50.7346409	total: 27.1s	remaining: 14.9s
645:	learn: 50.7346253	total: 27.2s	remaining: 14.9s
646:	learn: 50.7346035	total: 27.2s	remaining: 14.8s
647:	learn: 50.7345954	total: 27.2s	remaining: 14.8s
648:	learn: 50.7345741	total: 27.3s	remaining: 14.8s
649:	learn: 50.7342310	total: 27.3s	remaining: 14.7s
650:	learn: 50.7342232	total: 27.3s	remaining: 14.7s
651:	learn: 50.7342156	total: 27.4s	remaining: 14.6s
652:	learn: 50.7339929	total: 27.4s	remaining: 14.6s
653:	learn: 50.7339856	total: 27.4s	remaining: 14.5s
654:	learn: 50.7336860	total: 27.5s	remaining: 14.5s
655:	learn: 50.7335736	total: 27.5s	remaining: 14.4s
6

799:	learn: 50.7220819	total: 34.1s	remaining: 8.52s
800:	learn: 50.7219865	total: 34.1s	remaining: 8.48s
801:	learn: 50.7214408	total: 34.2s	remaining: 8.44s
802:	learn: 50.7214034	total: 34.2s	remaining: 8.39s
803:	learn: 50.7213998	total: 34.3s	remaining: 8.36s
804:	learn: 50.7213900	total: 34.3s	remaining: 8.32s
805:	learn: 50.7213574	total: 34.4s	remaining: 8.28s
806:	learn: 50.7213521	total: 34.5s	remaining: 8.24s
807:	learn: 50.7212457	total: 34.5s	remaining: 8.2s
808:	learn: 50.7212411	total: 34.6s	remaining: 8.16s
809:	learn: 50.7212283	total: 34.6s	remaining: 8.12s
810:	learn: 50.7212250	total: 34.7s	remaining: 8.08s
811:	learn: 50.7212217	total: 34.7s	remaining: 8.04s
812:	learn: 50.7212092	total: 34.8s	remaining: 8s
813:	learn: 50.7211970	total: 34.8s	remaining: 7.96s
814:	learn: 50.7211064	total: 34.9s	remaining: 7.91s
815:	learn: 50.7211020	total: 34.9s	remaining: 7.88s
816:	learn: 50.7210902	total: 35s	remaining: 7.85s
817:	learn: 50.7210400	total: 35.1s	remaining: 7.81s

958:	learn: 50.7159309	total: 41.1s	remaining: 1.76s
959:	learn: 50.7159294	total: 41.1s	remaining: 1.71s
960:	learn: 50.7159265	total: 41.1s	remaining: 1.67s
961:	learn: 50.7159201	total: 41.2s	remaining: 1.63s
962:	learn: 50.7159137	total: 41.2s	remaining: 1.58s
963:	learn: 50.7158859	total: 41.2s	remaining: 1.54s
964:	learn: 50.7158832	total: 41.3s	remaining: 1.5s
965:	learn: 50.7158767	total: 41.4s	remaining: 1.46s
966:	learn: 50.7158741	total: 41.4s	remaining: 1.41s
967:	learn: 50.7158726	total: 41.4s	remaining: 1.37s
968:	learn: 50.7158711	total: 41.5s	remaining: 1.33s
969:	learn: 50.7157240	total: 41.5s	remaining: 1.28s
970:	learn: 50.7157214	total: 41.5s	remaining: 1.24s
971:	learn: 50.7157200	total: 41.6s	remaining: 1.2s
972:	learn: 50.7155469	total: 41.6s	remaining: 1.15s
973:	learn: 50.7155194	total: 41.7s	remaining: 1.11s
974:	learn: 50.7155180	total: 41.7s	remaining: 1.07s
975:	learn: 50.7155117	total: 41.7s	remaining: 1.03s
976:	learn: 50.7154847	total: 41.8s	remaining: 9

50.714543203500675