# Step 0.1. Import necessary libraries 

In [1]:
# Standard python libraries
import logging
import os
import time
logging.basicConfig(format='[%(asctime)s] (%(levelname)s): %(message)s', level=logging.INFO)

# Installed libraries
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import torch

# Imports from our package
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.dataset.roles import DatetimeRole
from lightautoml.tasks import Task
from lightautoml.utils.profiler import Profiler

# Step 0.2. Parameters 

In [2]:
# Thread count for LightGBM and the linear models
N_THREADS = 8
# Number of cross-validation folds used by AutoML
N_FOLDS = 5
# Fixed seed (numpy, train/test split, AutoML reader)
RANDOM_STATE = 42
# Fraction of rows held out for the final metric check
TEST_SIZE = 0.2
# Time budget for one AutoML run, in seconds
TIMEOUT = 300
# Name of the target column
TARGET_NAME = 'TARGET'

# Step 0.3. Fix torch number of threads and numpy seed 

In [3]:
# Seed every random number generator this notebook configures so that a
# "Restart & Run All" reproduces the same numbers.
np.random.seed(RANDOM_STATE)
# torch keeps its own generator, independent of numpy's, so it has to be
# seeded separately (presumably relevant for the torch-based linear models --
# confirm against the LightAutoML version in use).
torch.manual_seed(RANDOM_STATE)
# Cap the number of intra-op CPU threads torch may use.
torch.set_num_threads(N_THREADS)

# Step 0.4. Change profiling decorators settings 

By default, profiling decorators are turned off to save time and memory. If you want to see a profiling report after using LAMA, turn the decorators on with the command below:

In [4]:
# Create the profiler and switch its decorators on; `p` is reused later in
# the notebook to build the profiling report.
p = Profiler()
deco_settings = {'enabled': True}
p.change_deco_settings(deco_settings)

ALL_FUNCS len = 418


# Step 0.5. Example data load 

In [5]:
%%time

# Read the sampled example dataset shipped with the repository and preview
# its first rows.
sample_csv = './example_data/test_data_files/sampled_app_train.csv'
data = pd.read_csv(sample_csv)
data.head()

CPU times: user 80.8 ms, sys: 4.53 ms, total: 85.4 ms
Wall time: 84.7 ms


Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR
0,313802,0,Cash loans,M,N,Y,0,270000.0,327024.0,15372.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
1,319656,0,Cash loans,F,N,N,0,108000.0,675000.0,19737.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
2,207678,0,Revolving loans,F,Y,Y,2,112500.0,270000.0,13500.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0
3,381593,0,Cash loans,F,N,N,1,67500.0,142200.0,9630.0,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,4.0
4,258153,0,Cash loans,F,Y,Y,0,337500.0,1483231.5,46570.5,...,0,0,0,0,0.0,0.0,0.0,2.0,0.0,0.0


# Step 0.6. (Optional) Some user feature preparation 

The cell below shows some user feature preparation that makes the task more difficult (this block can be omitted if you don't want to change the initial data):

In [6]:
%%time

# Reference date: the DAYS_* columns are interpreted below as day offsets
# from this date, and it is also stored as the `report_dt` column.
REPORT_DATE = np.datetime64('2018-01-01')

# Turn the integer day offsets into date strings. pd.to_timedelta(..., unit='D')
# is the documented pandas conversion; it avoids casting integers to the
# day-resolution numpy dtype `timedelta64[D]`, which pandas does not store
# natively.
data['BIRTH_DATE'] = (REPORT_DATE + pd.to_timedelta(data['DAYS_BIRTH'], unit='D')).astype(str)

# Positive DAYS_EMPLOYED offsets are clipped to 0 first, so EMP_DATE never
# falls after the reference date.
data['EMP_DATE'] = (REPORT_DATE + pd.to_timedelta(data['DAYS_EMPLOYED'].clip(upper=0), unit='D')).astype(str)

# Degenerate columns added on purpose: a constant and an all-NaN feature.
data['constant'] = 1
data['allnan'] = np.nan

# Same date for every row.
data['report_dt'] = REPORT_DATE

# The raw offsets are replaced by the derived date columns. Rebinding `data`
# instead of using inplace=True keeps the drop a plain expression.
data = data.drop(columns=['DAYS_BIRTH', 'DAYS_EMPLOYED'])

[2020-11-11 22:33:41,141] (INFO): Note: detected 96 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.
[2020-11-11 22:33:41,142] (INFO): Note: NumExpr detected 96 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
[2020-11-11 22:33:41,142] (INFO): NumExpr defaulting to 8 threads.


CPU times: user 112 ms, sys: 3.61 ms, total: 116 ms
Wall time: 114 ms


# Step 0.7. (Optional) Data splitting for train-test 

The block below can be omitted if you are only going to train the model or if you already have separate train and test files:

In [7]:
%%time

# Stratified hold-out split on the target column, with a fixed seed.
train_data, test_data = train_test_split(
    data,
    test_size=TEST_SIZE,
    stratify=data[TARGET_NAME],
    random_state=RANDOM_STATE,
)
split_msg = 'Data splitted. Parts sizes: train_data = {}, test_data = {}'
logging.info(split_msg.format(train_data.shape, test_data.shape))

[2020-11-11 22:33:41,220] (INFO): Data splitted. Parts sizes: train_data = (8000, 125), test_data = (2000, 125)


CPU times: user 15.4 ms, sys: 0 ns, total: 15.4 ms
Wall time: 14.1 ms


In [8]:
# Preview the first rows of the training split.
train_data.head()

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,...,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,BIRTH_DATE,EMP_DATE,constant,allnan,report_dt
6444,112261,0,Cash loans,F,N,N,1,90000.0,640080.0,31261.5,...,0.0,0.0,0.0,1.0,0.0,1985-06-28,2012-06-21,1,,2018-01-01
3586,115058,0,Cash loans,F,N,Y,0,180000.0,239850.0,23850.0,...,0.0,0.0,0.0,0.0,3.0,1953-12-27,2018-01-01,1,,2018-01-01
9349,326623,0,Cash loans,F,N,Y,0,112500.0,337500.0,31086.0,...,0.0,0.0,0.0,0.0,2.0,1975-06-21,2016-06-17,1,,2018-01-01
7734,191976,0,Cash loans,M,Y,Y,1,67500.0,135000.0,9018.0,...,,,,,,1988-04-27,2009-06-05,1,,2018-01-01
2174,281519,0,Revolving loans,F,N,Y,0,67500.0,202500.0,10125.0,...,0.0,0.0,0.0,0.0,2.0,1975-06-13,1997-01-22,1,,2018-01-01


# ========= AutoML preset usage =========


## Step 1. Create Task

In [9]:
%%time

# Binary classification task; no further Task arguments are passed.
task = Task('binary')

CPU times: user 1.66 ms, sys: 2.46 ms, total: 4.13 ms
Wall time: 3.74 ms


## Step 2. Setup columns roles

The roles setup here sets the target column and the base date, which is used to calculate date differences:

In [10]:
%%time

# Role for the `report_dt` column: flagged as the base date, with no
# seasonality features and base_feats switched off.
report_date_role = DatetimeRole(base_date=True, seasonality=(), base_feats=False)

# Map each role to the column it applies to.
roles = {
    'target': TARGET_NAME,
    report_date_role: 'report_dt',
}

CPU times: user 3 µs, sys: 191 µs, total: 194 µs
Wall time: 197 µs


## Step 3. Create AutoML from preset

To create the AutoML model here, we use the `TabularAutoML` preset, which looks like this:

![TabularAutoML preset pipeline](imgs/tutorial_2_pipeline.png)

All the parameters we set above can be passed to the preset to change its configuration:

In [11]:
%%time 

# Build the preset from the notebook-level settings and fit it on the
# training split; fit_predict returns the out-of-fold predictions.
automl = TabularAutoML(
    task = task,
    timeout = TIMEOUT,
    # Algorithms to train; presumably one inner list per level -- confirm
    # against the LightAutoML docs.
    general_params = {'nested_cv': False, 'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']]},
    reader_params = {'cv': N_FOLDS, 'random_state': RANDOM_STATE},
    # Limits for hyper-parameter tuning (time presumably in seconds).
    tuning_params = {'max_tuning_iter': 20, 'max_tuning_time': 30},
    # LightGBM's thread-count parameter is `num_threads` (aliases: num_thread,
    # nthread, nthreads, n_jobs). The previous key `n_threads` is not among
    # them, so the thread setting was presumably ignored by LightGBM.
    lgb_params = {'default_params': {'num_threads': N_THREADS}},
)
oof_pred = automl.fit_predict(train_data, roles = roles)
logging.info('oof_pred:\n{}\nShape = {}'.format(oof_pred, oof_pred.shape))


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Feats was rejected during automatic roles guess: []
Train process start. Time left 295.983624458313 secs
C = 1e-05 score = 0.6871107761224348
C = 5e-05 score = 0.7049836693020297
C = 0.0001 score = 0.7203521657552481
C = 0.0005 score = 0.7541307845684259
C = 0.001 score = 0.7616252652736126
C = 0.005 score = 0.7634160292081617
C = 0.01 score = 0.7601766174340223
C = 0.05 score = 0.7482025541104714
C = 1e-05 score = 0.7097380264945652
C = 5e-05 score = 0.7196469514266305
C = 0.0001 score = 0.726806640625
C = 0.0005 score = 0.7401706861413043
C = 0.001 score = 0.7423785665760868
C = 0.005 score = 0.7421981148097827
C = 0.01 score = 0.7413011633831522
C = 1e-05 score = 0.6369283924932064
C = 5e-05 score = 0.6511761209239131
C = 0.0001 score = 0.6631337041440217
C = 0.0005 score = 0.6919688349184784
C = 0.001 score = 0.7008268936820652
C = 0.005 score = 0.7106243631114132
C = 0.01 score = 0.7092550526494564
C = 0.05 score = 0.7000944718070651
C = 1e-05 score = 0.6700306768002717
C = 5e-05 


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer

[32m[I 2020-11-11 22:34:06,599][0m A new study created in memory with name: no-name-0ef90daa-9fe7-4ddb-8544-8f6d4196180c[0m


Early stopping, best iteration is:
[366]	valid's auc: 0.759569
Time history [2.13704252243042, 2.0840117931365967, 3.077803134918213, 2.9917008876800537, 3.772228956222534, 2.5856049060821533]. Time left 50.28570532798767
Optuna may run 225.87924909591675 secs
Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.76417
[200]	valid's auc: 0.76039
[300]	valid's auc: 0.759968


[32m[I 2020-11-11 22:34:09,867][0m Trial 0 finished with value: 0.7660460466881558 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 108}. Best is trial 0 with value: 0.7660460466881558.[0m


Early stopping, best iteration is:
[155]	valid's auc: 0.766046
Time history [3.2605254650115967]. Time left 8999999996.737516
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.760797
[200]	valid's auc: 0.756488


[32m[I 2020-11-11 22:34:12,186][0m Trial 1 finished with value: 0.7636191606395433 and parameters: {'feature_fraction': 0.5917173949330818, 'num_leaves': 87}. Best is trial 0 with value: 0.7660460466881558.[0m


[300]	valid's auc: 0.759589
Early stopping, best iteration is:
[116]	valid's auc: 0.763619
Time history [2.3080246448516846]. Time left 8999999997.689964
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.747524
[200]	valid's auc: 0.745257


[32m[I 2020-11-11 22:34:14,307][0m Trial 2 finished with value: 0.7482880831342111 and parameters: {'feature_fraction': 0.7993292420985183, 'num_leaves': 118}. Best is trial 0 with value: 0.7660460466881558.[0m


Early stopping, best iteration is:
[42]	valid's auc: 0.748288
Time history [2.1121344566345215]. Time left 8999999997.886015
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.755334
[200]	valid's auc: 0.750464


[32m[I 2020-11-11 22:34:16,525][0m Trial 3 finished with value: 0.7598238102110964 and parameters: {'feature_fraction': 0.7229163764267956, 'num_leaves': 230}. Best is trial 0 with value: 0.7660460466881558.[0m


Early stopping, best iteration is:
[39]	valid's auc: 0.759824
Time history [2.209660530090332]. Time left 8999999997.788353
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.740035
[200]	valid's auc: 0.742253
[300]	valid's auc: 0.747395
[400]	valid's auc: 0.749993
[500]	valid's auc: 0.74886


[32m[I 2020-11-11 22:34:21,793][0m Trial 4 finished with value: 0.7507630792586771 and parameters: {'feature_fraction': 0.5290418060840998, 'num_leaves': 103}. Best is trial 0 with value: 0.7660460466881558.[0m


Early stopping, best iteration is:
[382]	valid's auc: 0.750763
Time history [5.257806062698364]. Time left 8999999994.740194
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.764474
[200]	valid's auc: 0.758327


[32m[I 2020-11-11 22:34:24,585][0m Trial 5 finished with value: 0.766142266839863 and parameters: {'feature_fraction': 0.6668543055695109, 'num_leaves': 119}. Best is trial 5 with value: 0.766142266839863.[0m


[300]	valid's auc: 0.761673
Early stopping, best iteration is:
[118]	valid's auc: 0.766142
Time history [2.781802177429199]. Time left 8999999997.216267



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.750036
[200]	valid's auc: 0.752329
[300]	valid's auc: 0.75211
[400]	valid's auc: 0.753687
[500]	valid's auc: 0.754927
[600]	valid's auc: 0.75303


[32m[I 2020-11-11 22:34:31,526][0m Trial 6 finished with value: 0.7557718727114304 and parameters: {'feature_fraction': 0.8540362888980227, 'num_leaves': 165}. Best is trial 5 with value: 0.766142266839863.[0m


[700]	valid's auc: 0.753185
Early stopping, best iteration is:
[509]	valid's auc: 0.755772
Time history [6.930838108062744]. Time left 8999999993.067072



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.740035
[200]	valid's auc: 0.742253
[300]	valid's auc: 0.747395
[400]	valid's auc: 0.749993
[500]	valid's auc: 0.74886


[32m[I 2020-11-11 22:34:36,684][0m Trial 7 finished with value: 0.7507630792586771 and parameters: {'feature_fraction': 0.5282057895135501, 'num_leaves': 103}. Best is trial 5 with value: 0.766142266839863.[0m


Early stopping, best iteration is:
[382]	valid's auc: 0.750763
Time history [5.148114204406738]. Time left 8999999994.84999
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.737469
Early stopping, best iteration is:
[23]	valid's auc: 0.741045
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.725087
[200]	valid's auc: 0.735203
Early stopping, best iteration is:
[182]	valid's auc: 0.737161
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.718368
[200]	valid's auc: 0.719291
[300]	valid's auc: 0.719408
Early stopping, best iteration is:
[212]	valid's auc: 0.721218
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.734911
[200]	valid's auc: 0.739258
Early stopping, best iteration is:
[194]	valid's auc: 0.739842
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.746269
[200]	valid's auc: 0.748355
Early stopping, best iteration is:


[2020-11-11 22:34:47,094] (INFO): oof_pred:
array([[0.02656044],
       [0.02681707],
       [0.03005865],
       ...,
       [0.02124858],
       [0.164036  ],
       [0.09136057]], dtype=float32)
Shape = (8000, 1)


Blending, iter 2: score = 0.7513083907298949, weights = [0.34694588 0.6530542  0.        ]
No score update. Terminated
CPU times: user 7min 49s, sys: 3.89 s, total: 7min 53s
Wall time: 1min 5s


## Step 4. Predict to test data and check scores

In [12]:
%%time

# Score the hold-out part with the fitted AutoML model.
test_pred = automl.predict(test_data)
pred_msg = 'Prediction for test data:\n{}\nShape = {}'
logging.info(pred_msg.format(test_pred, test_pred.shape))

logging.info('Check scores...')

# ROC AUC of the out-of-fold predictions on the training part.
y_train = train_data[TARGET_NAME].values
oof_auc = roc_auc_score(y_train, oof_pred.data[:, 0])
logging.info('OOF score: {}'.format(oof_auc))

# ROC AUC of the predictions on the hold-out part.
y_test = test_data[TARGET_NAME].values
test_auc = roc_auc_score(y_test, test_pred.data[:, 0])
logging.info('TEST score: {}'.format(test_auc))

[2020-11-11 22:34:47,535] (INFO): Prediction for test data:
array([[0.06316119],
       [0.07426225],
       [0.02580068],
       ...,
       [0.05014627],
       [0.04235209],
       [0.2220479 ]], dtype=float32)
Shape = (2000, 1)
[2020-11-11 22:34:47,536] (INFO): Check scores...
[2020-11-11 22:34:47,540] (INFO): OOF score: 0.7513083907298949
[2020-11-11 22:34:47,542] (INFO): TEST score: 0.731970108695652


CPU times: user 1.12 s, sys: 7.59 ms, total: 1.12 s
Wall time: 443 ms


## Step 5. Profiling AutoML 

To build the report here, we **must** turn on the decorators in Step 0.4. The report is interactive, and you can go as deep into the function call stack as you want:

In [13]:
%%time
# Write the profiling report to disk and fail loudly if the file is missing.
report_path = 'my_report_profile.html'
p.profile(report_path)
assert os.path.exists(report_path), 'Profile report failed to build'

FULL_STATS_DF shape = (8325, 6)
RUN_FNAME vc head:
PandasToPandasReader._is_ok_feature [111]    1
LAMLDataset._get_cols_idx [26]               1
NumpyDataset._get_2d [24]                    1
CatLinear.forward [349]                      1
CatLogisticRegression.forward [568]          1
Name: run_fname, dtype: int64
CONNECTED COMPONENTS cnt = 1
PATH LENS describe:
count    8326.000000
mean        9.012251
std         2.779780
min         0.000000
25%         7.000000
50%         9.000000
75%        10.000000
max        21.000000
dtype: float64
CPU times: user 1.02 s, sys: 28.2 ms, total: 1.04 s
Wall time: 1.04 s


## Step 6. Create AutoML with time utilization 

Below we create a specific AutoML preset for TIMEOUT utilization (it tries to spend as much of the time budget as possible):

In [14]:
%%time 

# Same configuration as the plain TabularAutoML run, but using the
# time-utilizing preset. Note that `automl` and `oof_pred` are rebound here,
# replacing the objects produced by the earlier TabularAutoML cell.
automl = TabularUtilizedAutoML(
    task = task,
    timeout = TIMEOUT,
    # Algorithms to train; presumably one inner list per level -- confirm
    # against the LightAutoML docs.
    general_params = {'nested_cv': False, 'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']]},
    reader_params = {'cv': N_FOLDS, 'random_state': RANDOM_STATE},
    # Limits for hyper-parameter tuning (time presumably in seconds).
    tuning_params = {'max_tuning_iter': 20, 'max_tuning_time': 30},
    # LightGBM's thread-count parameter is `num_threads` (aliases: num_thread,
    # nthread, nthreads, n_jobs). The previous key `n_threads` is not among
    # them, so the thread setting was presumably ignored by LightGBM.
    lgb_params = {'default_params': {'num_threads': N_THREADS}},
)
oof_pred = automl.fit_predict(train_data, roles = roles)
logging.info('oof_pred:\n{}\nShape = {}'.format(oof_pred, oof_pred.shape))

CUR SETUP FOR RANDOM STATE: {'reader_params': {'random_state': 42}}
FOUND reader_params in kwargs, need to combine
MERGED VARIANT FOR reader_params = {'cv': 5, 'random_state': 42}




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Feats was rejected during automatic roles guess: []
Train process start. Time left 298.7156345844269 secs
C = 1e-05 score = 0.6871107761224348
C = 5e-05 score = 0.7049836693020297
C = 0.0001 score = 0.7231478957187378
C = 0.0005 score = 0.7541949313362306
C = 0.001 score = 0.7611922745909308
C = 0.005 score = 0.763330500184422
C = 0.01 score = 0.7603904399933715
C = 0.05 score = 0.7483094653901461
C = 1e-05 score = 0.7097380264945652
C = 5e-05 score = 0.7196469514266305
C = 0.0001 score = 0.726806640625
C = 0.0005 score = 0.7401706861413043
C = 0.001 score = 0.7423785665760868
C = 0.005 score = 0.7421025815217392
C = 0.01 score = 0.7413117781929349
C = 1e-05 score = 0.6368514351222825
C = 5e-05 score = 0.6511761209239131
C = 0.0001 score = 0.6631337041440217
C = 0.0005 score = 0.6919582201086957
C = 0.001 score = 0.7008322010869565
C = 0.005 score = 0.7106509001358695
C = 0.01 score = 0.7091542119565217
C = 0.05 score = 0.7000520125679347
C = 1e-05 score = 0.6700306768002717
C = 5e-05 


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer

[32m[I 2020-11-11 22:35:10,678][0m A new study created in memory with name: no-name-e3cd1756-317e-4f58-b7b3-8774a86024fd[0m


[600]	valid's auc: 0.755339
Early stopping, best iteration is:
[431]	valid's auc: 0.757653
Time history [2.1371512413024902, 3.0651638507843018, 3.2649903297424316, 5.082632064819336, 3.0418026447296143]. Time left 49.527692681550974
Optuna may run 222.53812839984894 secs
Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.754788
[200]	valid's auc: 0.755815
[300]	valid's auc: 0.758348
[400]	valid's auc: 0.759685
[500]	valid's auc: 0.757055


[32m[I 2020-11-11 22:35:16,537][0m Trial 0 finished with value: 0.7599307214907709 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 108}. Best is trial 0 with value: 0.7599307214907709.[0m


Early stopping, best iteration is:
[340]	valid's auc: 0.759931
Time history [5.85319972038269]. Time left 8999999994.145054
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.743327
[200]	valid's auc: 0.749229
[300]	valid's auc: 0.750255
[400]	valid's auc: 0.750164
[500]	valid's auc: 0.750523
[600]	valid's auc: 0.749411


[32m[I 2020-11-11 22:35:22,758][0m Trial 1 finished with value: 0.7522705283020885 and parameters: {'feature_fraction': 0.5917173949330818, 'num_leaves': 87}. Best is trial 0 with value: 0.7599307214907709.[0m


Early stopping, best iteration is:
[451]	valid's auc: 0.752271
Time history [6.21126389503479]. Time left 8999999993.786871
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.757921


[32m[I 2020-11-11 22:35:24,959][0m Trial 2 finished with value: 0.7636672707153969 and parameters: {'feature_fraction': 0.7993292420985183, 'num_leaves': 118}. Best is trial 2 with value: 0.7636672707153969.[0m


[200]	valid's auc: 0.754435
Early stopping, best iteration is:
[17]	valid's auc: 0.763667
Time history [2.192293643951416]. Time left 8999999997.805832



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.755927
[200]	valid's auc: 0.758632
[300]	valid's auc: 0.759835
[400]	valid's auc: 0.7624
[500]	valid's auc: 0.762892
[600]	valid's auc: 0.760235


[32m[I 2020-11-11 22:35:31,837][0m Trial 3 finished with value: 0.7643728851612489 and parameters: {'feature_fraction': 0.7229163764267956, 'num_leaves': 230}. Best is trial 3 with value: 0.7643728851612489.[0m


Early stopping, best iteration is:
[442]	valid's auc: 0.764373
Time history [6.86871862411499]. Time left 8999999993.12946
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.740184
[200]	valid's auc: 0.744883
[300]	valid's auc: 0.747834
[400]	valid's auc: 0.746663
[500]	valid's auc: 0.747117
[600]	valid's auc: 0.744455


[32m[I 2020-11-11 22:35:37,919][0m Trial 4 finished with value: 0.7485339790774626 and parameters: {'feature_fraction': 0.5290418060840998, 'num_leaves': 103}. Best is trial 3 with value: 0.7643728851612489.[0m


Early stopping, best iteration is:
[446]	valid's auc: 0.748534
Time history [6.0731964111328125]. Time left 8999999993.924953
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.749956
[200]	valid's auc: 0.752949
[300]	valid's auc: 0.761315
[400]	valid's auc: 0.762812
[500]	valid's auc: 0.760717
[600]	valid's auc: 0.756836
Early stopping, best iteration is:
[412]	valid's auc: 0.763288


[32m[I 2020-11-11 22:35:44,289][0m Trial 5 finished with value: 0.7632877356725521 and parameters: {'feature_fraction': 0.6668543055695109, 'num_leaves': 119}. Best is trial 3 with value: 0.7643728851612489.[0m


Time history [6.360728025436401]. Time left 8999999993.637419
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.746193
Early stopping, best iteration is:
[45]	valid's auc: 0.761711
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.737209
[200]	valid's auc: 0.738525
[300]	valid's auc: 0.735559
Early stopping, best iteration is:
[210]	valid's auc: 0.739274
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.721892
[200]	valid's auc: 0.727433
[300]	valid's auc: 0.720915
Early stopping, best iteration is:
[224]	valid's auc: 0.728075
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.717943
Early stopping, best iteration is:
[27]	valid's auc: 0.722401
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.739852
Early stopping, best iteration is:
[37]	valid's auc: 0.751635
Time history [2.1371512413024902, 3.0651638507843018, 3.26499032


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Feats was rejected during automatic roles guess: []
Train process start. Time left 233.9503846168518 secs
C = 1e-05 score = 0.6671905319370719
C = 5e-05 score = 0.6836762512628894
C = 0.0001 score = 0.697820613563834
C = 0.0005 score = 0.7269646284031196
C = 0.001 score = 0.7344751458002576
C = 0.005 score = 0.7409700060404874
C = 0.01 score = 0.7415580180786974
C = 0.05 score = 0.7401414436230094
C = 0.1 score = 0.7384362087122002
C = 1e-05 score = 0.6909179687500001
C = 5e-05 score = 0.6978653617527174
C = 0.0001 score = 0.7026473335597827
C = 0.0005 score = 0.7156557829483695
C = 0.001 score = 0.7172267747961957
C = 0.005 score = 0.7088888417119564
C = 0.01 score = 0.702233355978261
C = 1e-05 score = 0.6663526452105979
C = 5e-05 score = 0.6762217646059783
C = 0.0001 score = 0.6844800866168479
C = 0.0005 score = 0.7055292544157608
C = 0.001 score = 0.7126252547554348
C = 0.005 score = 0.7175823709239131
C = 0.01 score = 0.7172320822010869
C = 0.05 score = 0.7133364470108696
C = 1e-05


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer

[32m[I 2020-11-11 22:36:17,169][0m A new study created in memory with name: no-name-9cc350a4-e9c1-447e-924c-ab1f26ab23c4[0m


Early stopping, best iteration is:
[364]	valid's auc: 0.746147
Time history [2.3198904991149902, 3.871607780456543, 2.3687968254089355, 4.4430153369903564, 2.242335796356201, 2.824998140335083]. Time left 35.15898485779762
Optuna may run 165.73995239734649 secs
Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.726516
[200]	valid's auc: 0.732214
[300]	valid's auc: 0.73757
[400]	valid's auc: 0.737586
[500]	valid's auc: 0.738495


[32m[I 2020-11-11 22:36:22,632][0m Trial 0 finished with value: 0.7399543488835789 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 108}. Best is trial 0 with value: 0.7399543488835789.[0m


Early stopping, best iteration is:
[325]	valid's auc: 0.739954
Time history [5.456510305404663]. Time left 8999999994.54176
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.723661
[200]	valid's auc: 0.728991
[300]	valid's auc: 0.733219
[400]	valid's auc: 0.737463
[500]	valid's auc: 0.739142
[600]	valid's auc: 0.739516
[700]	valid's auc: 0.739158
[800]	valid's auc: 0.739938
[900]	valid's auc: 0.739259
[1000]	valid's auc: 0.738335


[32m[I 2020-11-11 22:36:32,001][0m Trial 1 finished with value: 0.7406920367133334 and parameters: {'feature_fraction': 0.5917173949330818, 'num_leaves': 87}. Best is trial 1 with value: 0.7406920367133334.[0m


Early stopping, best iteration is:
[839]	valid's auc: 0.740692
Time history [9.359676837921143]. Time left 8999999990.638464
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.732877
[200]	valid's auc: 0.733791
[300]	valid's auc: 0.736619
[400]	valid's auc: 0.740067
[500]	valid's auc: 0.738436
[600]	valid's auc: 0.738613
Early stopping, best iteration is:
[413]	valid's auc: 0.740366


[32m[I 2020-11-11 22:36:37,893][0m Trial 2 finished with value: 0.740365957310326 and parameters: {'feature_fraction': 0.7993292420985183, 'num_leaves': 118}. Best is trial 1 with value: 0.7406920367133334.[0m


Time history [5.883310317993164]. Time left 8999999994.114807
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.730968
[200]	valid's auc: 0.738581
[300]	valid's auc: 0.740296
[400]	valid's auc: 0.741152
[500]	valid's auc: 0.740435
[600]	valid's auc: 0.739906


[32m[I 2020-11-11 22:36:44,347][0m Trial 3 finished with value: 0.7428944090746293 and parameters: {'feature_fraction': 0.7229163764267956, 'num_leaves': 230}. Best is trial 3 with value: 0.7428944090746293.[0m


Early stopping, best iteration is:
[442]	valid's auc: 0.742894
Time history [6.444903373718262]. Time left 8999999993.55317
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.724035
[200]	valid's auc: 0.740179
[300]	valid's auc: 0.742371
[400]	valid's auc: 0.743563
[500]	valid's auc: 0.741777


[32m[I 2020-11-11 22:36:49,160][0m Trial 4 finished with value: 0.7445996439854387 and parameters: {'feature_fraction': 0.5290418060840998, 'num_leaves': 103}. Best is trial 4 with value: 0.7445996439854387.[0m


Early stopping, best iteration is:
[322]	valid's auc: 0.7446
Time history [4.803943395614624]. Time left 8999999995.194206
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.723527
[200]	valid's auc: 0.731952
[300]	valid's auc: 0.730054
Early stopping, best iteration is:
[239]	valid's auc: 0.732722
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.711962
[200]	valid's auc: 0.716659
[300]	valid's auc: 0.721313
[400]	valid's auc: 0.720703
Early stopping, best iteration is:
[332]	valid's auc: 0.723845
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.705014
[200]	valid's auc: 0.708475
Early stopping, best iteration is:
[178]	valid's auc: 0.709234
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.748392
Early stopping, best iteration is:
[70]	valid's auc: 0.749034
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.718134
[200]	va


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



Train process start. Time left 168.346182346344 secs
C = 1e-05 score = 0.6836121044950847
C = 5e-05 score = 0.6982108397346461
C = 0.0001 score = 0.7110508844235611
C = 0.0005 score = 0.7335717454870077
C = 0.001 score = 0.7364690411661883
C = 0.005 score = 0.7343896167765179
C = 0.01 score = 0.7319199662160356
C = 1e-05 score = 0.6664136803668479
C = 5e-05 score = 0.6735521399456522
C = 0.0001 score = 0.6803084663722826
C = 0.0005 score = 0.6996486497961956
C = 0.001 score = 0.7061396059782609
C = 0.005 score = 0.7119034476902174
C = 0.01 score = 0.7093558933423912
C = 0.05 score = 0.6994045091711957
C = 1e-05 score = 0.6814177139945652
C = 5e-05 score = 0.6925473420516304
C = 0.0001 score = 0.7027853260869565
C = 0.0005 score = 0.7245350713315217
C = 0.001 score = 0.7302033797554348
C = 0.005 score = 0.7308243461277174
C = 0.01 score = 0.7284253991168478
C = 0.05 score = 0.7216372282608695
C = 1e-05 score = 0.6823040506114131
C = 5e-05 score = 0.6971170176630435
C = 0.0001 score = 0.


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer

[32m[I 2020-11-11 22:37:20,359][0m A new study created in memory with name: no-name-3295e54f-5913-42fa-a5e3-b7b823d4ce6b[0m


Early stopping, best iteration is:
[295]	valid's auc: 0.762823
Time history [3.136124849319458, 1.8027794361114502, 1.22336745262146, 4.711403846740723, 1.7692232131958008, 2.4265689849853516]. Time left 23.888627791404723
Optuna may run 107.85589895248413 secs
Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.725334
[200]	valid's auc: 0.733759
[300]	valid's auc: 0.732102


[32m[I 2020-11-11 22:37:24,157][0m Trial 0 finished with value: 0.7361322706352132 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 108}. Best is trial 0 with value: 0.7361322706352132.[0m


Early stopping, best iteration is:
[181]	valid's auc: 0.736132
Time history [3.7915778160095215]. Time left 8999999996.206453
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.733941
[200]	valid's auc: 0.738217
[300]	valid's auc: 0.743381
[400]	valid's auc: 0.743065
[500]	valid's auc: 0.741948


[32m[I 2020-11-11 22:37:29,149][0m Trial 1 finished with value: 0.7446317173693411 and parameters: {'feature_fraction': 0.5917173949330818, 'num_leaves': 87}. Best is trial 1 with value: 0.7446317173693411.[0m


Early stopping, best iteration is:
[363]	valid's auc: 0.744632
Time history [4.983178615570068]. Time left 8999999995.014942
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.719892
[200]	valid's auc: 0.731653
[300]	valid's auc: 0.733203
[400]	valid's auc: 0.736319
[500]	valid's auc: 0.736261
[600]	valid's auc: 0.737682
[700]	valid's auc: 0.737816
[800]	valid's auc: 0.737731
[900]	valid's auc: 0.737971


[32m[I 2020-11-11 22:37:38,550][0m Trial 2 finished with value: 0.7388638538308984 and parameters: {'feature_fraction': 0.7993292420985183, 'num_leaves': 118}. Best is trial 1 with value: 0.7446317173693411.[0m


Early stopping, best iteration is:
[745]	valid's auc: 0.738864
Time history [9.390758514404297]. Time left 8999999990.607367
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.718941
[200]	valid's auc: 0.7255
[300]	valid's auc: 0.72705
[400]	valid's auc: 0.727056
[500]	valid's auc: 0.729691
[600]	valid's auc: 0.731188
[700]	valid's auc: 0.731925
[800]	valid's auc: 0.732075


[32m[I 2020-11-11 22:37:47,045][0m Trial 3 finished with value: 0.7329356233729439 and parameters: {'feature_fraction': 0.7229163764267956, 'num_leaves': 230}. Best is trial 1 with value: 0.7446317173693411.[0m


Early stopping, best iteration is:
[654]	valid's auc: 0.732936
Time history [8.484450340270996]. Time left 8999999991.51349
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.736271
[200]	valid's auc: 0.738383
[300]	valid's auc: 0.741328
[400]	valid's auc: 0.740772
[500]	valid's auc: 0.740083


[32m[I 2020-11-11 22:37:51,832][0m Trial 4 finished with value: 0.7424988373398335 and parameters: {'feature_fraction': 0.5290418060840998, 'num_leaves': 103}. Best is trial 1 with value: 0.7446317173693411.[0m


Early stopping, best iteration is:
[328]	valid's auc: 0.742499
Time history [4.77767276763916]. Time left 8999999995.220495
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.727638
[200]	valid's auc: 0.731973
Early stopping, best iteration is:
[167]	valid's auc: 0.735812
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.71486
[200]	valid's auc: 0.722646
[300]	valid's auc: 0.725172
Early stopping, best iteration is:
[290]	valid's auc: 0.726955
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.756592
Early stopping, best iteration is:
[93]	valid's auc: 0.758407
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.733542
Early stopping, best iteration is:
[73]	valid's auc: 0.7399
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.752383
Early stopping, best iteration is:
[72]	valid's auc: 0.755769
Time history [3.136124849319458, 


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Feats was rejected during automatic roles guess: []
Train process start. Time left 105.96959352493286 secs
C = 1e-05 score = 0.6498441768098744
C = 5e-05 score = 0.6692859930186934
C = 0.0001 score = 0.6859053514441042
C = 0.0005 score = 0.7249226229613355
C = 0.001 score = 0.7332991217238376
C = 0.005 score = 0.7341704486531851
C = 0.01 score = 0.7309844925188833
C = 0.05 score = 0.7234472473018266
C = 1e-05 score = 0.6956468665081521
C = 5e-05 score = 0.7101785411005436
C = 0.0001 score = 0.720841117527174
C = 0.0005 score = 0.7438699473505435
C = 0.001 score = 0.7493418817934783
C = 0.005 score = 0.7510190217391305
C = 0.01 score = 0.7486837635869565
C = 0.05 score = 0.7427341627038044
C = 1e-05 score = 0.651611328125
C = 5e-05 score = 0.6675760020380433
C = 0.0001 score = 0.679390285326087
C = 0.0005 score = 0.7099290930706521
C = 0.001 score = 0.7209525730298912
C = 0.005 score = 0.7329579228940217
C = 0.01 score = 0.7338814113451088
C = 0.05 score = 0.7285050101902173
C = 0.1 sco


Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer

[32m[I 2020-11-11 22:38:23,161][0m A new study created in memory with name: no-name-f9dcf806-68f3-4ef2-8bc1-ab4cfcad3cc2[0m


Early stopping, best iteration is:
[54]	valid's auc: 0.716975
Time history [2.823606252670288, 5.829875230789185, 1.2245566844940186, 2.152358293533325, 3.026540994644165, 1.2373046875]. Time left 8.570769983530045
Optuna may run 45.60778720378876 secs
Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.721555
[200]	valid's auc: 0.724105


[32m[I 2020-11-11 22:38:26,411][0m Trial 0 finished with value: 0.7277664630006789 and parameters: {'feature_fraction': 0.6872700594236812, 'num_leaves': 108}. Best is trial 0 with value: 0.7277664630006789.[0m


[300]	valid's auc: 0.725783
Early stopping, best iteration is:
[114]	valid's auc: 0.727766
Time history [3.2438108921051025]. Time left 8999999996.754473



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



Training until validation scores don't improve for 200 rounds
[100]	valid's auc: 0.735384
[200]	valid's auc: 0.735277
[300]	valid's auc: 0.737602
[400]	valid's auc: 0.73965
[500]	valid's auc: 0.74182
[600]	valid's auc: 0.739992


[32m[I 2020-11-11 22:38:33,090][0m Trial 1 finished with value: 0.7427714611030037 and parameters: {'feature_fraction': 0.5917173949330818, 'num_leaves': 87}. Best is trial 1 with value: 0.7427714611030037.[0m


Early stopping, best iteration is:
[487]	valid's auc: 0.742771
Time history [6.670206785202026]. Time left 8999999993.32798
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.708496
[200]	valid's auc: 0.718417
[300]	valid's auc: 0.720983
[400]	valid's auc: 0.72171
[500]	valid's auc: 0.723634
[600]	valid's auc: 0.726141
[700]	valid's auc: 0.727029
[800]	valid's auc: 0.725511


[32m[I 2020-11-11 22:38:42,064][0m Trial 2 finished with value: 0.7273869279578342 and parameters: {'feature_fraction': 0.7993292420985183, 'num_leaves': 118}. Best is trial 1 with value: 0.7427714611030037.[0m


Early stopping, best iteration is:
[691]	valid's auc: 0.727387
Time history [8.964248418807983]. Time left 8999999991.033913
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.714151
[200]	valid's auc: 0.719716
[300]	valid's auc: 0.721085


[32m[I 2020-11-11 22:38:45,434][0m Trial 3 finished with value: 0.7216832111871962 and parameters: {'feature_fraction': 0.7229163764267956, 'num_leaves': 230}. Best is trial 1 with value: 0.7427714611030037.[0m


Early stopping, best iteration is:
[140]	valid's auc: 0.721683
Time history [3.361238479614258]. Time left 8999999996.636917
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.731236
[200]	valid's auc: 0.734641
[300]	valid's auc: 0.732577


[32m[I 2020-11-11 22:38:48,937][0m Trial 4 finished with value: 0.7367416649293584 and parameters: {'feature_fraction': 0.5290418060840998, 'num_leaves': 103}. Best is trial 1 with value: 0.7427714611030037.[0m


Early stopping, best iteration is:
[168]	valid's auc: 0.736742
Time history [3.494029998779297]. Time left 8999999996.504158
Training until validation scores don't improve for 200 rounds



Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer



[100]	valid's auc: 0.717984
[200]	valid's auc: 0.720545
[300]	valid's auc: 0.723025
[400]	valid's auc: 0.729931
[500]	valid's auc: 0.728793
[600]	valid's auc: 0.729686
Early stopping, best iteration is:
[413]	valid's auc: 0.730445


[32m[I 2020-11-11 22:38:55,091][0m Trial 5 finished with value: 0.7304419177745349 and parameters: {'feature_fraction': 0.6668543055695109, 'num_leaves': 119}. Best is trial 1 with value: 0.7427714611030037.[0m


Time history [6.144635915756226]. Time left 8999999993.853521
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.731407
Early stopping, best iteration is:
[22]	valid's auc: 0.736378
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.761989
[200]	valid's auc: 0.754034
Early stopping, best iteration is:
[115]	valid's auc: 0.764426
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.731663
[200]	valid's auc: 0.736158
[300]	valid's auc: 0.738663
[400]	valid's auc: 0.73722
Early stopping, best iteration is:
[303]	valid's auc: 0.739693
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.725474
[200]	valid's auc: 0.723511
Early stopping, best iteration is:
[106]	valid's auc: 0.727316
Training until validation scores don't improve for 100 rounds
[100]	valid's auc: 0.706177
[200]	valid's auc: 0.707228
Early stopping, best iteration is:
[141]	valid's auc: 0.711288
Time hi

[2020-11-11 22:39:05,484] (INFO): oof_pred:
array([[0.03720199],
       [0.03239563],
       [0.03478435],
       ...,
       [0.03037913],
       [0.15554607],
       [0.09786935]], dtype=float32)
Shape = (8000, 1)


Blending, iter 4: score = 0.7575391518001122, weights = [0.31697652 0.12107207 0.56195146 0.        ]
No score update. Terminated
CPU times: user 32min 10s, sys: 11.4 s, total: 32min 21s
Wall time: 4min 16s


## Step 7. Predict on test data and check scores for utilized AutoML

In [15]:
%%time

test_pred = automl.predict(test_data)
logging.info('Prediction for test data:\n{}\nShape = {}'
              .format(test_pred, test_pred.shape))

logging.info('Check scores...')
logging.info('OOF score: {}'.format(roc_auc_score(train_data[TARGET_NAME].values, oof_pred.data[:, 0])))
logging.info('TEST score: {}'.format(roc_auc_score(test_data[TARGET_NAME].values, test_pred.data[:, 0])))

[2020-11-11 22:39:07,077] (INFO): Prediction for test data:
array([[0.0595268 ],
       [0.08256021],
       [0.03199889],
       ...,
       [0.04806679],
       [0.04075336],
       [0.20718434]], dtype=float32)
Shape = (2000, 1)
[2020-11-11 22:39:07,078] (INFO): Check scores...
[2020-11-11 22:39:07,081] (INFO): OOF score: 0.757617388431481
[2020-11-11 22:39:07,083] (INFO): TEST score: 0.7355808423913044


CPU times: user 3.98 s, sys: 240 ms, total: 4.22 s
Wall time: 1.59 s


## Step 8. Profiling utilized AutoML 

To build the report here, we **must** turn on the decorators at step 0.4. The report is interactive, and you can go as deep into the function call stack as you want:

In [16]:
%%time
p.profile('my_report_profile.html')
assert os.path.exists('my_report_profile.html'), 'Profile report failed to build'

FULL_STATS_DF shape = (34506, 6)
RUN_FNAME vc head:
Loss._fw_func [53]                      1
LAMLDataset._initialize [738]           1
NumpyDataset._check_dtype [904]         1
CatLogisticRegression.forward [1368]    1
LAMLDataset.set_data [245]              1
Name: run_fname, dtype: int64
CONNECTED COMPONENTS cnt = 1
PATH LENS describe:
count    34507.000000
mean         9.721680
std          2.785337
min          0.000000
25%          8.000000
50%         10.000000
75%         11.000000
max         22.000000
dtype: float64
CPU times: user 1.66 s, sys: 100 ms, total: 1.76 s
Wall time: 1.75 s


# Appendix. Profiling report screenshots 

After loading the HTML file with the profiling report, you will see a fully folded report (please wait for the green LOAD OK text, which indicates that loading has finished). If you click on the triangle on the left, the report unfolds and looks like this:  

<img src="imgs/tutorial_2_initial_report.png" alt="Initial profiling report" style="width: 500px;"/>

If we go even deeper, we will get something like this:

<img src="imgs/tutorial_2_unfolded_report.png" alt="Profiling report after several unfoldings on different levels" style="width: 600px;"/>
