In [10]:
import numpy as np
import os
import pandas as pd
import requests

from sklearn.model_selection import train_test_split
from tqdm import tqdm

from lightautoml.automl.presets.tabular_presets import TabularAutoML
from lightautoml.tasks import Task

from lightautoml.report.monitoring_deco import MonitoringDeco

In [11]:
# --- Reproducibility ---
# Seed shared by the train/test split and the AutoML reader.
RANDOM_STATE = 42

# --- Data ---
# Name of the target column in the dataset.
TARGET_NAME = 'TARGET'
# Fraction of rows held out by train_test_split; the held-out part is later
# cut into monitoring epochs.
TEST_SIZE = 0.9

# --- AutoML budget ---
# Thread count handed to LightGBM through `lgb_params`.
N_THREADS = 8
# Number of cross-validation folds passed to the reader.
N_FOLDS = 5
# Time limit for the AutoML run, in seconds.
TIMEOUT = 120

In [13]:
# Relative path to the sample CSV; it is resolved against the current working
# directory, so the notebook must be started from the folder containing `LightAutoML/`.
DATASET_PATH = 'LightAutoML/examples/data/sampled_app_train.csv'
# Load the full dataset into a DataFrame; it is split into train/test in the next cell.
data = pd.read_csv(DATASET_PATH)

In [14]:
# Stratified hold-out split: class proportions of the target column are kept
# the same in both parts, and the seed makes the split repeatable.
train_data, test_data = train_test_split(
    data,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
    stratify=data[TARGET_NAME],
)

In [15]:
# Number of consecutive monitoring "epochs" the hold-out set is cut into.
N = 10

# Label rows 1..N in row order: evenly spaced values in [0.51, N + 0.5] are
# rounded to the nearest integer, which yields N nearly equal-sized groups.
# The clip guards the upper edge: NumPy rounds halves to even, so for an odd N
# the final value N + 0.5 would otherwise round up to N + 1.
epoch_labels = np.linspace(0.51, N + 0.5, test_data.shape[0]).round().clip(1, N).astype(int)

# `assign` builds a new frame instead of writing into the object returned by
# train_test_split; an in-place column write there can raise pandas'
# SettingWithCopyWarning. Re-running this cell simply overwrites N_EPOCH.
test_data = test_data.assign(N_EPOCH=epoch_labels)

# Distinct epoch labels in order of first appearance.
n_epochs = test_data['N_EPOCH'].drop_duplicates()
# One DataFrame per epoch, in the same order as `n_epochs`.
test_data_split = [test_data[test_data['N_EPOCH'] == epoch] for epoch in n_epochs]

In [16]:
# Sanity check: number of hold-out rows assigned to each epoch.
# value_counts() orders the result by count (descending), not by epoch number.
test_data['N_EPOCH'].value_counts()

2     901
3     901
4     901
6     901
7     901
8     901
9     901
10    901
5     900
1     892
Name: N_EPOCH, dtype: int64

# Создание MonitoringDeco

In [17]:
# Binary-classification task definition for LightAutoML.
task = Task('binary')

# Tabular preset: a single level made of a linear model, LightGBM and tuned LightGBM.
automl = TabularAutoML(
    task=task,
    # Overall time limit for the AutoML run (TIMEOUT, in seconds).
    timeout=TIMEOUT,
    general_params={
        'nested_cv': False,
        'use_algos': [['linear_l2', 'lgb', 'lgb_tuned']],
    },
    # Cross-validation fold count and seed used by the reader.
    reader_params={
        'cv': N_FOLDS,
        'random_state': RANDOM_STATE,
    },
    # Caps on hyper-parameter tuning (presumably iterations and seconds -- confirm in LightAutoML docs).
    tuning_params={
        'max_tuning_iter': 20,
        'max_tuning_time': 30,
    },
    # Thread count forwarded to LightGBM.
    lgb_params={
        'default_params': {'num_threads': N_THREADS},
    },
)

Оборачиваем модель в класс MonitoringDeco.

In [18]:
# Configure the monitoring decorator: output directory, report file name and
# the thresholds for nulls, feature shift and model shift.
MD = MonitoringDeco(
    output_path='./lama_monitoring_2',
    report_file_name='lama_monitoring_report.html',
    null_threshold=0.01,
    feature_shift_threshold=0.05,
    model_shift_threshold=0.02,
)

# Wrap the AutoML preset; all later fit/update calls go through this wrapper.
automl_md = MD(automl)

Удаляем несколько признаков, чтобы не мешали.

In [19]:
# Column roles for LightAutoML: the target column plus the features to exclude.
# Each feature is listed exactly once. The original list repeated
# 'BASEMENTAREA_AVG' and 'NONLIVINGAPARTMENTS_MEDI'.
# NOTE(review): the repeats may have been typos for other columns -- confirm
# that no additional feature was meant to be dropped.
roles = {
    'target': TARGET_NAME,
    'drop': [
        'SK_ID_CURR',
        'DAYS_EMPLOYED',
        'AMT_INCOME_TOTAL',
        # *_AVG features
        'BASEMENTAREA_AVG',
        'ELEVATORS_AVG',
        'ENTRANCES_AVG',
        'LIVINGAPARTMENTS_AVG',
        'LIVINGAREA_AVG',
        'NONLIVINGAPARTMENTS_AVG',
        # *_MODE features
        'APARTMENTS_MODE',
        'BASEMENTAREA_MODE',
        'YEARS_BEGINEXPLUATATION_MODE',
        'ELEVATORS_MODE',
        'ENTRANCES_MODE',
        'LANDAREA_MODE',
        'LIVINGAPARTMENTS_MODE',
        'NONLIVINGAPARTMENTS_MODE',
        # *_MEDI features
        'APARTMENTS_MEDI',
        'BASEMENTAREA_MEDI',
        'ELEVATORS_MEDI',
        'ENTRANCES_MEDI',
        'LIVINGAPARTMENTS_MEDI',
        'NONLIVINGAPARTMENTS_MEDI',
    ],
}

Обучаем модель.

In [20]:
# Fit the wrapped AutoML on the training part using the roles defined above.
# The result is kept as `oof_pred` (presumably out-of-fold predictions on train_data -- confirm).
oof_pred = automl_md.fit_predict(train_data, roles=roles)

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
DEBUG:lightautoml.ml_algo.boost_lgbm:[343]	valid's auc: 0.692595
DEBUG:lightautoml.ml_algo.boost_lgbm:[344]	valid's auc: 0.692595
DEBUG:lightautoml.ml_algo.boost_lgbm:[345]	valid's auc: 0.692935
DEBUG:lightautoml.ml_algo.boost_lgbm:[346]	valid's auc: 0.692595
DEBUG:lightautoml.ml_algo.boost_lgbm:[347]	valid's auc: 0.692255
DEBUG:lightautoml.ml_algo.boost_lgbm:[348]	valid's auc: 0.692595
DEBUG:lightautoml.ml_algo.boost_lgbm:[349]	valid's auc: 0.692935
DEBUG:lightautoml.ml_algo.boost_lgbm:[350]	valid's auc: 0.691916
DEBUG:lightautoml.ml_algo.boost_lgbm:[351]	valid's auc: 0.690557
DEBUG:lightautoml.ml_algo.boost_lgbm:[352]	valid's auc: 0.690897
DEBUG:lightautoml.ml_algo.boost_lgbm:[353]	valid's auc: 0.690897
DEBUG:lightautoml.ml_algo.boost_lgbm:[354]	valid's auc: 0.691236
DEBUG:lightautoml.ml_algo.boost_lgbm:[355]	valid's auc: 0.691236
DEBUG:lightautoml.ml_algo.boost_lgbm:[356]	valid's auc: 0.691576
DEBUG:li

In [21]:
# Score the whole hold-out set by calling predict on the wrapper's `_model` attribute.
# NOTE(review): `_model` is a private attribute; presumably this bypasses the
# monitoring logic on purpose -- confirm, and prefer a public accessor if one exists.
preds = automl_md._model.predict(test_data)

Считаем результаты для всех эпох; они сохраняются в `lama_monitoring_report.html`.

In [22]:
# Feed the hold-out data to the monitoring wrapper one epoch at a time, in epoch order.
# Iterating the list itself (rather than indexing with range(N)) keeps the loop
# in step with however many epoch frames were actually built.
for epoch_data in tqdm(test_data_split):
    automl_md.update(epoch_data)

100%|██████████| 10/10 [02:55<00:00, 17.53s/it]


Сохраняем информацию об алертах.

In [23]:
# Write the monitoring data collected by the wrapper to "info.json".
# NOTE(review): whether the name is resolved against `output_path` or the working directory is not visible here -- confirm.
automl_md.save_data_json("info.json")