In [1]:
# Standard python libraries
import logging
import os
# Configure the root logger format/level up front, before the libraries below are imported.
logging.basicConfig(format='[%(asctime)s] (%(levelname)s): %(message)s', level=logging.INFO)
# Expose only GPU 0 to CUDA; this is set before torch is imported further down in this cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Installed libraries
from copy import deepcopy as copy
from collections import OrderedDict
import torch.nn as nn
import numpy as np
import pandas as pd
import joblib
import optuna
import yaml
import torch
from sklearn.metrics import log_loss, accuracy_score, roc_auc_score, f1_score
from sklearn.model_selection import train_test_split

from lightautoml.pipelines.features.torch_pipeline import TorchSimpleFeatures
from lightautoml.reader.base import PandasToPandasReader
from lightautoml.tasks import Task
from lightautoml.pipelines.ml.base import MLPipeline
from lightautoml.validation.np_iterators import FoldsIterator

from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau, CosineAnnealingLR

from lightautoml.ml_algo.dl_model import TorchModel
from lightautoml.ml_algo.torch_based.nn_models import DenseLightModel, DenseModel, ResNetModel, MLP, LinearLayer, SNN
from lightautoml.ml_algo.tuning.optuna import DLOptunaTuner
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML

import logging
from lightautoml.utils.logging import set_stdout_level
from lightautoml.utils.logging import verbosity_to_loglevel

from lightautoml.automl.presets.text_presets import TabularNLPAutoML
from lightautoml.addons.interpretation import LimeTextExplainer, L2XTextExplainer
from lightautoml.report import ReportDecoNLP

# Silence warnings coming from HuggingFace
# (50 is the numeric value of the standard `logging.CRITICAL` level).
import transformers
transformers.logging.set_verbosity(50)

from pprint import pprint

# Set the stdout logging level derived from verbosity 1 and report it.
logger = logging.getLogger()
level = verbosity_to_loglevel(1)
set_stdout_level(level)
# `logging.getLevelName` is the public way to resolve a numeric level to its
# name; the private `logging._levelToName` mapping is an implementation detail.
logger.info(f"Stdout logging level is {logging.getLevelName(level)}.")

%load_ext autoreload
%autoreload 2


[2022-11-12 15:22:15,386] (INFO): Stdout logging level is INFO.


#### Таски Tabular Preset

In [3]:
def _split_into_frames(X, y, test_size=0.2, random_state=42):
    """Split features/targets and pack them into train and test DataFrames.

    Shared by all ``get_data_*`` generators so the split and the column
    naming scheme are defined in exactly one place.

    Args:
        X: 2-D array of features, one row per sample.
        y: 2-D array of targets, one column per target.
        test_size: Fraction of rows that goes to the test frame.
        random_state: Seed passed to ``train_test_split``.

    Returns:
        Tuple ``(train, test, target_cols)``: two DataFrames with feature
        columns ``col_<i>`` followed by target columns ``target_<i>``, and
        the list of target column names.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    feature_cols = ['col_' + str(i) for i in range(X_train.shape[1])]
    train = pd.DataFrame(data=X_train, columns=feature_cols)
    test = pd.DataFrame(data=X_test, columns=feature_cols)

    target_cols = []
    for i in range(y.shape[1]):
        col = 'target_' + str(i)
        train[col] = y_train[:, i]
        test[col] = y_test[:, i]
        target_cols.append(col)

    return train, test, target_cols


def get_data_multireg(n_targets=10):
    """Generate a synthetic (multi-)regression dataset.

    Args:
        n_targets: Number of regression targets to generate.

    Returns:
        Tuple ``(train, test, target_cols)`` as produced by
        ``_split_into_frames``.
    """
    from sklearn.datasets import make_regression

    X, y = make_regression(n_targets=n_targets, random_state=1)
    # Force a 2-D target array so a single target is handled like many.
    y = y.reshape(y.shape[0], -1)
    return _split_into_frames(X, y)


def get_data_multiclass(n_classes=10, rs=1):
    """Generate a synthetic classification dataset with 100 features.

    Args:
        n_classes: Number of classes.
        rs: Random state passed to ``make_classification``.

    Returns:
        Tuple ``(train, test, target_cols)``; the single target column holds
        the class label.
    """
    from sklearn.datasets import make_classification

    X, y = make_classification(n_classes=n_classes, n_informative=30, n_features=100, random_state=rs)
    # Class labels come back 1-D; turn them into a single-column 2-D array.
    y = y.reshape(y.shape[0], -1)
    return _split_into_frames(X, y)


def get_data_reg():
    """Single-target regression data: ``get_data_multireg`` with one target."""
    return get_data_multireg(1)


def get_data_binary(rs=10):
    """Binary classification data: ``get_data_multiclass`` with two classes."""
    return get_data_multiclass(2, rs=rs)


def get_data_multilabel(n_labels=10):
    """Assemble a synthetic multilabel dataset from binary datasets.

    One binary dataset is generated per label (seeded with the label index)
    and its training-part target becomes one label column.

    NOTE(review): the features are taken from the *last* generated dataset
    only, so the other label columns come from independently generated
    datasets. Also only the training part of each binary dataset is used and
    it is split again here. That looks acceptable for a smoke test, but
    confirm it is intended.

    Args:
        n_labels: Number of label columns to generate; must be at least 1.

    Returns:
        Tuple ``(train, test, target_cols)`` as produced by
        ``_split_into_frames``.

    Raises:
        ValueError: If ``n_labels`` is smaller than 1.
    """
    if n_labels < 1:
        raise ValueError("n_labels must be at least 1")

    X = None
    label_columns = []
    for seed in range(n_labels):
        binary_train, _, _ = get_data_binary(rs=seed)
        values = binary_train.values
        # The last column is the binary target, everything before it is features.
        X = values[:, :-1]
        label_columns.append(values[:, -1].flatten().tolist())

    # Rows of `label_columns` are labels; transpose to (n_samples, n_labels).
    y = np.array(label_columns).T
    return _split_into_frames(X, y)


# Maps every supported task name to the generator of a matching toy dataset.
tasks_to_data = {
    "multilabel": get_data_multilabel,
    "multiclass": get_data_multiclass,
    "binary": get_data_binary,
    "reg": get_data_reg,
    "multi:reg": get_data_multireg,
}

# Per task name: the fitted preset, its out-of-fold and its test predictions.
res = {}

# Optional experiments kept for reference:
# def logloss(true, pred, sample_weight=None, **kwargs):
#     mask = sample_weight > 0
#     return (true[mask] == pred[mask]).mean()

for task_name, make_data in tasks_to_data.items():
    print("##########", task_name, "############")
    train, test, test_cols = make_data()

    # train["weight"] = np.random.randn(len(train))
    roles = {'target': test_cols}
    # A separate name for the string keeps the loop variable from being
    # rebound to the Task object inside its own loop.
    task = Task(task_name)  # , metric=logloss, greater_is_better=False,
    #                          loss="quantile", loss_params={"q": 0.1})

    # Smoke-test every built-in neural architecture with a tiny training budget.
    automl = TabularAutoML(
        task=task,
        timeout=600,
        general_params={
            "use_algos": [
                [
                    "linear_layer",
                    "mlp",
                    "dense",
                    "denselight",
                    "resnet",
                    "snn"
                ],
            ],
            "nested_cv": False,
            "skip_conn": False,
        },
        reader_params={"cv": 2},
        nn_params={
            "n_epochs": 2, "bs": 16, "num_workers": 0, "path_to_save": None,
        }
    )

    oof = automl.fit_predict(train, roles)
    preds_te = automl.predict(test)

    # Key by the plain task name instead of the private `task._name` attribute.
    res[task_name] = {
        "model": automl,
        "oof": oof,
        "preds_te": preds_te,
    }


[2022-11-12 15:24:46,891] (INFO2): CatBoost uses as obj. MultiCrossEntropy.
[2022-11-12 15:24:46,913] (INFO): Stdout logging level is ERROR.
[2022-11-12 15:24:46,915] (INFO): Task: multilabel

[2022-11-12 15:24:46,915] (INFO): Start automl preset with listed constraints:
[2022-11-12 15:24:46,915] (INFO): - time: 600.00 seconds
[2022-11-12 15:24:46,916] (INFO): - CPU: 4 cores
[2022-11-12 15:24:46,916] (INFO): - memory: 16 GB

[2022-11-12 15:24:46,917] (INFO): [1mTrain data shape: (64, 110)[0m



########## multilabel ############
multilabel isn`t supported in lgb


[2022-11-12 15:24:47,238] (INFO3): Feats was rejected during automatic roles guess: []
[2022-11-12 15:24:47,252] (INFO): Layer [1m1[0m train process start. Time left 599.66 secs
[2022-11-12 15:24:47,348] (INFO): Start fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN_linear_layer_0[0m ...
[2022-11-12 15:24:47,349] (DEBUG): Training params: {'num_workers': 0, 'pin_memory': False, 'max_length': 256, 'is_snap': False, 'input_bn': False, 'max_emb_size': 256, 'bert_name': None, 'pooling': 'cls', 'device': device(type='cuda', index=0), 'use_cont': True, 'use_cat': True, 'use_text': True, 'lang': 'en', 'deterministic': True, 'multigpu': False, 'random_state': 42, 'model': 'linear_layer', 'path_to_save': None, 'verbose_inside': None, 'verbose': 1, 'n_epochs': 2, 'snap_params': {'k': 3, 'early_stopping': True, 'patience': 16, 'swa': True}, 'bs': 16, 'emb_dropout': 0.1, 'emb_ratio': 3, 'opt': <class 'torch.optim.adam.Adam'>, 'opt_params': {'weight_decay': 0, 'lr': 0.0003}, 'sch': <class 'torch.optim.lr_

########## multiclass ############


[2022-11-12 15:24:51,955] (INFO3): Feats was rejected during automatic roles guess: []
[2022-11-12 15:24:51,969] (INFO): Layer [1m1[0m train process start. Time left 599.67 secs
[2022-11-12 15:24:52,065] (INFO): Start fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN_linear_layer_0[0m ...
[2022-11-12 15:24:52,066] (DEBUG): Training params: {'num_workers': 0, 'pin_memory': False, 'max_length': 256, 'is_snap': False, 'input_bn': False, 'max_emb_size': 256, 'bert_name': None, 'pooling': 'cls', 'device': device(type='cuda', index=0), 'use_cont': True, 'use_cat': True, 'use_text': True, 'lang': 'en', 'deterministic': True, 'multigpu': False, 'random_state': 42, 'model': 'linear_layer', 'path_to_save': None, 'verbose_inside': None, 'verbose': 1, 'n_epochs': 2, 'snap_params': {'k': 3, 'early_stopping': True, 'patience': 16, 'swa': True}, 'bs': 16, 'emb_dropout': 0.1, 'emb_ratio': 3, 'opt': <class 'torch.optim.adam.Adam'>, 'opt_params': {'weight_decay': 0, 'lr': 0.0003}, 'sch': <class 'torch.optim.lr_

########## binary ############


[2022-11-12 15:24:56,432] (INFO3): Feats was rejected during automatic roles guess: []
[2022-11-12 15:24:56,446] (INFO): Layer [1m1[0m train process start. Time left 599.72 secs
[2022-11-12 15:24:56,542] (INFO): Start fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN_linear_layer_0[0m ...
[2022-11-12 15:24:56,543] (DEBUG): Training params: {'num_workers': 0, 'pin_memory': False, 'max_length': 256, 'is_snap': False, 'input_bn': False, 'max_emb_size': 256, 'bert_name': None, 'pooling': 'cls', 'device': device(type='cuda', index=0), 'use_cont': True, 'use_cat': True, 'use_text': True, 'lang': 'en', 'deterministic': True, 'multigpu': False, 'random_state': 42, 'model': 'linear_layer', 'path_to_save': None, 'verbose_inside': None, 'verbose': 1, 'n_epochs': 2, 'snap_params': {'k': 3, 'early_stopping': True, 'patience': 16, 'swa': True}, 'bs': 16, 'emb_dropout': 0.1, 'emb_ratio': 3, 'opt': <class 'torch.optim.adam.Adam'>, 'opt_params': {'weight_decay': 0, 'lr': 0.0003}, 'sch': <class 'torch.optim.lr_

########## reg ############


[2022-11-12 15:25:01,604] (INFO3): Feats was rejected during automatic roles guess: []
[2022-11-12 15:25:01,618] (INFO): Layer [1m1[0m train process start. Time left 599.73 secs
[2022-11-12 15:25:01,716] (INFO): Start fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN_linear_layer_0[0m ...
[2022-11-12 15:25:01,717] (DEBUG): Training params: {'num_workers': 0, 'pin_memory': False, 'max_length': 256, 'is_snap': False, 'input_bn': False, 'max_emb_size': 256, 'bert_name': None, 'pooling': 'cls', 'device': device(type='cuda', index=0), 'use_cont': True, 'use_cat': True, 'use_text': True, 'lang': 'en', 'deterministic': True, 'multigpu': False, 'random_state': 42, 'model': 'linear_layer', 'path_to_save': None, 'verbose_inside': None, 'verbose': 1, 'n_epochs': 2, 'snap_params': {'k': 3, 'early_stopping': True, 'patience': 16, 'swa': True}, 'bs': 16, 'emb_dropout': 0.1, 'emb_ratio': 3, 'opt': <class 'torch.optim.adam.Adam'>, 'opt_params': {'weight_decay': 0, 'lr': 0.0003}, 'sch': <class 'torch.optim.lr_

########## multi:reg ############
multi:reg isn`t supported in lgb


[2022-11-12 15:25:05,958] (INFO3): Feats was rejected during automatic roles guess: []
[2022-11-12 15:25:05,971] (INFO): Layer [1m1[0m train process start. Time left 599.68 secs
[2022-11-12 15:25:06,067] (INFO): Start fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN_linear_layer_0[0m ...
[2022-11-12 15:25:06,068] (DEBUG): Training params: {'num_workers': 0, 'pin_memory': False, 'max_length': 256, 'is_snap': False, 'input_bn': False, 'max_emb_size': 256, 'bert_name': None, 'pooling': 'cls', 'device': device(type='cuda', index=0), 'use_cont': True, 'use_cat': True, 'use_text': True, 'lang': 'en', 'deterministic': True, 'multigpu': False, 'random_state': 42, 'model': 'linear_layer', 'path_to_save': None, 'verbose_inside': None, 'verbose': 1, 'n_epochs': 2, 'snap_params': {'k': 3, 'early_stopping': True, 'patience': 16, 'swa': True}, 'bs': 16, 'emb_dropout': 0.1, 'emb_ratio': 3, 'opt': <class 'torch.optim.adam.Adam'>, 'opt_params': {'weight_decay': 0, 'lr': 0.0003}, 'sch': <class 'torch.optim.lr_

#### Игра с параметрами Tabular Preset

In [4]:
class SimpleNet(nn.Module):
    """Small fully connected network with one hidden layer.

    Layer order: batch normalisation of the input, linear projection to
    ``hidden_size``, SiLU activation, dropout, linear projection to ``n_out``.

    Args:
        n_in: Number of input features.
        n_out: Number of output units.
        hidden_size: Width of the hidden layer.
        drop_rate: Dropout probability applied after the activation.
        **kwargs: Accepted and ignored, so extra parameters passed by the
            caller do not break construction.
    """

    def __init__(
        self,
        n_in,
        n_out,
        hidden_size=128,
        drop_rate=0.1,
        **kwargs,
    ):
        super().__init__()
        # Named sub-modules, declared in the order they are applied.
        self.features = nn.Sequential(
            OrderedDict(
                [
                    ("norm", nn.BatchNorm1d(n_in)),
                    ("dense1", nn.Linear(n_in, hidden_size)),
                    ("act", nn.SiLU()),
                    ("dropout", nn.Dropout(p=drop_rate)),
                    ("dense2", nn.Linear(hidden_size, n_out)),
                ]
            )
        )

    def forward(self, x):
        """Run ``x`` through all layers in order and return the result."""
        return self.features(x)

def my_opt_space(trial: optuna.trial.Trial, estimated_n_trials, suggested_params):
    """Custom Optuna search space over batch size, hidden size and dropout.

    The sampled names ``hidden_size`` and ``drop_rate`` match the constructor
    arguments of ``SimpleNet``.

    Args:
        trial: Optuna trial used to sample the values.
        estimated_n_trials: Not used by this search space.
        suggested_params: Base parameters; they are deep-copied, never mutated.

    Returns:
        A copy of ``suggested_params`` with ``bs``, ``hidden_size`` and
        ``drop_rate`` replaced by the values sampled for this trial.
    """
    # Candidate sizes 64, 128, 256, 512 and 1024.
    power_of_two_choices = [2 ** exponent for exponent in range(6, 11)]

    # `copy` is `deepcopy` in this notebook, so the caller's dict stays intact.
    params = copy(suggested_params)
    # Keyword arguments are evaluated left to right, which keeps the sampling
    # order: bs, hidden_size, drop_rate.
    params.update(
        bs=trial.suggest_categorical("bs", power_of_two_choices),
        hidden_size=trial.suggest_categorical("hidden_size", power_of_two_choices),
        drop_rate=trial.suggest_float("drop_rate", 0.0, 0.3),
    )
    return params

In [9]:
def _general(*algos):
    """Build ``general_params`` whose ``use_algos`` is one list of the given algorithms."""
    return {"use_algos": [list(algos)]}


def _tuning_params():
    """Return a fresh dict with the tuning budget shared by the tuned configs."""
    return {
        "max_tuning_iter": 5,
        "max_tuning_time": 3600,
        "fit_on_holdout": True,
    }


# Preset configurations keyed by a string id; later cells select entries by id.
# NOTE(review): the "0"/"1" keys inside `nn_params` presumably address the
# neural networks by their position in `use_algos` - confirm against the
# LightAutoML documentation.
configs = {
    # Default neural network plus LightGBM.
    "0": {
        "general_params": _general("nn", "lgbm"),
        "nn_params": {"n_epochs": 2},
    },
    # A named built-in architecture plus LightGBM.
    "1": {
        "general_params": _general("dense", "lgbm"),
        "nn_params": {"n_epochs": 2},
    },
    # Two architectures with separate epoch budgets.
    "2": {
        "general_params": _general("lgbm", "mlp", "dense"),
        "nn_params": {
            "0": {"n_epochs": 2},
            "1": {"n_epochs": 5},
        },
    },
    # Same as "2", but the MLP is requested in its tuned variant.
    "3": {
        "general_params": _general("lgbm", "mlp_tuned", "dense"),
        "nn_params": {
            "0": {"n_epochs": 2},
            "1": {"n_epochs": 5},
            "tuning_params": _tuning_params(),
        },
    },
    # Both networks requested in their tuned variants, no boosting.
    "4": {
        "general_params": _general("mlp_tuned", "dense_tuned"),
        "nn_params": {
            "0": {"n_epochs": 2},
            "1": {"n_epochs": 5},
            "tuning_params": _tuning_params(),
        },
    },
    # Architecture passed as a class instead of a name.
    "5": {
        "general_params": _general("lgbm", MLP),
        "nn_params": {
            "tuned": True,
            "tuning_params": _tuning_params(),
        },
    },
    # Custom network class with a custom search space; the boosting entry
    # is disabled in this configuration.
    "6": {
        "general_params": _general(SimpleNet),
        "nn_params": {
            "tuned": True,
            "optimization_search_space": my_opt_space,
            "tuning_params": _tuning_params(),
        },
    },
}


In [10]:
# NOTE(review): this cell reads `task`, `train`, `test` and `roles` from
# whichever cell defined them last. After a fresh top-to-bottom run they come
# from the final iteration of the task loop above ("multi:reg"), while the
# recorded output below was produced with a binary task - confirm which
# dataset is intended and define it explicitly here.
for _id, config in configs.items():
    # Only configuration "5" is exercised in this cell.
    if _id != "5":
        continue

    print("\n\n\n\n############ CONFIG ############")
    pprint(config)
    print("################################")

    # Build a new dict instead of aliasing `config`: assigning into an alias
    # would silently rewrite the shared entry inside `configs`.
    _config = {
        **config,
        "nn_params": {**config["nn_params"], "path_to_save": None, "verbose": None},
    }
    automl = TabularAutoML(
        debug=True,
        task=task,
        timeout=600,
        reader_params={"cv": 2},
        **_config
    )

    oof_pred = automl.fit_predict(train, roles=roles)
    test_pred = automl.predict(test)

[2022-11-12 15:28:12,213] (INFO): Stdout logging level is ERROR.
[2022-11-12 15:28:12,215] (INFO): Task: binary

[2022-11-12 15:28:12,215] (INFO): Start automl preset with listed constraints:
[2022-11-12 15:28:12,216] (INFO): - time: 600.00 seconds
[2022-11-12 15:28:12,216] (INFO): - CPU: 4 cores
[2022-11-12 15:28:12,216] (INFO): - memory: 16 GB

[2022-11-12 15:28:12,217] (INFO): [1mTrain data shape: (700, 4)[0m

[2022-11-12 15:28:12,262] (INFO3): Feats was rejected during automatic roles guess: []
[2022-11-12 15:28:12,265] (INFO): Layer [1m1[0m train process start. Time left 599.95 secs
[2022-11-12 15:28:12,272] (INFO): Start hyperparameters optimization for [1mLvl_0_Pipe_0_Mod_0_Tuned_TorchNN_0[0m ... Time budget is 314.97 secs
[2022-11-12 15:28:12,274] (INFO): A new study created in memory with name: no-name-45d48ece-c4fd-4850-90fe-262a70868da7
[2022-11-12 15:28:12,278] (DEBUG): number of text features: 0 
[2022-11-12 15:28:12,279] (DEBUG): number of categorical features: 0 
[





############ CONFIG ############
{'general_params': {'use_algos': [['lgbm',
                                   <class 'lightautoml.ml_algo.torch_based.nn_models.MLP'>]]},
 'nn_params': {'tuned': True,
               'tuning_params': {'fit_on_holdout': True,
                                 'max_tuning_iter': 5,
                                 'max_tuning_time': 3600}}}
################################


[2022-11-12 15:28:13,042] (INFO): Trial 0 finished with value: 0.5766060606060607 and parameters: {'bs': 128, 'weight_decay_bin': 0, 'lr': 0.029154431891537533}. Best is trial 0 with value: 0.5766060606060607.
[2022-11-12 15:28:13,046] (INFO3): [1mTrial 1[0m with hyperparameters {'bs': 128, 'weight_decay_bin': 0, 'lr': 0.029154431891537533} scored 0.5766060606060607 in 0:00:00.767212
[2022-11-12 15:28:13,051] (DEBUG): number of text features: 0 
[2022-11-12 15:28:13,051] (DEBUG): number of categorical features: 0 
[2022-11-12 15:28:13,051] (DEBUG): number of continuous features: 1 
[2022-11-12 15:28:13,528] (INFO): Trial 1 finished with value: 0.5976242424242424 and parameters: {'bs': 512, 'weight_decay_bin': 0, 'lr': 5.415244119402538e-05}. Best is trial 1 with value: 0.5976242424242424.
[2022-11-12 15:28:13,532] (INFO3): [1mTrial 2[0m with hyperparameters {'bs': 512, 'weight_decay_bin': 0, 'lr': 5.415244119402538e-05} scored 0.5976242424242424 in 0:00:00.480910
[2022-11-12 15:28:

#### NLP тесты

In [11]:
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
df = pd.read_csv("/home/user/MKuznetsov/Tabular_nn/multi_nn_merge/LightAutoML_13062022/nlp_test/LightAutoML/data/bankiru_new_sample100k.csv")
# Seed the subsample so the cell is reproducible; the split below was already seeded.
df = df.sample(1000, random_state=42)
train, test = train_test_split(df, test_size=300, random_state=42, stratify=df.is_good)

# Binary target with two free-text feature columns.
roles = {'target': 'is_good',
         'text': ['bank', 'message'],
        }


task = Task('binary')

# Short NLP run: a small Russian BERT with mean pooling, two epochs.
automl = TabularNLPAutoML(task = task,
                          timeout = 3600,
                          gpu_ids = '0',
                          general_params = {'use_algos': ['nn']},
                          nn_params = {'lang': 'ru', 'bert_name': "cointegrated/rubert-tiny",
                                       'pooling': 'mean',
                                       'snap_params': { 'k': 1, 'early_stopping': True, 'patience': 1, 'swa': False},
                                       'n_epochs': 2,
                                       'num_workers': 0, "path_to_save": None,},
                          )

oof_pred = automl.fit_predict(train, roles=roles, verbose=1)
# Rows that have at least one non-NaN out-of-fold prediction.
not_nan = np.any(~np.isnan(oof_pred.data), axis=1)



[15:28:18] Stdout logging level is INFO.


[2022-11-12 15:28:18,568] (INFO): Stdout logging level is INFO.
[2022-11-12 15:28:18,570] (INFO3): Model language mode: ru


[15:28:18] Task: binary



[2022-11-12 15:28:18,570] (INFO): Task: binary



[15:28:18] Start automl preset with listed constraints:


[2022-11-12 15:28:18,571] (INFO): Start automl preset with listed constraints:


[15:28:18] - time: 3600.00 seconds


[2022-11-12 15:28:18,571] (INFO): - time: 3600.00 seconds


[15:28:18] - CPU: 4 cores


[2022-11-12 15:28:18,572] (INFO): - CPU: 4 cores


[15:28:18] - memory: 16 GB



[2022-11-12 15:28:18,573] (INFO): - memory: 16 GB



[15:28:18] [1mTrain data shape: (700, 4)[0m



[2022-11-12 15:28:18,574] (INFO): [1mTrain data shape: (700, 4)[0m

[2022-11-12 15:28:18,614] (INFO3): Feats was rejected during automatic roles guess: []


[15:28:18] Layer [1m1[0m train process start. Time left 3599.96 secs


[2022-11-12 15:28:18,617] (INFO): Layer [1m1[0m train process start. Time left 3599.96 secs


[15:28:18] Start fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN[0m ...


[2022-11-12 15:28:18,638] (INFO): Start fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN[0m ...
[2022-11-12 15:28:18,639] (DEBUG): Training params: {'num_workers': 0, 'pin_memory': False, 'max_length': 256, 'is_snap': False, 'input_bn': False, 'max_emb_size': 50, 'bert_name': 'cointegrated/rubert-tiny', 'pooling': 'mean', 'device': device(type='cuda', index=0), 'use_cont': True, 'use_cat': True, 'use_text': True, 'lang': 'ru', 'deterministic': False, 'multigpu': False, 'random_state': 42, 'model': '_linear_layer', 'path_to_save': None, 'verbose_inside': None, 'verbose': 1, 'n_epochs': 2, 'snap_params': {'k': 1, 'early_stopping': True, 'patience': 1, 'swa': False}, 'bs': 16, 'emb_dropout': 0.1, 'emb_ratio': 3, 'opt': <class 'torch.optim.adam.Adam'>, 'opt_params': {'lr': 1e-05}, 'sch': <class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, 'scheduler_params': {'patience': 5, 'factor': 0.5, 'verbose': True}, 'loss': None, 'loss_params': {}, 'loss_on_logits': True, 'clip_grad': False, 'clip_grad_par

[15:28:47] Fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN[0m finished. score = [1m0.7236848484848485[0m


[2022-11-12 15:28:47,044] (INFO): Fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN[0m finished. score = [1m0.7236848484848485[0m


[15:28:47] [1mLvl_0_Pipe_0_Mod_0_TorchNN[0m fitting and predicting completed


[2022-11-12 15:28:47,045] (INFO): [1mLvl_0_Pipe_0_Mod_0_TorchNN[0m fitting and predicting completed


[15:28:47] Time left 3571.53 secs



[2022-11-12 15:28:47,047] (INFO): Time left 3571.53 secs



[15:28:47] [1mLayer 1 training completed.[0m



[2022-11-12 15:28:47,048] (INFO): [1mLayer 1 training completed.[0m



[15:28:47] [1mAutoml preset training completed in 28.48 seconds[0m



[2022-11-12 15:28:47,049] (INFO): [1mAutoml preset training completed in 28.48 seconds[0m



[15:28:47] Model description:
Final prediction for new objects (level 0) = 
	 1.00000 * (3 averaged models Lvl_0_Pipe_0_Mod_0_TorchNN) 



[2022-11-12 15:28:47,050] (INFO): Model description:
Final prediction for new objects (level 0) = 
	 1.00000 * (3 averaged models Lvl_0_Pipe_0_Mod_0_TorchNN) 



#### Utilized таски

In [12]:
# NOTE(review): absolute, machine-specific path - consider a configurable directory.
# It is loop-invariant, so it is defined once outside the loop.
path = "/home/user/MKuznetsov/Tabular_nn/multi_nn_merge/LightAutoML_13062022/nlp_test/LightAutoML/temp/lama_master_preset/LightAutoML/lightautoml/automl/presets/tabular_configs/"
example_path = path + "example.yaml"

# `config_id` instead of `id`, so the builtin `id` is not shadowed for the rest of the notebook.
for config_id, config in configs.items():
    # Only configurations "0".."3" are run with the utilized preset.
    if int(config_id) > 3:
        continue

    print("\n\n\n\n############ CONFIG ############")
    pprint(config)
    print("################################")

    # Start from a stock preset config and overlay this configuration on top.
    with open(path + "conf_6_sel_type_1_tuning_full_no_int_lgbm.yml", "rb") as f:
        yml_config = yaml.full_load(f)

    for k, v in config.items():
        if k in yml_config:
            yml_config[k].update(v)
        else:
            yml_config[k] = v

    yml_config["nn_params"].update({"path_to_save": None})

    yml_config = {**yml_config, "nn_pipeline_params": {}}

    try:
        # The merged config is handed to the preset through a temporary YAML file.
        with open(example_path, mode="wb") as file:
            yaml.dump(yml_config, file, encoding="utf-8")

        automl = TabularUtilizedAutoML(
            debug=True,
            task=task,
            timeout=600,
            reader_params={"cv": 2},
            configs_list=[example_path],
            **config
        )

        oof_pred = automl.fit_predict(train, roles=roles)
        test_pred = automl.predict(test)
    finally:
        # Remove the temporary file even if writing, fitting or predicting fails.
        if os.path.exists(example_path):
            os.remove(example_path)






############ CONFIG ############
{'general_params': {'use_algos': [['nn', 'lgbm']]},
 'nn_params': {'n_epochs': 2}}
################################


[2022-11-12 15:28:47,247] (INFO): Start automl [1mutilizator[0m with listed constraints:
[2022-11-12 15:28:47,248] (INFO): - time: 600.00 seconds
[2022-11-12 15:28:47,248] (INFO): - CPU: 4 cores
[2022-11-12 15:28:47,248] (INFO): - memory: 16 GB

[2022-11-12 15:28:47,249] (INFO): [1mIf one preset completes earlier, next preset configuration will be started[0m

[2022-11-12 15:28:47,251] (INFO): Start 0 automl preset configuration:
[2022-11-12 15:28:47,251] (INFO): [1mexample.yaml[0m, random state: {'reader_params': {'random_state': 42}, 'nn_params': {'default_params': {'random_state': 42}}, 'general_params': {'return_all_predictions': False}}
[2022-11-12 15:28:47,251] (INFO3): Found reader_params in kwargs, need to combine
[2022-11-12 15:28:47,252] (INFO3): Merged variant for reader_params = {'cv': 2, 'random_state': 42}
[2022-11-12 15:28:47,252] (INFO3): Found nn_params in kwargs, need to combine
[2022-11-12 15:28:47,252] (INFO3): Merged variant for nn_params = {'n_epochs': 2, 'pa





############ CONFIG ############
{'general_params': {'use_algos': [['dense', 'lgbm']]},
 'nn_params': {'n_epochs': 2}}
################################


[2022-11-12 15:28:51,735] (INFO3): Epoch: 0, train loss: 0.5195798277854919, val loss: 0.5195204615592957, val metric: 0.597769696969697
train (loss=0.517228): 100%|██████████| 1/1 [00:00<00:00, 19.46it/s]
val: 100%|██████████| 1/1 [00:00<00:00, 63.15it/s]
[2022-11-12 15:28:51,814] (INFO3): Epoch: 1, train loss: 0.5172278881072998, val loss: 0.5194492936134338, val metric: 0.5872484848484849
[2022-11-12 15:28:51,992] (INFO2): ===== Start working with [1mfold 1[0m for [1mLvl_0_Pipe_0_Mod_0_TorchNN_dense_0[0m =====
[2022-11-12 15:28:51,996] (DEBUG): number of text features: 0 
[2022-11-12 15:28:51,997] (DEBUG): number of categorical features: 0 
[2022-11-12 15:28:51,997] (DEBUG): number of continuous features: 1 
train (loss=0.51958): 100%|██████████| 1/1 [00:00<00:00, 18.45it/s]
val: 100%|██████████| 1/1 [00:00<00:00, 60.21it/s]
[2022-11-12 15:28:52,094] (INFO3): Epoch: 0, train loss: 0.5195798277854919, val loss: 0.5195225477218628, val metric: 0.5777696969696969
train (loss=0.5171





############ CONFIG ############
{'general_params': {'use_algos': [['lgbm', 'mlp', 'dense']]},
 'nn_params': {'0': {'n_epochs': 2}, '1': {'n_epochs': 5}}}
################################


[2022-11-12 15:28:57,445] (INFO2): ===== Start working with [1mfold 1[0m for [1mLvl_0_Pipe_0_Mod_0_TorchNN_mlp_0[0m =====
[2022-11-12 15:28:57,449] (DEBUG): number of text features: 0 
[2022-11-12 15:28:57,450] (DEBUG): number of categorical features: 0 
[2022-11-12 15:28:57,450] (DEBUG): number of continuous features: 1 
train (loss=0.51958): 100%|██████████| 1/1 [00:00<00:00, 51.28it/s]
val: 100%|██████████| 1/1 [00:00<00:00, 136.67it/s]
[2022-11-12 15:28:57,492] (INFO3): Epoch: 0, train loss: 0.5195798277854919, val loss: 0.5194867253303528, val metric: 0.5405575757575758
train (loss=0.51833): 100%|██████████| 1/1 [00:00<00:00, 58.29it/s]
val: 100%|██████████| 1/1 [00:00<00:00, 153.58it/s]
[2022-11-12 15:28:57,523] (INFO3): Epoch: 1, train loss: 0.5183302760124207, val loss: 0.5193183422088623, val metric: 0.5536969696969698
[2022-11-12 15:28:57,685] (INFO): Fitting [1mLvl_0_Pipe_0_Mod_0_TorchNN_mlp_0[0m finished. score = [1m0.5767515151515151[0m
[2022-11-12 15:28:57,686] (I





############ CONFIG ############
{'general_params': {'use_algos': [['lgbm', 'mlp_tuned', 'dense']]},
 'nn_params': {'0': {'n_epochs': 2},
               '1': {'n_epochs': 5},
               'tuning_params': {'fit_on_holdout': True,
                                 'max_tuning_iter': 5,
                                 'max_tuning_time': 3600}}}
################################


train (loss=3.62826): 100%|██████████| 3/3 [00:00<00:00, 90.39it/s]
val: 100%|██████████| 3/3 [00:00<00:00, 330.65it/s]
[2022-11-12 15:29:08,544] (INFO3): Epoch: 1, train loss: 3.6282565593719482, val loss: 3.1632421016693115, val metric: 0.5029090909090909
[2022-11-12 15:29:08,715] (INFO): Trial 0 finished with value: 0.5029090909090909 and parameters: {'bs': 128, 'weight_decay_bin': 0, 'lr': 0.029154431891537533}. Best is trial 0 with value: 0.5029090909090909.
[2022-11-12 15:29:08,719] (INFO3): [1mTrial 1[0m with hyperparameters {'bs': 128, 'weight_decay_bin': 0, 'lr': 0.029154431891537533} scored 0.5029090909090909 in 0:00:00.287404
[2022-11-12 15:29:08,723] (DEBUG): number of text features: 0 
[2022-11-12 15:29:08,724] (DEBUG): number of categorical features: 0 
[2022-11-12 15:29:08,724] (DEBUG): number of continuous features: 1 
train (loss=0.51958): 100%|██████████| 1/1 [00:00<00:00, 53.76it/s]
val: 100%|██████████| 1/1 [00:00<00:00, 157.21it/s]
[2022-11-12 15:29:08,764] (INFO