# Load dependencies

In [None]:
pip install -U lightautoml

In [None]:
# Standard python libraries
import os
import time
import re

# Installed libraries
import numpy as np
import pandas as pd
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

# Imports from our package
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.tasks import Task
from lightautoml.dataset.roles import NumericRole

In [None]:
# Read the three competition tables in one pass: the training data, the test
# data and the sample submission that is later filled in with predictions.
train, test, sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-jun-2021/train.csv',
        '../input/tabular-playground-series-jun-2021/test.csv',
        '../input/tabular-playground-series-jun-2021/sample_submission.csv',
    )
)

In [None]:
# Encode the string labels "Class_1" .. "Class_9" as integer ids 0 .. 8.
#
# A dictionary lookup is used instead of np.select because np.select falls
# back to 0 for every row that matches no condition. That had two silent
# failure modes:
#   * an unexpected or missing label was turned into class 0;
#   * re-running this cell on the already-encoded column reset every row to 0.
label_to_id = {'Class_' + str(i): i - 1 for i in range(1, 10)}

# Skip the conversion when the column already holds the integer ids, so the
# cell can be re-executed safely.
if not train['target'].isin(list(label_to_id.values())).all():
    encoded_target = train['target'].map(label_to_id)
    unmapped_rows = encoded_target.isna()
    if unmapped_rows.any():
        # Fail loudly instead of assigning an arbitrary class.
        unknown_labels = train.loc[unmapped_rows, 'target'].unique().tolist()
        raise ValueError('Unexpected target labels: ' + str(unknown_labels))
    train['target'] = encoded_target.astype(int)

In [None]:
#X_test = test.drop(['id'], axis=1)
#X = train.drop(['id', 'target'], axis=1)
#y = train.target

# LightAutoML

In [None]:
# Run configuration.
TARGET_NAME = 'target'   # name of the label column
TIMEOUT = 8 * 3600       # AutoML time budget, in seconds
N_THREADS = 4            # thread count; handed to AutoML as cpu_limit and to the reader as n_jobs

# Declared for completeness; not referenced by the cells visible in this part
# of the notebook.
N_FOLDS = 5              # intended number of folds for AutoML
RANDOM_STATE = 42        # intended fixed random state
TEST_SIZE = 0.2          # intended hold-out share for a metric check

# Column roles: which column is the target and which columns to drop.
roles = dict(target=TARGET_NAME, drop=['id'])

# Multiclass classification task definition.
task = Task('multiclass')

In [None]:
%%time

# Build the AutoML preset and fit it on the full training table.
# The time budget comes from the TIMEOUT constant of the configuration cell
# (previously a duplicated 60*60*8 literal, so editing TIMEOUT had no effect).
automl = TabularUtilizedAutoML(
    task = task,
    timeout = TIMEOUT,
    cpu_limit = N_THREADS,
    general_params = {
        # Two lists of algorithms, each with tuned LightGBM and tuned CatBoost
        # (presumably one list per stacking level - confirm against the
        # LightAutoML docs).
        'use_algos': [['lgb_tuned', 'cb_tuned'], ['lgb_tuned', 'cb_tuned']],
        'return_all_predictions': True,
        'weighted_blender_max_nonzero_coef': 0.0
    },
    #tuning_params = {'max_tuning_time': 60*5},
    reader_params = {'n_jobs': N_THREADS},
    #config_path = '../input/lightautoml-configs/conf_0_sel_type_0.yml'
)

# Predictions for the training rows; exported later as 'oof_lightautoml.csv'.
oof_pred = automl.fit_predict(train, roles = roles)

In [None]:
# Score the test table with the fitted AutoML object; test_pred.data is
# written into the submission frame further down.
test_pred = automl.predict(test)

In [None]:
#columns = ['pred_Linear_' + str(i) for i in range(1, 10)] + \
#          ['pred_LGBM_' + str(i) for i in range(1, 10)] + \
#          ['pred_CB_' + str(i) for i in range(1, 10)] 

In [None]:
#OOF_preds = pd.DataFrame(oof_pred.data, columns = columns)
#TEST_preds = pd.DataFrame(test_pred.data, columns = columns)

# Submission

In [None]:
# Column names shared by the exported prediction tables: Class_1 .. Class_9.
class_columns = ['Class_' + str(i) for i in range(1, 10)]

# Submission: overwrite every column after the first in the sample
# submission with the test predictions, then save.
sub.iloc[:, 1:] = test_pred.data
sub.to_csv('sub_lightautoml.csv', index = False)

# Out-of-fold export: the training ids next to the training-set predictions.
oof_frame = pd.DataFrame(oof_pred.data, columns=class_columns)
oof_export = pd.concat([train.id, oof_frame], axis=1)
oof_export.to_csv('oof_lightautoml.csv', index = False)

In [None]:
#pred1 = TEST_preds.iloc[:, 0:9]
#pred2 = TEST_preds.iloc[:, 9:18]
#pred3 = TEST_preds.iloc[:, 18:27]
#
#preds = pd.DataFrame(np.mean([pred1.values, 
#                              pred2.values,
#                              pred3.values], axis=0), 
#                     columns=['Class_' + str(i) for i in range(1, 10)])
#
#sub.iloc[:, 1:] = preds
#
#sub.to_csv('sub_lightautoml.csv', index = False)

In [None]:
# Export oof
#pd.concat([train.id, OOF_preds], axis=1).to_csv('oof_lightautoml.csv', index = False)