Step 0.0. Install LightAutoML

In [None]:
!pip install -U lightautoml

Step 0.1. Import necessary libraries

In [None]:
# Standard python libraries
import os
import time
import re

# Installed libraries
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score
import torch

# Imports from our package
from lightautoml.automl.presets.tabular_presets import TabularAutoML, TabularUtilizedAutoML
from lightautoml.dataset.roles import DatetimeRole
from lightautoml.tasks import Task

Step 0.2. Parameters

In [None]:
# Run configuration referenced by the cells below.
N_THREADS = 4            # number of threads for lgbm and linear models
N_FOLDS = 5              # number of folds for AutoML
RANDOM_STATE = 42        # fixed random state
TEST_SIZE = 0.2          # test size for metric check
TIMEOUT = 5 * 60 * 60    # AutoML time budget in seconds (5 hours)

Step 0.3. Set the number of torch threads and fix the numpy seed

In [None]:
# Limit torch to the configured number of threads and seed numpy's global RNG.
torch.set_num_threads(N_THREADS)
np.random.seed(RANDOM_STATE)

Step 0.4. Data load

In [None]:
# Read the training table from the competition input directory.
train_data = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-oct-2021/train.csv"
)

In [None]:
# Read the test table that predictions are made for later in the notebook.
test = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-oct-2021/test.csv"
)

In [None]:
# Read the submission template; its 'target' column is overwritten in Step 5.
sample_submission = pd.read_csv(
    filepath_or_buffer="../input/tabular-playground-series-oct-2021/sample_submission.csv"
)

========= AutoML preset usage =========                              
Step 1. Create Task

In [None]:
# Declare the problem type handed to the AutoML preset: a 'binary' task.
task = Task(name='binary')

Step 2. Setup columns roles

In [None]:
# Column roles passed to automl.fit_predict: which column is the target and
# which column should be dropped.
# NOTE(review): the 'drop' entry names a column 'Id' — confirm the exact
# spelling/case against the train.csv header.
roles = dict(target='target', drop='Id')

Step 3. Create AutoML from preset

In [None]:
%%time 

automl = TabularUtilizedAutoML(task = task, 
                       timeout = TIMEOUT,
                       cpu_limit = N_THREADS,
                       general_params={'use_algos': [['lgb', 'cb'], ['lgb_tuned'],]},
                       reader_params = {'n_jobs': N_THREADS})
oof_pred = automl.fit_predict(train_data, roles = roles)
print('oof_pred:\n{}\nShape = {}'.format(oof_pred[:10], oof_pred.shape))

Step 4. Predict on the test data and inspect the predictions

In [None]:
%%time

test_pred = automl.predict(test)
print('Prediction for test data:\n{}\nShape = {}'.format(test_pred[:10], test_pred.shape))


Step 5. Generate submission

In [None]:
# Put the first column of the test predictions into the submission's
# 'target' column, then preview the result.
sample_submission = sample_submission.assign(target=test_pred.data[:, 0])
sample_submission.head()

In [None]:
# Write the submission file without the DataFrame index column.
sample_submission.to_csv(path_or_buf='TPS10_21_LightAutoML.csv', index=False)