# Install

In [1]:
!pip install -U lightautoml



# Import libraries

In [2]:
import pandas as pd
from lightautoml.tasks import Task
from lightautoml.automl.presets.tabular_presets import TabularAutoML

# Load data

In [3]:
# Read the competition training table (id, feature columns and the `loss` target).
train_data = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-aug-2021/train.csv',
)
# Show the first five rows as the cell output.
train_data.head(n=5)

Unnamed: 0,id,f0,f1,f2,f3,f4,f5,f6,f7,f8,...,f91,f92,f93,f94,f95,f96,f97,f98,f99,loss
0,0,-0.00235,59,0.766739,-1.35046,42.2727,16.6857,30.3599,1.2673,0.392007,...,-42.4399,26.854,1.45751,0.696161,0.941764,1.82847,0.92409,2.29658,10.4898,15
1,1,0.784462,145,-0.463845,-0.530421,27324.9,3.47545,160.498,0.828007,3.73586,...,-184.132,7.90137,1.70644,-0.494699,-2.0583,0.819184,0.439152,2.3647,1.14383,3
2,2,0.317816,19,-0.432571,-0.382644,1383.26,19.7129,31.1026,-0.515354,34.4308,...,7.43721,37.2181,3.25339,0.337934,0.615037,2.21676,0.745268,1.69679,12.3055,6
3,3,0.210753,17,-0.616454,0.946362,-119.253,4.08235,185.257,1.38331,-47.5214,...,9.66778,0.626942,1.49425,0.517513,-10.2221,2.62731,0.61727,1.45645,10.0288,2
4,4,0.439671,20,0.968126,-0.092546,74.302,12.3065,72.186,-0.233964,24.3991,...,290.657,15.6043,1.73557,-0.476668,1.39019,2.19574,0.826987,1.78485,7.07197,1


In [4]:
# Read the competition test table (same layout as train, without `loss`).
test_data = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-aug-2021/test.csv',
)
# Show the first five rows as the cell output.
test_data.head(n=5)

Unnamed: 0,id,f0,f1,f2,f3,f4,f5,f6,f7,f8,...,f90,f91,f92,f93,f94,f95,f96,f97,f98,f99
0,250000,0.812665,15,-1.23912,-0.893251,295.577,15.8712,23.0436,0.942256,29.898,...,0.446389,-422.332,-1.4463,1.69075,1.0593,-3.01057,1.94664,0.52947,1.38695,8.78767
1,250001,0.190344,131,-0.501361,0.801921,64.8866,3.09703,344.805,0.807194,38.4219,...,0.377179,10352.2,21.0627,1.84351,0.251895,4.44057,1.90309,0.248534,0.863881,11.7939
2,250002,0.919671,19,-0.057382,0.901419,11961.2,16.3965,273.24,-0.0033,37.94,...,0.99014,3224.02,-2.25287,1.551,-0.559157,17.8386,1.83385,0.931796,2.33687,9.054
3,250003,0.860985,19,-0.549509,0.471799,7501.6,2.80698,71.0817,0.792136,0.395235,...,1.39688,9689.76,14.7715,1.4139,0.329272,0.802437,2.23251,0.893348,1.35947,4.84833
4,250004,0.313229,89,0.588509,0.167705,2931.26,4.34986,1.57187,1.1183,7.75463,...,0.862502,2693.35,44.1805,1.5802,-0.191021,26.253,2.68238,0.361923,1.5328,3.7066


In [5]:
# Read the sample submission; it supplies the `id` / `loss` layout that the
# final submission file is written in.
samp_sub = pd.read_csv(
    filepath_or_buffer='../input/tabular-playground-series-aug-2021/sample_submission.csv',
)
# Show the first five rows as the cell output.
samp_sub.head(n=5)

Unnamed: 0,id,loss
0,250000,0
1,250001,0
2,250002,0
3,250003,0
4,250004,0


# Model parameters

In [6]:
# LightGBM hyperparameters; handed to TabularAutoML as the LightGBM
# `default_params` in the training cell.
lgb_params = dict(
    metric='RMSE',
    # Regularisation strengths (effectively negligible at 1e-07).
    lambda_l1=1e-07,
    lambda_l2=1e-07,
    # Tree shape.
    num_leaves=31,
    min_child_samples=19,
    # Column / row subsampling.
    feature_fraction=0.8,
    bagging_fraction=0.8,
    bagging_freq=3,
    # Kept equal to the cpu_limit / n_jobs used when building the preset.
    num_threads=4,
)

In [7]:
# CatBoost hyperparameters; handed to TabularAutoML as the CatBoost
# `default_params` in the training cell.
# NOTE(review): od_wait (1200) is larger than num_trees (1000), so the
# overfitting detector presumably can never trigger — confirm this is intended.
# NOTE(review): max_depth=16 looks very expensive for this data size; the
# training log shows CatBoost hitting the time limit — consider lowering.
cb_params = dict(
    num_trees=1000,
    od_wait=1200,
    learning_rate=0.1,
    l2_leaf_reg=64,
    subsample=0.83,
    random_strength=17.17,
    max_depth=16,
    min_data_in_leaf=20,
    leaf_estimation_iterations=3,
    loss_function='RMSE',
    eval_metric='RMSE',
    bootstrap_type='Bernoulli',
    leaf_estimation_method='Newton',
    random_seed=42,
    thread_count=4,
)

# Train

In [8]:
# Build the LightAutoML tabular preset for a regression task:
#   - one-hour time budget (seconds) on 4 CPU cores,
#   - 10-fold CV with a fixed reader seed,
#   - a LightGBM + CatBoost model set, each started from the parameter dicts
#     defined in the parameter cells above.
automl = TabularAutoML(
    task=Task('reg'),
    timeout=3600,
    cpu_limit=4,
    reader_params={'n_jobs': 4, 'cv': 10, 'random_state': 42},
    general_params={'use_algos': [['lgb', 'cb']]},
    lgb_params={'default_params': lgb_params, 'freeze_defaults': True},
    cb_params={'default_params': cb_params, 'freeze_defaults': True},
    verbose=2,
)

# Fit on the training table: `loss` is the target and `id` is dropped from the
# features. `pred` keeps whatever fit_predict returns (presumably out-of-fold
# predictions for train_data — confirm against the LightAutoML docs).
pred = automl.fit_predict(train_data, roles={'target': 'loss', 'drop': ['id']})

Copying TaskTimer may affect the parent PipelineTimer, so copy will create new unlimited TaskTimer


Start automl preset with listed constraints:
- time: 3600 seconds
- cpus: 4 cores
- memory: 16 gb

Train data shape: (250000, 102)
Feats was rejected during automatic roles guess: []


Layer 1 ...
Train process start. Time left 3555.799206018448 secs
Start fitting Selector_LightGBM ...

===== Start working with fold 0 for Selector_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's l2: 62.5172
[200]	valid's l2: 62.2844
[300]	valid's l2: 62.255
[400]	valid's l2: 62.2481
Early stopping, best iteration is:
[327]	valid's l2: 62.2324
Selector_LightGBM fitting and predicting completed
Start fitting Lvl_0_Pipe_0_Mod_0_LightGBM ...

===== Start working with fold 0 for Lvl_0_Pipe_0_Mod_0_LightGBM =====

Training until validation scores don't improve for 100 rounds
[100]	valid's l2: 62.5233
[200]	valid's l2: 62.2446
[300]	valid's l2: 62.1996
[400]	valid's l2: 62.199
Early stopping, best iteration is:
[349]	valid's l2: 62.1746

===== Start working with fold

Time limit exceeded after calculating fold 6


Lvl_0_Pipe_0_Mod_1_CatBoost fitting and predicting completed
Time left -4.289296388626099


Time limit exceeded. Last level models will be blended and unused pipelines will be pruned.


Blending: Optimization starts with equal weights and score -61.84571189533628
Blending, iter 0: score = -61.663040741569525, weights = [1. 0.]
Blending, iter 1: score = -61.663040741569525, weights = [1. 0.]
No score update. Terminated

Automl preset training completed in 3604.51 seconds.


# Predict

In [9]:
# Score the test table with the fitted preset. The returned object exposes the
# raw prediction array as `.data`, which the submission cell reads.
test_pred = automl.predict(test_data)

# Submission

In [10]:
# Predictions are attached to the sample submission purely by row position, so
# make sure its ids line up with the test table before writing anything.
assert samp_sub['id'].equals(test_data['id']), \
    'sample_submission ids do not match test ids; predictions would be misaligned'

# Take column 0 of the prediction array (presumably the only column for this
# single-target regression) as the predicted loss.
samp_sub['loss'] = test_pred.data[:, 0]

# Write the competition file without the DataFrame index.
samp_sub.to_csv('submission.csv', index=False)