In [1]:
%%capture
!pip install --upgrade autogluon ipywidgets

In [2]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor

In [3]:
# Folder that holds the competition CSV files.
DATA_DIR = '/kaggle/input/playground-series-s4e4'

# Read the training rows, the test rows and the submission template in one pass.
train, test, sample_sub = (
    pd.read_csv(f'{DATA_DIR}/{stem}.csv')
    for stem in ('train', 'test', 'sample_submission')
)

In [4]:
# Remove the 'id' column from both frames so it is not handed to the model
# as a feature.
train, test = (frame.drop(columns='id') for frame in (train, test))

In [5]:
# Name of the label column to predict.
TARGET = 'Rings'
# Every column of the test frame, in order, as a plain Python list.
features = list(test.columns)

In [6]:
# Fit on log1p(target); predictions are mapped back with expm1 when the
# submission file is written.
#
# Re-running this cell must not apply log1p a second time (that would silently
# corrupt the target), so the transform is recorded in the frame's metadata
# and skipped when the flag is already set.
if not train.attrs.get('target_log1p', False):
    train[TARGET] = np.log1p(train[TARGET])
    train.attrs['target_log1p'] = True

In [7]:
# Training configuration: optimisation metric and wall-clock budget in seconds
# (10 hours).
metric = 'rmse'
time_limit = 10 * 60 * 60

# Build the predictor for the target column; model artefacts are written
# under /kaggle/working/models.
predictor = TabularPredictor(
    path='/kaggle/working/models',
    label=TARGET,
    eval_metric=metric,
)

# Fit with the 'best_quality' preset inside the time budget. The call is left
# as the last expression so the cell still shows the fitted predictor.
predictor.fit(
    train_data=train,
    presets='best_quality',
    time_limit=time_limit,
)

Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Dynamic stacking is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
Detecting stacked overfitting by sub-fitting AutoGluon on the input data. That is, copies of AutoGluon will be sub-fit on subset(s) of the data. Then, the holdout validation data is used to detect stacked overfitting.
Sub-fit(s) time limit is: 36000 seconds.
Starting holdout-based sub-fit for dynamic stacking. Context path is: /kaggle/working/models/ds_sub_fit/sub_fit_ho.
Running the sub-fit in a ray process to avoid memory leakage.
Spend 9191 seconds for the sub-fit(s) during dynamic stacking.
Time left for full fit of AutoGluon: 26809 se

<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7ac3c8f5aa70>

In [8]:
%%time
predictor.leaderboard(train)

INFO:sklearnex: sklearn.neighbors.KNeighborsRegressor.predict: running accelerated version on CPU
INFO:sklearnex: sklearn.utils.validation._assert_all_finite: running accelerated version on CPU
INFO:sklearnex: sklearn.neighbors.KNeighborsRegressor.predict: running accelerated version on CPU
INFO:sklearnex: sklearn.utils.validation._assert_all_finite: running accelerated version on CPU


CPU times: user 1h 27min 10s, sys: 34.8 s, total: 1h 27min 45s
Wall time: 44min 11s


Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,KNeighborsDist_BAG_L1,-0.008900,-0.163956,root_mean_squared_error,0.857852,0.813467,0.123846,0.857852,0.813467,0.123846,1,True,2
1,RandomForest_r195_BAG_L1,-0.055975,-0.151740,root_mean_squared_error,23.866434,6.505976,57.385278,23.866434,6.505976,57.385278,1,True,24
2,RandomForestMSE_BAG_L1,-0.069901,-0.151892,root_mean_squared_error,6.760302,4.836047,62.088589,6.760302,4.836047,62.088589,1,True,5
3,ExtraTrees_r42_BAG_L1,-0.080895,-0.150771,root_mean_squared_error,8.984286,4.607958,16.226547,8.984286,4.607958,16.226547,1,True,20
4,ExtraTreesMSE_BAG_L1,-0.084212,-0.150675,root_mean_squared_error,6.692741,4.411572,17.638795,6.692741,4.411572,17.638795,1,True,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,NeuralNetFastAI_r103_BAG_L1,-0.152755,-0.154633,root_mean_squared_error,14.845546,3.211777,505.271704,14.845546,3.211777,505.271704,1,True,36
66,NeuralNetFastAI_r143_BAG_L1,-0.152931,-0.153539,root_mean_squared_error,5.764209,1.434450,231.788147,5.764209,1.434450,231.788147,1,True,39
67,NeuralNetFastAI_r156_BAG_L1,-0.153394,-0.154230,root_mean_squared_error,3.382385,2.148957,62.065611,3.382385,2.148957,62.065611,1,True,41
68,NeuralNetFastAI_r11_BAG_L1,-0.153439,-0.155755,root_mean_squared_error,30.420574,6.791892,1054.694964,30.420574,6.791892,1054.694964,1,True,32


In [9]:
# Predict the target for every test row. The predictor was fitted on the
# log1p-transformed target, so these values are still on the log1p scale.
preds = predictor.predict(data=test)

INFO:sklearnex: sklearn.neighbors.KNeighborsRegressor.predict: running accelerated version on CPU
INFO:sklearnex: sklearn.utils.validation._assert_all_finite: running accelerated version on CPU
INFO:sklearnex: sklearn.neighbors.KNeighborsRegressor.predict: running accelerated version on CPU
INFO:sklearnex: sklearn.utils.validation._assert_all_finite: running accelerated version on CPU


In [10]:
def create_submission_files(preds, config, notebook='04', clip_range=(1, 29)):
    """Write a submission CSV from log1p-scale predictions.

    The predictions are mapped back to the original scale with ``expm1``,
    clipped to ``clip_range`` and stored in the ``TARGET`` column of a copy of
    the module-level ``sample_sub`` template. The result is saved as
    ``nb{notebook}_{config}.csv`` in the current working directory.

    Parameters
    ----------
    preds : array-like
        One log1p-scale prediction per row of ``sample_sub``, in row order.
        A pandas Series/DataFrame is accepted; its index is ignored.
    config : str
        Label of the run, used in the output file name.
    notebook : str, default '04'
        Notebook identifier, used in the output file name.
    clip_range : tuple of (lower, upper), default (1, 29)
        Inclusive bounds applied after the inverse transform. The default
        is the range chosen from the training data.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of rows in
        ``sample_sub``.
    """
    sub = sample_sub.copy()

    # Assign by position, not by index label: a Series whose index differs from
    # the template's would otherwise be aligned on labels and leave NaNs in the
    # submission without any error.
    values = np.asarray(preds).ravel()
    if values.shape[0] != len(sub):
        raise ValueError(
            f'Expected {len(sub)} predictions, got {values.shape[0]}.'
        )

    lower, upper = clip_range
    # Undo the log1p target transform, then clip to the allowed range.
    sub[TARGET] = np.clip(np.expm1(values), lower, upper)
    sub.to_csv(f'nb{notebook}_{config}.csv', index=False)

In [11]:
# Label the submission with the framework, the metric and the training budget
# in whole hours, then write the CSV.
config = f'autogluon_rmse_{time_limit // 3600}hr'
create_submission_files(preds=preds, config=config)