In [1]:
%%capture
!pip install --upgrade flaml[automl] ipywidgets

In [2]:
# Standard library: garbage-collector control.
import gc
# Explicitly switch automatic garbage collection on.
gc.enable()

import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# Third-party: arrays, data frames, the CV splitter and the AutoML driver.
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from flaml import AutoML

# Single seed reused for the K-fold shuffle and for the FLAML search settings.
SEED = 2024

In [3]:
# Directory holding the competition CSV files.
DATA_DIR = '/kaggle/input/playground-series-s4e4'

# Resolve both file locations first, then read each split into a DataFrame.
train_csv = f'{DATA_DIR}/train.csv'
test_csv = f'{DATA_DIR}/test.csv'

train = pd.read_csv(train_csv)
test = pd.read_csv(test_csv)

In [4]:
# Remove the row identifier from both splits so it is never used as a predictor
# (the submission ids are read back later from sample_submission.csv).
# errors='ignore' keeps this cell idempotent: running it a second time, when
# `id` is already gone, is a no-op instead of a KeyError.
train = train.drop(columns='id', errors='ignore')
test = test.drop(columns='id', errors='ignore')

In [5]:
# Column to predict.
TARGET = 'Rings'

# Predictors are exactly the columns present in the test frame, in their original order.
features = list(test.columns)

In [6]:
# Train on log1p(target); predictions are mapped back with expm1 when the
# submission is built, so RMSE is measured on the log scale.
# The untransformed values are preserved in a side column and the transform is
# always computed from that column, so re-running this cell can never apply
# log1p twice. The extra column is not part of `features`, so it is not fed to the model.
RAW_TARGET = f'{TARGET}_raw'
if RAW_TARGET not in train.columns:
    train[RAW_TARGET] = train[TARGET]
train[TARGET] = np.log1p(train[RAW_TARGET])

In [7]:
# Search budget in seconds (11 hours) and the number of cross-validation folds.
TIME_BUDGET = 11 * 60 * 60
NUM_FOLDS = 10

# Shuffled, seeded K-fold splitter handed to FLAML as its split strategy.
cv_splitter = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=SEED)

# Keyword arguments expanded into AutoML.fit.
# 'verbose' is not set here, so the library default applies.
automl_settings = {
    'time_budget': TIME_BUDGET,
    'task': 'regression',
    'metric': 'rmse',
    'ensemble': True,
    'eval_method': 'cv',
    'split_type': cv_splitter,
    'retrain_full': True,
    'early_stop': True,
    'seed': SEED,
}

In [8]:
# Split the training frame into predictors and target, then launch the search.
X_train = train[features]
y_train = train[TARGET]

automl = AutoML()
automl.fit(X_train=X_train, y_train=y_train, **automl_settings)

[flaml.automl.logger: 04-25 11:43:32] {1680} INFO - task = regression
[flaml.automl.logger: 04-25 11:43:32] {1691} INFO - Evaluation method: cv
[flaml.automl.logger: 04-25 11:43:32] {1789} INFO - Minimizing error metric: rmse
[flaml.automl.logger: 04-25 11:43:32] {1901} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth']
[flaml.automl.logger: 04-25 11:43:32] {2219} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 04-25 11:43:34] {2345} INFO - Estimated sufficient time budget=21071s. Estimated necessary time budget=180s.
[flaml.automl.logger: 04-25 11:43:34] {2392} INFO -  at 2.5s,	estimator lgbm's best error=0.2351,	best estimator lgbm's best error=0.2351
[flaml.automl.logger: 04-25 11:43:34] {2219} INFO - iteration 1, current learner lgbm
[flaml.automl.logger: 04-25 11:43:36] {2392} INFO -  at 4.7s,	estimator lgbm's best error=0.2351,	best estimator lgbm's best error=0.2351
[flaml.automl.logger: 04-25 11:43:36] 

In [9]:
# Display the name of the learner that FLAML reports as best after the search.
automl.best_estimator

'lgbm'

In [10]:
# Score the test rows using the same predictor columns the model was fitted on.
X_test = test[features]
preds = automl.predict(X_test)

In [11]:
# Start from the sample submission so its `id` column and row order are reused.
sub = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

# Invert the log1p transform, then bound the result to [1, 29].
# NOTE(review): 1 and 29 are presumably the valid range of Rings — confirm against the data.
rings_pred = np.expm1(preds)
sub[TARGET] = rings_pred.clip(1, 29)

In [12]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
sub.to_csv('submission.csv', index=False)

In [13]:
# Sanity check: print the first lines of the file that was just written.
!head submission.csv

id,Rings
90615,9.954658386395014
90616,9.756967595256462
90617,9.951528042365068
90618,10.418389505972485
90619,7.534368385255064
90620,9.236722525546163
90621,10.795400074816992
90622,6.194490249429808
90623,7.893314865617428
