In [None]:
%run imports.py

In [None]:
# Load the 'Australian' dataset (second argument 0) from the local data store.
store = DataStore('/data/tabular_data')
raw_dataset = store.load_dataset('Australian', 0)

# Apply the split described by 'cv5_seed0_fold0'
# (presumably 5-fold CV, seed 0, fold 0 -- confirm against DatasetSplitter).
splitter = DatasetSplitter.from_string('cv5_seed0_fold0')
dataset = splitter.split(raw_dataset)
print(dataset.task_type)

# Metric name associated with each task type.
# A task type that is not listed here raises KeyError.
metric_by_task_type = {
    TaskType.REGRESSION: 'd2_absolute_error',
    TaskType.BINARY: 'neg_log_loss',
    TaskType.MULTICLASS: 'neg_log_loss',
}
metric = metric_by_task_type[dataset.task_type]

In [None]:
# Settings applied regardless of task type, in the order they are set on kwargs.
base_params = {
    'thread_count': 32,
    'task_type': 'GPU',
    'depth': 16,
    'learning_rate': 0.1,
    'metric_period': 1,
}

# (loss_function, eval_metric) per task type.
# For regression, plain 'RMSE' is the alternative loss that was tried here.
objective_by_task_type = {
    TaskType.REGRESSION: ('RMSEWithUncertainty', 'RMSE'),
    TaskType.BINARY: ('Logloss', 'AUC'),
    TaskType.MULTICLASS: ('MultiClass', 'MultiClass'),
}

# Tree-growing and regularisation settings, also task-independent.
tree_params = {
    'grow_policy': 'Lossguide',
    'random_strength': 0,
    'bootstrap_type': 'No',
    'l2_leaf_reg': 0.1,
}

kwargs = Bunch()
for param_name, param_value in base_params.items():
    setattr(kwargs, param_name, param_value)

# An unrecognised task type leaves loss_function / eval_metric unset.
objective = objective_by_task_type.get(dataset.task_type)
if objective is not None:
    kwargs.loss_function, kwargs.eval_metric = objective

for param_name, param_value in tree_params.items():
    setattr(kwargs, param_name, param_value)

model = CatBoostModel(dataset.task_type, catboost_kwargs=kwargs)

# Train against the validation split; stopping is governed by
# early_stopping_rounds, with max_iterations as a very large upper bound.
fit_options = dict(
    eval_set=(dataset.X_val, dataset.y_val),
    use_best_model=False,
    max_iterations=1_000_000,
    early_stopping_rounds=1_000,
    verbose=1,
)
model.fit(dataset.X_train, dataset.y_train, **fit_options)

In [None]:
# Per-iteration metric history recorded by the fitted CatBoost model.
evals = model.catboost_model.get_evals_result()

# Names of the metrics tracked on the training ('learn') set.
keys = list(evals['learn'])
keys

In [None]:
# One row of axes per metric: training curve on the left, validation on the right.
fig, axs = plt.subplots(
    nrows=len(keys),
    ncols=2,
    figsize=(6, 2 * len(keys)),
    squeeze=False,  # keep axs two-dimensional even with a single metric
)

# Iteration indices, sized from the history of the first training metric.
steps = np.arange(len(evals['learn'][keys[0]]))

# Drop iteration 0 from every curve (presumably because step 0 cannot be
# placed on the logarithmic x-axis used below).
s = slice(1, None)

for i, key in enumerate(keys):
    train_ax, val_ax = axs[i]

    train_ax.set_title(f'train {key}')
    train_ax.plot(steps[s], evals['learn'][key][s])

    val_ax.set_title(f'val {key}')
    val_ax.plot(steps[s], evals['validation'][key][s])

    for ax in (train_ax, val_ax):
        ax.set_xscale('log')
        # Logloss curves additionally get a logarithmic y-axis.
        if key == 'Logloss':
            ax.set_yscale('log')

plt.tight_layout()
plt.show()