# Setup

In [1]:
import gc
import time
import warnings
import subprocess

gc.enable()
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

pd.set_option('display.max_columns', None)

from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold

import xgboost as xgb
import optuna

SEED = 55

In [2]:
assert xgb.__version__ == '2.0.2', 'XGBoost version differs from original notebook.' 

In [3]:
#Check GPU availability
try:
    subprocess.check_output('nvidia-smi')
    DEVICE = 'cuda'
except Exception:
    DEVICE = 'cpu'

print(f'Available device: {DEVICE}')

Available device: cuda


# Data preparation

In [4]:
DATA_DIR = '/kaggle/input/playground-series-s3e26'
train = pd.read_csv(f'{DATA_DIR}/train.csv')
test = pd.read_csv(f'{DATA_DIR}/test.csv')
sample_sub = pd.read_csv(f'{DATA_DIR}/sample_submission.csv')

original = pd.read_csv('/kaggle/input/cirrhosis-patient-survival-prediction/cirrhosis.csv')

### Reference: [EDA + Linear Model](https://www.kaggle.com/code/sid4ds/ps-s3e26-eda-linear-model)

In [5]:
# trial-specific portion of original data
trial = original.loc[~original.Drug.isna()].reset_index(drop=True)

In [6]:
original['Drug'] = original['Drug'].fillna('Non_trial')

**Engineered features:**

In [7]:
def create_cat(df):
    df['N_Days_cat'] = (df['N_Days'] < 1000).astype('category')
    df['Age_cat'] = (df['Age'] > 22000).astype('category')
    df['Bilirubin_cat'] = (df['Bilirubin'] > 2).astype('category')
    df['Cholesterol_cat'] = (df['Cholesterol'] > 400).astype('category')
    df['Albumin_cat'] = (df['Albumin'] < 3.2).astype('category')
    df['Copper_cat'] = (df['Copper'] > 90).astype('category')
    df['Alk_Phos_cat'] = (df['Alk_Phos'] > 2000).astype('category')
    df['SGOT_cat'] = (df['SGOT'] > 130).astype('category')
    df['Tryglicerides_cat'] = (df['Tryglicerides'] > 150).astype('category')
    df['Platelets_cat'] = (df['Platelets'] < 175).astype('category')
    df['Prothrombin_cat'] = (df['Prothrombin'] > 11).astype('category')
    return df


train = create_cat(train.copy())
test = create_cat(test.copy())
original = create_cat(original.copy())
trial = create_cat(trial.copy())

**Encoding categorical columns:**

In [8]:
def encode_cat(df):
    df['Drug'] = df['Drug'].replace({'Non_trial': -1, 'Placebo': 0, 'D-penicillamine': 1}) \
                           .astype('category')
    df['Sex'] = df['Sex'].replace({'F': 0, 'M': 1}).astype('category')
    df['Ascites'] = df['Ascites'].replace({'N': 0, 'Y': 1}).astype('category')
    df['Hepatomegaly'] = df['Hepatomegaly'].replace({'N': 0, 'Y': 1}).astype('category')
    df['Spiders'] = df['Spiders'].replace({'N': 0, 'Y': 1}).astype('category')
    df['Edema'] = df['Edema'].replace({'N': 0, 'S': 1, 'Y': 2}).astype('category')
    df['Stage'] = df['Stage'].astype('category')
    return df


train = encode_cat(train.copy())
test = encode_cat(test.copy())
original = encode_cat(original.copy())
trial = encode_cat(trial.copy())

**Encoding target:**

In [9]:
TARGET = 'Status'
target_mapping = {'C': 0, 'CL': 1, 'D': 2}

train[TARGET] = train[TARGET].replace(target_mapping)
original[TARGET] = original[TARGET].replace(target_mapping)
trial[TARGET] = trial[TARGET].replace(target_mapping)

**Feature sets:**

In [10]:
base_features = [
    'N_Days', 'Drug', 'Age', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 
    'Edema', 'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos', 
    'SGOT', 'Tryglicerides', 'Platelets', 'Prothrombin', 'Stage'
]

cat_features = test.select_dtypes(include='category').columns

all_features = test.columns

# Baseline

In [11]:
# competition metric
def comp_metric(y_true, y_pred):
    return log_loss(y_true, y_pred)

In [12]:
X, y = train[base_features], train[TARGET]
oof_preds = {}

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
for fold, (train_ids, val_ids) in enumerate(cv.split(X, y)):
    X_train, y_train = X.iloc[train_ids], y.iloc[train_ids]
    X_val, y_val = X.iloc[val_ids], y.iloc[val_ids]
    
    model = xgb.XGBClassifier(
        n_estimators=1000,
        learning_rate=0.1,
        early_stopping_rounds=100,
        objective='multi:softprob',
        eval_metric=comp_metric,
        booster='gbtree',
        tree_method='hist',
        device=DEVICE,
        enable_categorical=True,
        verbosity=0,
        n_jobs=-1,
        random_state=SEED)
    
    model.fit(
        X_train, y_train,
        eval_set=[(X_val, y_val)],
        verbose=False)
        
    val_preds = model.predict_proba(X_val)
    oof_preds.update(dict(zip(val_ids, val_preds)))
    
    score = comp_metric(y_val, val_preds)
    print(f'Fold #{fold}: {score:.4f}', end = ' | ')        
    _ = gc.collect()
    
oof_preds = pd.DataFrame.from_dict(oof_preds, orient='index').sort_index()
print(f'OOF score: {comp_metric(y, oof_preds):.4f}\n')

Fold #0: 0.4413 | Fold #1: 0.4459 | Fold #2: 0.4769 | Fold #3: 0.4617 | Fold #4: 0.4302 | OOF score: 0.4512



# Hyperparameter tuning

In [13]:
def objective(trial, features, model, extend, folds, seed):
    oof_preds = {}
    
    param_grid = {
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, step=0.01),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 15),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0, step=0.05),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0, step=0.05),
        'gamma': trial.suggest_float('gamma', 0, 20, step=0.1), #complexity-control
        'alpha': trial.suggest_float('alpha', 0, 5, step=0.1), #L1-reg
        'lambda': trial.suggest_float('lambda', 5e-3, 5e3, log=True), #L2-reg
        'max_cat_to_onehot': trial.suggest_categorical('max_cat_to_onehot', [2, 3, 4]),
        'max_delta_step': trial.suggest_float('max_delta_step', 0, 10, step=0.5),
        'grow_policy': trial.suggest_categorical('grow_policy', ['depthwise', 'lossguide'])
    }
    
    cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    for fold, (train_ids, val_ids) in enumerate(cv.split(train, train[TARGET])):
        X_train, X_val = train.iloc[train_ids], train.iloc[val_ids]
        if extend is not None:
            X_train = pd.concat([X_train, extend], axis=0) \
                        .drop_duplicates(keep='last', ignore_index=True)
        
        y_train, y_val = X_train.pop(TARGET), X_val.pop(TARGET)
        X_train, X_val = X_train[features], X_val[features]
        
        model.set_params(**param_grid)
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            verbose=0)
        
        val_preds = model.predict_proba(X_val)
        oof_preds.update(dict(zip(val_ids, val_preds)))
        
    oof_preds = pd.DataFrame.from_dict(oof_preds, orient='index').sort_index()
    return comp_metric(train[TARGET], oof_preds)

In [14]:
def tune_params(features, model, extend, folds, seed, n_trials, direction):
    study = optuna.create_study(
        sampler=optuna.samplers.TPESampler(
            consider_endpoints=True,
            multivariate=True,
            group=True,
            seed=seed),
        pruner=optuna.pruners.HyperbandPruner(),
        direction=direction
    )
    study.optimize(
        func=lambda trial: objective(
            trial, features, model, extend, folds, seed),
        n_trials=n_trials,
        gc_after_trial=True
    )
    return study

# Cross-validation framework

In [15]:
def custom_cv(features, model, extend, folds, seed, verbose=True):
    oof_preds = {}
    test_preds = {}
    
    X_test = test[features]
    
    cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=seed)
    for fold, (train_ids, val_ids) in enumerate(cv.split(train, train[TARGET])):
        X_train, X_val = train.iloc[train_ids], train.iloc[val_ids]
        if extend is not None:
            X_train = pd.concat([X_train, extend], axis=0) \
                        .drop_duplicates(keep='last', ignore_index=True)
        
        y_train, y_val = X_train.pop(TARGET), X_val.pop(TARGET)
        X_train, X_val = X_train[features], X_val[features]
        
        model.fit(
            X_train, y_train,
            eval_set=[(X_val, y_val)],
            verbose=0)
        
        val_preds = model.predict_proba(X_val)
        oof_preds.update(dict(zip(val_ids, val_preds)))
        test_preds[f'fold{fold}'] = model.predict_proba(X_test)
        
        if verbose:
            score = comp_metric(y_val, val_preds)
            print(f'Fold #{fold}: {score:.5f} ({model.best_iteration} rounds)')
            
        _ = gc.collect()
    
    test_preds['mean'] = sum(test_preds.values()) / folds  # mean of fold-wise predictions
    oof_preds = pd.DataFrame.from_dict(oof_preds, orient='index').sort_index()
    print(f'\nOOF score: {comp_metric(train[TARGET], oof_preds):.5f}')
    
    return oof_preds, test_preds

In [16]:
def run_experiment(features, extend=None, folds=7, seed=SEED, n_trials=50):
    
    base_params = {
        'booster': 'gbtree',
        'tree_method': 'hist',
        'objective': 'multi:softprob',
        'num_class': 3,
        'n_estimators': 5000,
        'eval_metric': comp_metric,
        'early_stopping_rounds': 100,
        'device': DEVICE,
        'enable_categorical': True,
        'verbosity': 0,
        'n_jobs': -1,
        'seed': seed
    }
    model = xgb.XGBClassifier(**base_params)
    
    print(f'----------Hyperparameter tuning----------')
    start = time.time()
    
    study = tune_params(features, model, extend, folds, seed, n_trials, direction='minimize') 
    #metric: Logloss -> lower is better
    
    end = time.time()
    
    print(f'Best trial: {study.best_trial.number} -> Best value: {study.best_value:.5f}')
    print(f'Best hyperparameters:')
    for k, v in study.best_params.items():
        print(f'{k:15} - {v}')
    print(f'\n[Time taken: {end - start:.2f}s]\n')
    
    print(f'-----Cross-validation and prediction-----')
    start = time.time()
    
    model.set_params(**study.best_params)
    oof_preds, test_preds = custom_cv(features, model, extend, folds, seed)
    
    end = time.time()
    print(f'\n[Time taken: {end - start:.2f}s]\n')
    
    return oof_preds, test_preds

In [17]:
def create_submission_files(test_preds, config, notebook='01'):
    sub = sample_sub.copy()
    sub['Status_C'] = test_preds['mean'][:, 0]
    sub['Status_CL'] = test_preds['mean'][:, 1]
    sub['Status_D'] = test_preds['mean'][:, 2]
    sub.to_csv(f'{notebook}_{config}.csv', index=False)

**Trial run:**

In [18]:
optuna.logging.set_verbosity(optuna.logging.INFO)

In [19]:
_ , _ = run_experiment(features=base_features, n_trials=3)

[I 2024-01-05 05:16:38,036] A new study created in memory with name: no-name-825e5789-0f67-452a-ab18-9038dcbf6d0b


----------Hyperparameter tuning----------


[I 2024-01-05 05:19:28,267] Trial 0 finished with value: 0.46348371170781844 and parameters: {'learning_rate': 0.01, 'max_depth': 12, 'min_child_weight': 8, 'subsample': 0.7, 'colsample_bytree': 0.8, 'gamma': 5.7, 'alpha': 4.3, 'lambda': 0.008822078646927935, 'max_cat_to_onehot': 3, 'max_delta_step': 8.0, 'grow_policy': 'lossguide'}. Best is trial 0 with value: 0.46348371170781844.
[I 2024-01-05 05:19:53,661] Trial 1 finished with value: 0.4980331016721894 and parameters: {'learning_rate': 0.09, 'max_depth': 11, 'min_child_weight': 15, 'subsample': 0.8, 'colsample_bytree': 0.75, 'gamma': 17.400000000000002, 'alpha': 2.0, 'lambda': 36.38424097673977, 'max_cat_to_onehot': 3, 'max_delta_step': 5.5, 'grow_policy': 'depthwise'}. Best is trial 0 with value: 0.46348371170781844.
[I 2024-01-05 05:20:19,616] Trial 2 finished with value: 0.4398730414827313 and parameters: {'learning_rate': 0.09999999999999999, 'max_depth': 9, 'min_child_weight': 12, 'subsample': 0.95, 'colsample_bytree': 0.75, '

Best trial: 2 -> Best value: 0.43987
Best hyperparameters:
learning_rate   - 0.09999999999999999
max_depth       - 9
min_child_weight - 12
subsample       - 0.95
colsample_bytree - 0.75
gamma           - 0.30000000000000004
alpha           - 1.8
lambda          - 0.3298547272191968
max_cat_to_onehot - 2
max_delta_step  - 9.5
grow_policy     - depthwise

[Time taken: 221.65s]

-----Cross-validation and prediction-----
Fold #0: 0.45283 (74 rounds)
Fold #1: 0.41575 (131 rounds)
Fold #2: 0.41146 (131 rounds)
Fold #3: 0.46147 (101 rounds)
Fold #4: 0.47259 (117 rounds)
Fold #5: 0.42874 (72 rounds)
Fold #6: 0.43628 (121 rounds)

OOF score: 0.43987

[Time taken: 26.49s]



In [20]:
optuna.logging.set_verbosity(optuna.logging.ERROR)

# Experiments

In [21]:
op = {} # Train-set OOF predictions
tp = {} # Test-set predictions

In [22]:
feature_set = 'base'
model_name = 'xgb'
extension = 'noext'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=base_features, 
    extend=None, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 44 -> Best value: 0.43332
Best hyperparameters:
learning_rate   - 0.02
max_depth       - 7
min_child_weight - 13
subsample       - 0.9
colsample_bytree - 0.6
gamma           - 0.30000000000000004
alpha           - 2.7
lambda          - 2.389553140880608
max_cat_to_onehot - 4
max_delta_step  - 5.0
grow_policy     - depthwise

[Time taken: 3660.03s]

-----Cross-validation and prediction-----
Fold #0: 0.44889 (554 rounds)
Fold #1: 0.40856 (823 rounds)
Fold #2: 0.40653 (1157 rounds)
Fold #3: 0.45874 (701 rounds)
Fold #4: 0.46474 (879 rounds)
Fold #5: 0.42467 (470 rounds)
Fold #6: 0.42114 (1212 rounds)

OOF score: 0.43332

[Time taken: 107.92s]



In [23]:
feature_set = 'nodrugbase'
model_name = 'xgb'
extension = 'noext'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=[f for f in base_features if f != 'Drug'], 
    extend=None, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 26 -> Best value: 0.43340
Best hyperparameters:
learning_rate   - 0.05
max_depth       - 5
min_child_weight - 13
subsample       - 0.75
colsample_bytree - 0.7
gamma           - 0.2
alpha           - 1.4000000000000001
lambda          - 0.3886414456448555
max_cat_to_onehot - 3
max_delta_step  - 7.5
grow_policy     - lossguide

[Time taken: 3248.75s]

-----Cross-validation and prediction-----
Fold #0: 0.45088 (231 rounds)
Fold #1: 0.41015 (350 rounds)
Fold #2: 0.40823 (609 rounds)
Fold #3: 0.45960 (244 rounds)
Fold #4: 0.46352 (358 rounds)
Fold #5: 0.42293 (191 rounds)
Fold #6: 0.41850 (583 rounds)

OOF score: 0.43340

[Time taken: 56.30s]



In [24]:
feature_set = 'base'
model_name = 'xgb'
extension = 'orig'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=base_features, 
    extend=original, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 43 -> Best value: 0.42799
Best hyperparameters:
learning_rate   - 0.01
max_depth       - 3
min_child_weight - 13
subsample       - 0.8
colsample_bytree - 0.6
gamma           - 0.30000000000000004
alpha           - 2.7
lambda          - 0.38074949176073125
max_cat_to_onehot - 3
max_delta_step  - 9.5
grow_policy     - depthwise

[Time taken: 4009.81s]

-----Cross-validation and prediction-----
Fold #0: 0.44300 (2635 rounds)
Fold #1: 0.40555 (4007 rounds)
Fold #2: 0.40526 (4993 rounds)
Fold #3: 0.45376 (3666 rounds)
Fold #4: 0.45529 (4238 rounds)
Fold #5: 0.42407 (2010 rounds)
Fold #6: 0.40898 (4999 rounds)

OOF score: 0.42799

[Time taken: 226.93s]



In [25]:
feature_set = 'base'
model_name = 'xgb'
extension = 'trial'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=base_features, 
    extend=trial, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 49 -> Best value: 0.42849
Best hyperparameters:
learning_rate   - 0.01
max_depth       - 6
min_child_weight - 15
subsample       - 0.8
colsample_bytree - 0.65
gamma           - 0.0
alpha           - 2.5
lambda          - 0.04120355473583373
max_cat_to_onehot - 2
max_delta_step  - 8.0
grow_policy     - depthwise

[Time taken: 3905.21s]

-----Cross-validation and prediction-----
Fold #0: 0.44547 (1066 rounds)
Fold #1: 0.40555 (1582 rounds)
Fold #2: 0.40067 (2220 rounds)
Fold #3: 0.45246 (1362 rounds)
Fold #4: 0.45829 (2032 rounds)
Fold #5: 0.42043 (999 rounds)
Fold #6: 0.41658 (1954 rounds)

OOF score: 0.42849

[Time taken: 166.41s]



In [26]:
feature_set = 'cat'
model_name = 'xgb'
extension = 'orig'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=cat_features,
    extend=original, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 14 -> Best value: 0.49194
Best hyperparameters:
learning_rate   - 0.09
max_depth       - 8
min_child_weight - 9
subsample       - 0.95
colsample_bytree - 0.6
gamma           - 0.2
alpha           - 1.3
lambda          - 0.01913834677877739
max_cat_to_onehot - 3
max_delta_step  - 9.0
grow_policy     - depthwise

[Time taken: 1280.07s]

-----Cross-validation and prediction-----
Fold #0: 0.49502 (125 rounds)
Fold #1: 0.46168 (311 rounds)
Fold #2: 0.47837 (103 rounds)
Fold #3: 0.51485 (157 rounds)
Fold #4: 0.52364 (97 rounds)
Fold #5: 0.47028 (74 rounds)
Fold #6: 0.49973 (345 rounds)

OOF score: 0.49194

[Time taken: 16.77s]



In [27]:
feature_set = 'cat'
model_name = 'xgb'
extension = 'trial'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=cat_features,
    extend=trial, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 14 -> Best value: 0.49187
Best hyperparameters:
learning_rate   - 0.09
max_depth       - 8
min_child_weight - 9
subsample       - 0.95
colsample_bytree - 0.6
gamma           - 0.2
alpha           - 1.3
lambda          - 0.01913834677877739
max_cat_to_onehot - 3
max_delta_step  - 9.0
grow_policy     - depthwise

[Time taken: 1265.22s]

-----Cross-validation and prediction-----
Fold #0: 0.49542 (125 rounds)
Fold #1: 0.46209 (437 rounds)
Fold #2: 0.47718 (101 rounds)
Fold #3: 0.51525 (107 rounds)
Fold #4: 0.52406 (84 rounds)
Fold #5: 0.46931 (81 rounds)
Fold #6: 0.49981 (170 rounds)

OOF score: 0.49187

[Time taken: 15.78s]



In [28]:
feature_set = 'all'
model_name = 'xgb'
extension = 'orig'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=all_features,
    extend=original, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 47 -> Best value: 0.43215
Best hyperparameters:
learning_rate   - 0.08
max_depth       - 3
min_child_weight - 15
subsample       - 0.7
colsample_bytree - 0.8
gamma           - 0.0
alpha           - 1.5
lambda          - 0.0733271850212784
max_cat_to_onehot - 2
max_delta_step  - 10.0
grow_policy     - depthwise

[Time taken: 2483.24s]

-----Cross-validation and prediction-----
Fold #0: 0.44495 (406 rounds)
Fold #1: 0.40195 (396 rounds)
Fold #2: 0.41323 (564 rounds)
Fold #3: 0.45704 (276 rounds)
Fold #4: 0.46040 (493 rounds)
Fold #5: 0.42775 (206 rounds)
Fold #6: 0.41979 (551 rounds)

OOF score: 0.43215

[Time taken: 31.08s]



In [29]:
feature_set = 'all'
model_name = 'xgb'
extension = 'trial'
folds = 7
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=all_features,
    extend=trial, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 49 -> Best value: 0.43108
Best hyperparameters:
learning_rate   - 0.05
max_depth       - 4
min_child_weight - 13
subsample       - 0.65
colsample_bytree - 0.65
gamma           - 0.8
alpha           - 0.9
lambda          - 0.16350101478775597
max_cat_to_onehot - 3
max_delta_step  - 9.5
grow_policy     - depthwise

[Time taken: 3137.56s]

-----Cross-validation and prediction-----
Fold #0: 0.44861 (230 rounds)
Fold #1: 0.40385 (496 rounds)
Fold #2: 0.40809 (606 rounds)
Fold #3: 0.45338 (335 rounds)
Fold #4: 0.46178 (390 rounds)
Fold #5: 0.42079 (275 rounds)
Fold #6: 0.42105 (516 rounds)

OOF score: 0.43108

[Time taken: 36.71s]



In [30]:
feature_set = 'base'
model_name = 'xgb'
extension = 'orig'
folds = 10
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=base_features,
    extend=original, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 39 -> Best value: 0.42976
Best hyperparameters:
learning_rate   - 0.05
max_depth       - 4
min_child_weight - 10
subsample       - 0.65
colsample_bytree - 0.65
gamma           - 0.1
alpha           - 0.5
lambda          - 0.032190301192523495
max_cat_to_onehot - 4
max_delta_step  - 8.5
grow_policy     - depthwise

[Time taken: 2925.24s]

-----Cross-validation and prediction-----
Fold #0: 0.44440 (407 rounds)
Fold #1: 0.40898 (236 rounds)
Fold #2: 0.42136 (484 rounds)
Fold #3: 0.42217 (410 rounds)
Fold #4: 0.43174 (428 rounds)
Fold #5: 0.47541 (291 rounds)
Fold #6: 0.44237 (541 rounds)
Fold #7: 0.43435 (173 rounds)
Fold #8: 0.38929 (442 rounds)
Fold #9: 0.42753 (356 rounds)

OOF score: 0.42976

[Time taken: 47.74s]



In [31]:
feature_set = 'base'
model_name = 'xgb'
extension = 'orig'
folds = 15
seed = SEED
config = f'{feature_set}_{model_name}_{extension}_f{folds}_s{seed}'

op[config], tp[config] = run_experiment(
    features=base_features,
    extend=original, 
    folds=folds, 
    seed=seed)

create_submission_files(tp[config], config)

----------Hyperparameter tuning----------
Best trial: 34 -> Best value: 0.42786
Best hyperparameters:
learning_rate   - 0.04
max_depth       - 4
min_child_weight - 13
subsample       - 0.6
colsample_bytree - 0.6
gamma           - 0.1
alpha           - 1.5
lambda          - 0.6981343670385046
max_cat_to_onehot - 3
max_delta_step  - 10.0
grow_policy     - lossguide

[Time taken: 7221.80s]

-----Cross-validation and prediction-----
Fold #0: 0.43241 (474 rounds)
Fold #1: 0.44528 (440 rounds)
Fold #2: 0.39255 (386 rounds)
Fold #3: 0.44182 (674 rounds)
Fold #4: 0.39601 (836 rounds)
Fold #5: 0.41317 (466 rounds)
Fold #6: 0.40206 (862 rounds)
Fold #7: 0.46257 (720 rounds)
Fold #8: 0.48249 (514 rounds)
Fold #9: 0.44971 (710 rounds)
Fold #10: 0.42472 (693 rounds)
Fold #11: 0.45452 (328 rounds)
Fold #12: 0.39241 (544 rounds)
Fold #13: 0.38429 (1038 rounds)
Fold #14: 0.44394 (692 rounds)

OOF score: 0.42786

[Time taken: 140.88s]

