In [1]:
import os
import sys
from datetime import datetime

import yaml
import pandas as pd


# Initial configurations
config_filepath = 'config.yml'

# The config file is looked up relative to the working directory, which is
# expected to be the parent of the directory the notebook starts in.
# Only step up when the config is not already reachable, so that re-running
# this cell does not climb one more directory each time.
if not os.path.exists(config_filepath):
    os.chdir('..')

# Make the working directory importable, without adding it twice on re-runs.
if os.getcwd() not in sys.path:
    sys.path.append(os.getcwd())

# safe_load parses plain YAML only (no arbitrary object construction).
with open(config_filepath, 'r') as f:
    config = yaml.safe_load(f)

# Modeling

In [2]:
# Read the modelling table from the CSV path stored in the config
# under filepaths -> abt.
abt_filepath = config['filepaths']['abt']
abt = pd.read_csv(abt_filepath)

In [3]:
# Display the loaded table; pandas elides the middle rows of a long frame.
abt

Unnamed: 0,Age,SibSp,Parch,Fare,Survived,Pclass,Sex,Embarked
0,22,1,0,7.250,0,3,male,S
1,38,1,0,50.000,1,1,female,C
2,26,0,0,7.925,1,3,female,S
3,35,1,0,50.000,1,1,female,S
4,35,0,0,8.050,0,3,male,S
...,...,...,...,...,...,...,...,...
709,39,0,5,29.125,0,3,female,Q
710,27,0,0,13.000,0,2,male,S
711,19,0,0,30.000,1,1,female,S
712,26,0,0,30.000,1,1,male,C


## 1 Establishing a Baseline

In [4]:
# Target column: used for the chance baseline and passed to TabularPredictor.
label = 'Survived'

In [5]:
# Proportional chance criterion: the sum of squared class proportions,
# expressed as a percentage.
class_counts = abt[label].value_counts()
class_shares = class_counts / len(abt)
pcc = class_shares.pow(2).sum() * 100

In [6]:
# Chance-level accuracy, in percent.
pcc

51.761096595500945

In [7]:
# Accuracy target (in percent): 25% above the chance criterion `pcc`.
improvement_factor = 1.25
baseline = pcc * improvement_factor

In [8]:
# Accuracy target to beat, in percent.
baseline

64.70137074437618

## 2 Data Segregation

In [9]:
train_size = 0.80    # fraction of rows drawn for the training split
random_state = 1337  # fixed seed so the random draw is reproducible

# Draw the training rows at random; `train_size` is the single place that
# controls the split ratio.
train_data = abt.sample(frac=train_size, random_state=random_state)
# Every row whose index label was not drawn for training is held out for testing.
test_data = abt.loc[~abt.index.isin(train_data.index)]

## 3 Model Training

In [10]:
from autogluon.tabular import TabularDataset, TabularPredictor

In [11]:
# Wrap both splits in AutoGluon's TabularDataset, train first, then test.
train_dataset, test_dataset = (
    TabularDataset(split) for split in (train_data, test_data)
)

### Automatic

In [12]:
# Settings for the fully automatic run.
metric = 'accuracy'
presets = 'best_quality'
time_limit = 60 # in seconds
# Second-resolution timestamp plus a run tag: this predictor gets its own
# directory, so it is not overwritten by another fit made on the same day
# (a date-only path is shared by every run of that day).
save_path = f'models/{datetime.now().strftime("%Y%m%d_%H%M%S")}_auto'
verbosity = 2

# Build the predictor for the `label` column, then let AutoGluon choose and
# train models within `time_limit` seconds.
predictor = TabularPredictor(label=label, eval_metric=metric, path=save_path)
predictor = predictor.fit(
    train_dataset,
    time_limit=time_limit,
    presets=presets,
    verbosity=verbosity,
    dynamic_stacking=False,
)

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.10.17
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #52-Ubuntu SMP PREEMPT_DYNAMIC Thu Dec  5 13:09:44 UTC 2024
CPU Count:          32
Memory Avail:       235.88 GB / 251.52 GB (93.8%)
Disk Space Avail:   3801.69 GB / 71394.83 GB (5.3%)
Presets specified: ['best_quality']
Stack configuration (auto_stack=True): num_stack_levels=0, num_bag_folds=8, num_bag_sets=1
Beginning AutoGluon training ... Time limit = 60s
AutoGluon will save models to "/home/llorenzo/year-2025/Acads/msds2025-ft/adhoc/insta-ml/models/20250425"
Train Data Rows:    571
Train Data Columns: 7
Label Column:       Survived
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [0, 1]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['bin

### Manual

In [13]:
from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config

In [14]:
# Show the per-model hyperparameter dictionary behind AutoGluon's 'default'
# configuration, as a reference for the manual configuration that follows.
get_hyperparameter_config('default')

{'NN_TORCH': {},
 'GBM': [{'extra_trees': True, 'ag_args': {'name_suffix': 'XT'}},
  {},
  {'learning_rate': 0.03,
   'num_leaves': 128,
   'feature_fraction': 0.9,
   'min_data_in_leaf': 3,
   'ag_args': {'name_suffix': 'Large',
    'priority': 0,
    'hyperparameter_tune_kwargs': None}}],
 'CAT': {},
 'XGB': {},
 'FASTAI': {},
 'RF': [{'criterion': 'gini',
   'ag_args': {'name_suffix': 'Gini',
    'problem_types': ['binary', 'multiclass']}},
  {'criterion': 'entropy',
   'ag_args': {'name_suffix': 'Entr',
    'problem_types': ['binary', 'multiclass']}},
  {'criterion': 'squared_error',
   'ag_args': {'name_suffix': 'MSE',
    'problem_types': ['regression', 'quantile']}}],
 'XT': [{'criterion': 'gini',
   'ag_args': {'name_suffix': 'Gini',
    'problem_types': ['binary', 'multiclass']}},
  {'criterion': 'entropy',
   'ag_args': {'name_suffix': 'Entr',
    'problem_types': ['binary', 'multiclass']}},
  {'criterion': 'squared_error',
   'ag_args': {'name_suffix': 'MSE',
    'problem_ty

In [15]:
from autogluon.common import space

# Non-default hyperparameter values / search spaces for the neural network models.
# NOTE(review): 'softrelu' may not be an activation the torch-based network
# recognises — confirm against the NN_TORCH hyperparameter reference.
nn_options = dict(
    # number of training epochs (controls training time of NN models)
    num_epochs=10,
    # learning rate: real-valued, searched on a log scale
    learning_rate=space.Real(1e-4, 1e-2, default=5e-4, log=True),
    # activation function: categorical, default = first entry
    activation=space.Categorical('relu', 'softrelu', 'tanh'),
    # dropout probability: real-valued
    dropout_prob=space.Real(0.0, 0.5, default=0.1),
)

# Non-default hyperparameter values / search spaces for LightGBM gradient boosted trees.
gbm_options = dict(
    # number of boosting rounds (controls training time of GBM models)
    num_boost_round=100,
    # number of leaves in trees: integer-valued
    num_leaves=space.Int(lower=26, upper=66, default=36),
)

# Hyperparameters of each model type. When a key is missing from this dict,
# no models of that type are trained.
hyperparameters = {
    'GBM': gbm_options,
    # NOTE: comment this line out if you get errors on Mac OSX
    'NN_TORCH': nn_options,
}

# to tune hyperparameters using random search routine with a local scheduler
search_strategy = 'auto'

# HPO is not performed unless hyperparameter_tune_kwargs is specified.
# Refer to TabularPredictor.fit docstring for all valid values.
hyperparameter_tune_kwargs = dict(
    scheduler='local',
    searcher=search_strategy,
)

In [16]:
# Settings for the manually configured run (custom search spaces + HPO).
metric = 'accuracy'
presets = 'best_quality'
time_limit = 30 # in seconds
# Second-resolution timestamp plus a run tag: this predictor gets its own
# directory instead of writing into the date-only path used by the
# automatic run, whose saved models it would otherwise overwrite.
save_path = f'models/{datetime.now().strftime("%Y%m%d_%H%M%S")}_manual'
verbosity = 2

# Rebinding `predictor` replaces the predictor from the automatic run, so the
# cells that follow inspect this manually configured one.
predictor = TabularPredictor(label=label, eval_metric=metric, path=save_path)
predictor = predictor.fit(
    train_dataset,
    time_limit=time_limit,
    presets=presets,
    verbosity=verbosity,
    dynamic_stacking=False,
    # Only the model types listed in `hyperparameters` are trained.
    hyperparameters=hyperparameters,
    # Supplying tune kwargs is what switches hyperparameter search on.
    hyperparameter_tune_kwargs=hyperparameter_tune_kwargs,
)

Fitted model: NeuralNetTorch_BAG_L1/68720436 ...
	0.8196	 = Validation score   (accuracy)
	8.21s	 = Training   runtime
	0.17s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 29.88s of the -2.13s of remaining time.
	Ensemble Weights: {'LightGBM_BAG_L1/T2': 1.0}
	0.8441	 = Validation score   (accuracy)
	0.04s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 32.23s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 2834.4 rows/s (72 batch size)
Disabling decision threshold calibration for metric `accuracy` due to having fewer than 10000 rows of validation data for calibration, to avoid overfitting (571 rows).
	`accuracy` is generally not improved through threshold calibration. Force calibration via specifying `calibrate_decision_threshold=True`.
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("/home/llorenzo/year-2025/Acads/msds2025-ft/adhoc/insta-ml/models/202504

## 4 Model Performance

In [17]:
# Predict the label for every row of the held-out test split.
predictor.predict(test_dataset)

0      0
5      0
7      1
10     1
22     0
      ..
694    1
696    1
705    0
707    0
712    1
Name: Survived, Length: 143, dtype: int64

In [18]:
# Score the predictor on the held-out test split; returns a dict of metrics.
predictor.evaluate(test_dataset)

{'accuracy': 0.8041958041958042,
 'balanced_accuracy': 0.785411622276029,
 'mcc': 0.5921140028457315,
 'roc_auc': 0.8617836965294592,
 'f1': 0.7407407407407407,
 'precision': 0.8163265306122449,
 'recall': 0.6779661016949152}

In [19]:
# Compare every trained model: test score (score_test) next to the
# validation score (score_val), plus fit and prediction times.
predictor.leaderboard(test_dataset)

Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM_BAG_L1/T1,0.811189,0.842382,accuracy,0.16968,0.026654,3.513128,0.16968,0.026654,3.513128,1,True,1
1,LightGBM_BAG_L1/T4,0.811189,0.812609,accuracy,1.170597,0.0252,3.921769,1.170597,0.0252,3.921769,1,True,4
2,NeuralNetTorch_BAG_L1/68720436,0.811189,0.819615,accuracy,2.718895,0.170979,8.207555,2.718895,0.170979,8.207555,1,True,5
3,LightGBM_BAG_L1/T2,0.804196,0.844133,accuracy,0.192918,0.025267,3.31885,0.192918,0.025267,3.31885,1,True,2
4,WeightedEnsemble_L2,0.804196,0.844133,accuracy,0.195556,0.026337,3.357702,0.002639,0.00107,0.038852,2,True,6
5,LightGBM_BAG_L1/T3,0.797203,0.830123,accuracy,0.362842,0.02383,3.216524,0.362842,0.02383,3.216524,1,True,3


In [20]:
# Name of the model AutoGluon selected as best.
predictor.model_best

'WeightedEnsemble_L2'

In [21]:
# Names of all models held by this predictor.
predictor.model_names()

['LightGBM_BAG_L1/T1',
 'LightGBM_BAG_L1/T2',
 'LightGBM_BAG_L1/T3',
 'LightGBM_BAG_L1/T4',
 'NeuralNetTorch_BAG_L1/68720436',
 'WeightedEnsemble_L2']

In [22]:
# Evaluate one specific base model on the test split instead of the default.
# NOTE(review): the model name is hard-coded from a previous run's model list;
# trial names may differ after a re-fit — confirm before relying on it.
# NOTE(review): choosing a model because of its test score makes that score
# optimistic; selection should normally use the validation score.
predictor.evaluate(model='LightGBM_BAG_L1/T1', data=test_dataset)

{'accuracy': 0.8111888111888111,
 'balanced_accuracy': 0.7913640032284099,
 'mcc': 0.6075039676874432,
 'roc_auc': 0.8876109765940274,
 'f1': 0.7476635514018691,
 'precision': 0.8333333333333334,
 'recall': 0.6779661016949152}