In [1]:
import pandas as pd
import numpy as np
import os

from predunder.functions import convert_labels, get_metrics, kfold_metrics_to_df, df_to_nparray
from predunder.training import train_random_forest, train_xgboost, train_dnn, train_kfold, train_nnrf
from predunder.hypertuning import tune_model

In [2]:
DATA_DIR = '../train-test-data'
NUM_FOLDS = 10
TASK_TO_RUN = '2aii'

In [3]:
train_df = pd.read_csv(os.path.join(DATA_DIR, f'{TASK_TO_RUN}_train.csv'), index_col=0)
test_df = pd.read_csv(os.path.join(DATA_DIR, f'{TASK_TO_RUN}_test.csv'), index_col=0)

## Random Forest

We want to tune the following parameters
- `n_estimators`
- `max_depth`
- `min_samples_split`
- `min_samples_leaf`
- `bootstrap`

In [4]:
# tune oversample technique
param_grid = {
    'oversample': ['none', 'smote', 'adasyn', 'borderline'],
    'n_estimators': [200, 500]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_random_forest, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.04062695205627563.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.10706514684679477.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 compl

KeyboardInterrupt: 

In [7]:
# tune number of estimators
param_grid = {
    'oversample': ['adasyn'],
    'n_estimators': [int(x) for x in np.linspace(100, 1000, num=10)]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_random_forest, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.13341770297580113.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.12792432946384147.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 compl

{'n_estimators': 100, 'oversample': 'adasyn'}

In [8]:
# tune max depth
param_grid = {
    'oversample': ['adasyn'],
    'n_estimators': [100],
    'max_depth': [int(x) for x in np.linspace(2, 100, num=20)]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_random_forest, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.20917984605594048.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.13668945981517083.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 compl

Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 18: 0.1224830157068788.

Starting parameters 19...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 19: 0.09911123039109312.



{'max_depth': 2, 'n_estimators': 100, 'oversample': 'adasyn'}

In [10]:
# tune minimum number of samples to split a node and leaf
param_grid = {
    'oversample': ['adasyn'],
    'n_estimators': [100],
    'max_depth': [2],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_random_forest, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.2022010808700218.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.18890667667574768.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'max_depth': 2,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'n_estimators': 100,
 'oversample': 'adasyn'}

In [11]:
# tune bootstrap
param_grid = {
    'oversample': ['adasyn'],
    'n_estimators': [100],
    'max_depth': [2],
    'min_samples_split': [2],
    'min_samples_leaf': [1],
    'bootstrap': [True, False]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_random_forest, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.20385606959092706.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.20498593591528347.



{'bootstrap': False,
 'max_depth': 2,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'n_estimators': 100,
 'oversample': 'adasyn'}

In [14]:
params = {
    'bootstrap': False,
    'max_depth': 2,
    'min_samples_leaf': 1,
    'min_samples_split': 2,
    'n_estimators': 100,
    'oversample': 'adasyn'
}

predicted = train_random_forest(train_df, test_df, TASK_TO_RUN, **params)
accuracy, sensitivity, specificity, kappa = get_metrics(predicted, convert_labels(test_df[TASK_TO_RUN]))
accuracy, sensitivity, specificity, kappa

(0.66996699669967, 0.656, 0.7358490566037735, 0.25523547340477826)

## XGBoost

In [5]:
# tune oversampling
param_grid = {
    'oversample': ['none', 'smote', 'adasyn', 'borderline'],
    'n_estimators': [200, 500],
    'learning_rate': [0.1],
    'max_depth': [5],
    'min_child_weight': [1],
    'gamma': [0],
    'subsample': [0.8],
    'colsample_bytree': [0.8],
    'scale_pos_weight': [1]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.044838674772856704.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...


KeyboardInterrupt: 

In [17]:
# tune number of estimators
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [int(x) for x in np.linspace(100, 500, num=10)],
    'learning_rate': [0.1],
    'max_depth': [5],
    'min_child_weight': [1],
    'gamma': [0],
    'subsample': [0.8],
    'colsample_bytree': [0.8],
    'scale_pos_weight': [1]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.1332436984377238.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.17573551563595052.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'colsample_bytree': 0.8,
 'gamma': 0,
 'learning_rate': 0.1,
 'max_depth': 5,
 'min_child_weight': 1,
 'n_estimators': 322,
 'oversample': 'borderline',
 'scale_pos_weight': 1,
 'subsample': 0.8}

In [18]:
# tune depth and weight
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.1],
    'max_depth': range(2,10,2),
    'min_child_weight': range(1,6,2),
    'gamma': [0],
    'subsample': [0.8],
    'colsample_bytree': [0.8],
    'scale_pos_weight': [1]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.16817914589442104.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.1793521161458721.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'colsample_bytree': 0.8,
 'gamma': 0,
 'learning_rate': 0.1,
 'max_depth': 2,
 'min_child_weight': 5,
 'n_estimators': 322,
 'oversample': 'borderline',
 'scale_pos_weight': 1,
 'subsample': 0.8}

In [20]:
# tune minimum child weight
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.1],
    'max_depth': [2],
    'min_child_weight': range(5,20,2),
    'gamma': [0],
    'subsample': [0.8],
    'colsample_bytree': [0.8],
    'scale_pos_weight': [1]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.1645762486599897.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.1834455292199334.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 complet

{'colsample_bytree': 0.8,
 'gamma': 0,
 'learning_rate': 0.1,
 'max_depth': 2,
 'min_child_weight': 13,
 'n_estimators': 322,
 'oversample': 'borderline',
 'scale_pos_weight': 1,
 'subsample': 0.8}

In [21]:
# tune gamma
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.1],
    'max_depth': [2],
    'min_child_weight': [13],
    'gamma': [i/10 for i in range(5)],
    'subsample': [0.8],
    'colsample_bytree': [0.8],
    'scale_pos_weight': [1]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.11453419064394903.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.18229823372190862.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 compl

{'colsample_bytree': 0.8,
 'gamma': 0.1,
 'learning_rate': 0.1,
 'max_depth': 2,
 'min_child_weight': 13,
 'n_estimators': 322,
 'oversample': 'borderline',
 'scale_pos_weight': 1,
 'subsample': 0.8}

In [23]:
# tune subsample and colsample
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.1],
    'max_depth': [2],
    'min_child_weight': [13],
    'gamma': [0.1],
    'subsample':[i/10.0 for i in range(6,10)],
    'colsample_bytree':[i/10.0 for i in range(6,10)],
    'scale_pos_weight': [1]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.1412499189776677.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.16454171109878163.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'colsample_bytree': 0.8,
 'gamma': 0.1,
 'learning_rate': 0.1,
 'max_depth': 2,
 'min_child_weight': 13,
 'n_estimators': 322,
 'oversample': 'borderline',
 'scale_pos_weight': 1,
 'subsample': 0.7}

In [24]:
# tune subsample and colsample
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.1],
    'max_depth': [2],
    'min_child_weight': [13],
    'gamma': [0.1],
    'subsample':[i/100 for i in range(60,80,5)],
    'colsample_bytree':[i/100 for i in range(70,90,5)],
    'scale_pos_weight': [1]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.1089182173551975.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.15950843569253667.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'colsample_bytree': 0.8,
 'gamma': 0.1,
 'learning_rate': 0.1,
 'max_depth': 2,
 'min_child_weight': 13,
 'n_estimators': 322,
 'oversample': 'borderline',
 'scale_pos_weight': 1,
 'subsample': 0.7}

In [25]:
# tune regularization
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.1],
    'max_depth': [2],
    'min_child_weight': [13],
    'gamma': [0.1],
    'subsample':[0.7],
    'colsample_bytree':[0.8],
    'scale_pos_weight': [1],
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.1331789730048561.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.12740602972013743.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'colsample_bytree': 0.8,
 'gamma': 0.1,
 'learning_rate': 0.1,
 'max_depth': 2,
 'min_child_weight': 13,
 'n_estimators': 322,
 'oversample': 'borderline',
 'reg_alpha': 1,
 'scale_pos_weight': 1,
 'subsample': 0.7}

In [26]:
# tune regularization
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.1],
    'max_depth': [2],
    'min_child_weight': [13],
    'gamma': [0.1],
    'subsample':[0.7],
    'colsample_bytree':[0.8],
    'scale_pos_weight': [1],
    'reg_alpha': [0.3, 0.5, 0.8, 1, 3, 5, 8]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.16900459216205882.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.19942025042218314.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 compl

{'colsample_bytree': 0.8,
 'gamma': 0.1,
 'learning_rate': 0.1,
 'max_depth': 2,
 'min_child_weight': 13,
 'n_estimators': 322,
 'oversample': 'borderline',
 'reg_alpha': 0.5,
 'scale_pos_weight': 1,
 'subsample': 0.7}

In [27]:
# lower learning rate
param_grid = {
    'oversample': ['borderline'],
    'n_estimators': [322],
    'learning_rate': [0.01],
    'max_depth': [2],
    'min_child_weight': [13],
    'gamma': [0.1],
    'subsample':[0.7],
    'colsample_bytree':[0.8],
    'scale_pos_weight': [1],
    'reg_alpha': [0.5]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_xgboost, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.17887555863145313.



{'colsample_bytree': 0.8,
 'gamma': 0.1,
 'learning_rate': 0.01,
 'max_depth': 2,
 'min_child_weight': 13,
 'n_estimators': 322,
 'oversample': 'borderline',
 'reg_alpha': 0.5,
 'scale_pos_weight': 1,
 'subsample': 0.7}

In [28]:
params = {
'colsample_bytree': 0.8,
'gamma': 0.1,
'learning_rate': 0.01,
'max_depth': 2,
'min_child_weight': 13,
'n_estimators': 322,
'oversample': 'borderline',
'reg_alpha': 0.5,
'scale_pos_weight': 1,
'subsample': 0.7
}

predicted = train_xgboost(train_df, test_df, TASK_TO_RUN, **params)
accuracy, sensitivity, specificity, kappa = get_metrics(predicted, convert_labels(test_df[TASK_TO_RUN]))
accuracy, sensitivity, specificity, kappa

(0.7128712871287128, 0.76, 0.49056603773584906, 0.20120602405987692)

## DNN

In [31]:
train_df.drop([TASK_TO_RUN], axis=1).columns.tolist(),

(['CHILD_SEX',
  'IDD_SCORE',
  'AGE',
  'HHID_count',
  'HH_AGE',
  'FOOD_EXPENSE_WEEKLY',
  'NON-FOOD_EXPENSE_WEEKLY',
  'HDD_SCORE',
  'FOOD_INSECURITY',
  'YoungBoys',
  'YoungGirls',
  'AverageMonthlyIncome',
  'BEN_4PS',
  'AREA_TYPE',
  'FOOD_EXPENSE_WEEKLY_pc',
  'NON-FOOD_EXPENSE_WEEKLY_pc',
  'AverageMonthlyIncome_pc'],)

In [33]:
# lower learning rate
param_grid = {
    'oversample': ['none', 'smote', 'adasyn', 'borderline'],
    'features': [train_df.drop([TASK_TO_RUN], axis=1).columns.tolist()],
    'layers': [[16, 7, 2]]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_dnn, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30


Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Fold 1 completed.

Starting fold 2...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30


Epoch 13/30
Fold 2 completed.

Starting fold 3...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30


Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Fold 3 completed.

Starting fold 4...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30


Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Fold 4 completed.

Starting fold 5...
Epoch 1/30
Epoch 2/30


Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Fold 5 completed.

Starting fold 6...
Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Fold 6 completed.

Starting fold 7...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Fold 7 completed.

Starting fold 8...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30


Epoch 28/30
Epoch 29/30
Epoch 30/30
Fold 8 completed.

Starting fold 9...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Fold 9 completed.

Starting fold 10...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30


Epoch 29/30
Epoch 30/30
Fold 10 completed.

Completed parameters 0: 0.023529411764705917.

Starting parameters 1...
Starting fold 1...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30


Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Fold 1 completed.

Starting fold 2...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30


Epoch 9/30
Fold 2 completed.

Starting fold 3...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30


Epoch 20/30
Epoch 21/30
Epoch 22/30
Fold 3 completed.

Starting fold 4...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Fold 4 completed.

Starting fold 5...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
Fold 5 completed.

Starting fold 6...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Fold 6 completed.

Starting fold 7...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


Epoch 14/30
Epoch 15/30
Epoch 16/30
Fold 7 completed.

Starting fold 8...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Fold 8 completed.

Starting fold 9...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


Epoch 8/30
Epoch 9/30
Fold 9 completed.

Starting fold 10...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30


Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Fold 10 completed.

Completed parameters 1: -0.01103311953251251.

Starting parameters 2...
Starting fold 1...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30


Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Fold 1 completed.

Starting fold 2...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


Epoch 6/30
Fold 2 completed.

Starting fold 3...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Fold 3 completed.

Starting fold 4...
Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Fold 4 completed.

Starting fold 5...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Fold 5 completed.

Starting fold 6...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


Epoch 7/30
Epoch 8/30
Epoch 9/30
Fold 6 completed.

Starting fold 7...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


Epoch 18/30
Epoch 19/30
Epoch 20/30
Fold 7 completed.

Starting fold 8...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Fold 8 completed.

Starting fold 9...
Epoch 1/30


Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Fold 9 completed.

Starting fold 10...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Fold 10 completed.

Completed parameters 2: -0.033967511958754595.

Starting parameters 3...
Starting fold 1...
Epoch 1/30
Epoch 2/30


Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Fold 1 completed.

Starting fold 2...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Fold 2 completed.

Starting fold 3...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30


Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Fold 3 completed.

Starting fold 4...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30


Epoch 11/30
Epoch 12/30
Epoch 13/30
Fold 4 completed.

Starting fold 5...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Fold 5 completed.

Starting fold 6...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Fold 6 completed.

Starting fold 7...


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Fold 7 completed.

Starting fold 8...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Fold 8 completed.

Starting fold 9...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30


Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Fold 9 completed.

Starting fold 10...
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Fold 10 completed.

Completed parameters 3: -0.001622608585266673.



{'features': ['CHILD_SEX',
  'IDD_SCORE',
  'AGE',
  'HHID_count',
  'HH_AGE',
  'FOOD_EXPENSE_WEEKLY',
  'NON-FOOD_EXPENSE_WEEKLY',
  'HDD_SCORE',
  'FOOD_INSECURITY',
  'YoungBoys',
  'YoungGirls',
  'AverageMonthlyIncome',
  'BEN_4PS',
  'AREA_TYPE',
  'FOOD_EXPENSE_WEEKLY_pc',
  'NON-FOOD_EXPENSE_WEEKLY_pc',
  'AverageMonthlyIncome_pc'],
 'layers': [16, 7, 2],
 'oversample': 'none'}

In [35]:
params = {
    'oversample': 'none',
    'features': train_df.drop([TASK_TO_RUN], axis=1).columns.tolist(),
    'layers': [16, 7, 2]
}

predicted = train_dnn(train_df, test_df, TASK_TO_RUN, **params)
accuracy, sensitivity, specificity, kappa = get_metrics(predicted, convert_labels(test_df[TASK_TO_RUN]))
accuracy, sensitivity, specificity, kappa

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30


Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


(0.8250825082508251, 1.0, 0.0, 0.0)

## NNRF

In [15]:
from nnrf import ml

In [18]:
help(ml.loss)

Help on module nnrf.ml.loss in nnrf.ml:

NAME
    nnrf.ml.loss

CLASSES
    abc.ABC(builtins.object)
        LossFunction(nnrf.utils._base.Base, abc.ABC)
            CrossEntropy
            Hinge
            Huber
            MAE
            MSE
    nnrf.utils._base.Base(builtins.object)
        LossFunction(nnrf.utils._base.Base, abc.ABC)
            CrossEntropy
            Hinge
            Huber
            MAE
            MSE
    
    class CrossEntropy(LossFunction)
     |  Cross Entropy Loss.
     |  
     |  Method resolution order:
     |      CrossEntropy
     |      LossFunction
     |      nnrf.utils._base.Base
     |      abc.ABC
     |      builtins.object
     |  
     |  Methods defined here:
     |  
     |  __init__(self)
     |      Initialize self.  See help(type(self)) for accurate signature.
     |  
     |  gradient(self, Y_hat, Y, axis=1)
     |      Derivative of loss/error between labels `Y_hat` and targets `Y`.
     |      
     |      Parameters
     |     

In [7]:
# tune
param_grid = {
    'oversample': ['none', 'smote', 'borderline', 'adasyn'],
    'n': [5, 10, 20],
    'd': [2],
    'learning_rate': [0.1],
    'reg_factor': [1],
    'to_normalize': [True]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.0.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.0.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...


KeyboardInterrupt: 

In [5]:
# tune
param_grid = {
    'oversample': ['adasyn'],
    'n': [20],
    'd': [2],
    'learning_rate': [0.1],
    'reg_factor': [1e-5, 1e-2, 0.1, 1, 10, 100]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.04243196116926238.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: -0.001798623855220144.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 com

{'d': 2,
 'learning_rate': 0.1,
 'n': 20,
 'oversample': 'adasyn',
 'reg_factor': 1e-05}

In [6]:
# tune
param_grid = {
    'oversample': ['adasyn'],
    'n': [20],
    'd': [2],
    'learning_rate': [0.1],
    'reg_factor': [1e-10, 1e-8, 1e-5, 1e-3]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.059913809083530487.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.020849499321348202.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 com

{'d': 2,
 'learning_rate': 0.1,
 'n': 20,
 'oversample': 'adasyn',
 'reg_factor': 0.001}

In [7]:
# tune
param_grid = {
    'oversample': ['adasyn'],
    'n': [20],
    'd': [2],
    'learning_rate': [0.1],
    'reg_factor': [5e-4, 1e-3, 3e-3]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.00030362880891083855.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.051477462785325015.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 c

{'d': 2,
 'learning_rate': 0.1,
 'n': 20,
 'oversample': 'adasyn',
 'reg_factor': 0.001}

In [8]:
# tune
param_grid = {
    'oversample': ['adasyn'],
    'n': [20, 50, 100],
    'd': [2],
    'learning_rate': [0.1],
    'reg_factor': [1e-3]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.03490711683728204.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.0725851499574243.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'d': 2,
 'learning_rate': 0.1,
 'n': 100,
 'oversample': 'adasyn',
 'reg_factor': 0.001}

In [9]:
# tune
param_grid = {
    'oversample': ['adasyn', 'smote', 'borderline', 'none'],
    'n': [100],
    'd': [2],
    'learning_rate': [0.1],
    'reg_factor': [1e-3]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.0631783749547793.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.04597710705811423.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comple

{'d': 2,
 'learning_rate': 0.1,
 'n': 100,
 'oversample': 'adasyn',
 'reg_factor': 0.001}

In [10]:
# tune
param_grid = {
    'oversample': ['adasyn'],
    'n': [100],
    'd': [2],
    'learning_rate': [1e-3, 0.01, 0.03, 0.1],
    'reg_factor': [1e-3]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: 0.014285714285714285.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: -0.007593069137073255.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 co

{'d': 2,
 'learning_rate': 0.1,
 'n': 100,
 'oversample': 'adasyn',
 'reg_factor': 0.001}

In [20]:
# tune
param_grid = {
    'oversample': ['adasyn'],
    'n': [100],
    'd': [2],
    'r': [2, 5, 10],
    'learning_rate': [0.1],
    'reg_factor': [1e-3]
}

best_params = tune_model(train_df, TASK_TO_RUN, NUM_FOLDS, train_nnrf, param_grid)
best_params

Starting parameters 0...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 0: -0.0022293239460631858.

Starting parameters 1...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 completed.

Starting fold 3...
Fold 3 completed.

Starting fold 4...
Fold 4 completed.

Starting fold 5...
Fold 5 completed.

Starting fold 6...
Fold 6 completed.

Starting fold 7...
Fold 7 completed.

Starting fold 8...
Fold 8 completed.

Starting fold 9...
Fold 9 completed.

Starting fold 10...
Fold 10 completed.

Completed parameters 1: 0.066754121340761.

Starting parameters 2...
Starting fold 1...
Fold 1 completed.

Starting fold 2...
Fold 2 comp

{'d': 2,
 'learning_rate': 0.1,
 'n': 100,
 'oversample': 'adasyn',
 'r': 5,
 'reg_factor': 0.001}

In [21]:
params = {'d': 2, 'n': 500, 'r': 5,  'oversample': 'adasyn', 'learning_rate': 0.01, 'reg_factor': 1e-3}

predicted = train_nnrf(train_df, test_df, TASK_TO_RUN, **params)
accuracy, sensitivity, specificity, kappa = get_metrics(predicted, convert_labels(test_df[TASK_TO_RUN]))
accuracy, sensitivity, specificity, kappa

(0.7227722772277227, 0.82, 0.2641509433962264, 0.08055776316740122)