In [1]:
from openml import tasks

import torch

import sklearn
from sklearn.model_selection import train_test_split, StratifiedKFold

from nds import ndomsort

from classes import Dataset, GroupStructure, WeightClipper
from functions import run_eagga_cv, generate_offspring

In [2]:
# Look up OpenML task 37; judging by the variable name and the columns shown
# further down, this is the diabetes classification task.
oml_task_diabetes = tasks.get_task(task_id=37)

In [3]:
# Resolve the dataset behind the task, then unpack what get_data() returns.
# No target is passed to get_data(), and the frame displayed in the next cell
# still carries the `class` label as its last column, so X holds features AND label.
# NOTE(review): y is presumably None in that case -- confirm against the OpenML docs.
diabetes_dataset = oml_task_diabetes.get_dataset()
X, y, categorical_indicator, attribute_names = diabetes_dataset.get_data()

In [4]:
# Display the raw frame (eight numeric columns plus the `class` label);
# the rendered output abbreviates the middle rows with "...".
X

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,tested_positive
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,tested_negative
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,tested_positive
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,tested_negative
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,tested_positive
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,tested_negative
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,tested_negative
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,tested_negative
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,tested_positive


In [5]:
# Keep the downloaded frame untouched: everything downstream works on this
# independent (deep) copy, which still contains the `class` label column.
Xy = X.copy(deep=True)

In [6]:
# Scratch cell: exercises GroupStructure, Dataset, non-dominated sorting and
# offspring generation on hand-written toy inputs.

# Two hand-written group structures over the eight feature indices 0..7.
# NOTE(review): presumably the second argument lists the features kept out of
# every group and each trailing [[features], m] pair is a feature group with a
# monotonicity flag m in {-1, 0, 1} -- confirm against classes.GroupStructure.
gs_1 = GroupStructure(
    set(range(8)),
    {0, 1},
    [[2, 5], 1],
    [[4], 0],
    [[7, 3, 6], 1],
)

gs_2 = GroupStructure(
    set(range(8)),
    {0},
    [[1, 2, 3, 4, 5], 1],
    [[6, 7], -1],
)

# Wrap the frame in the project's Dataset: every column except `class` is a
# feature, `class` is the label and 'tested_positive' is the positive class.
tmp = Dataset(
    X=Xy.drop(columns='class'),
    y=Xy['class'],
    class_pos='tested_positive',
    group_structure=gs_1,
)
# Smoke checks only: none of the next three expressions is the cell's last
# statement, so their values are computed and discarded (nothing is displayed).
len(tmp)
tmp[2]

gs_1.get_unconstrained_features()

# Toy population of three individuals with made-up 'mean' metric tuples.
population = [
    {
        'total_layers': 3,
        'nodes_per_hidden_layer': 14,
        'group_structure': gs_1,
        'metrics': {'mean': (0.75, 0.15, 0.5, 0.4)},
    },
    {
        'total_layers': 5,
        'nodes_per_hidden_layer': 5,
        'group_structure': gs_2,
        'metrics': {'mean': (0.8, 0.15, 0.5, 0.4)},
    },
    {
        'total_layers': 7,
        'nodes_per_hidden_layer': 3,
        'group_structure': gs_2,
        'metrics': {'mean': (0.6, 0.27, 0.4, 0.6)},
    },
]
# Non-dominated sorting of the metric tuples. The first metric is inverted
# (1 - value) while the remaining ones are passed through unchanged; only the
# front index of each individual is returned (the recorded run prints (1, 0, 0)).
ranks_nds = ndomsort.non_domin_sort(
    [member['metrics']['mean'] for member in population],
    get_objectives=lambda scores: (1 - scores[0], *scores[1:]),
    only_front_indices=True,
)
print(ranks_nds)
# (lower, upper) bounds per hyperparameter handed to the offspring generator.
hp_bounds = dict(total_layers=(3, 10), nodes_per_hidden_layer=(3, 20))
# NOTE(review): from here on gs_1 / gs_2 no longer name GroupStructure objects;
# they are rebound to the two offspring returned below, which are indexed like
# dicts with a 'group_structure' entry. `population` and `tmp` keep the originals.
# An earlier variant of this line called GroupStructure.gga_crossover(gs_1, gs_2).
gs_1, gs_2 = generate_offspring(2, population, ranks_nds, hp_bounds)
print(gs_1, gs_1['group_structure'])
print(gs_2, gs_2['group_structure'])

(1, 0, 0)
{'total_layers': 5, 'nodes_per_hidden_layer': 5, 'group_structure': <classes.GroupStructure object at 0x000000E1E7AB7820>} ({0, 1, 3, 6}, [[[2, 4, 5], 1], [[7], -1]])
{'total_layers': 5, 'nodes_per_hidden_layer': 6, 'group_structure': <classes.GroupStructure object at 0x000000E1E7639C70>} ({3}, [[[2, 4, 6, 0], 1], [[7, 1, 5], 0]])


In [7]:
# Per-column categorical flags as returned by get_data(); in the recorded
# output only the last of the nine entries (the `class` column) is True.
categorical_indicator

[False, False, False, False, False, False, False, False, True]

In [8]:
# Fixed seed so the outer split is identical on every Restart & Run All.
SEED = 42
# NOTE(review): presumably run_eagga_cv trains torch models, in which case this
# also pins weight initialisation -- confirm. Any other RNG used inside
# run_eagga_cv (e.g. for sampling the population) is not visible here and is
# NOT seeded by this cell.
torch.manual_seed(SEED)

# outer: stratified split on `class`; 2/3 of the rows go to the inner
# cross-validation, the remaining 1/3 is held out.
data_train_test, data_val = train_test_split(
    Xy,
    train_size=2/3,
    shuffle=True,
    stratify=Xy.loc[:, 'class'],
    random_state=SEED  # without this the split changes on every execution
)

# reset indices as StratifiedKFold assumes consecutive index
data_train_test = data_train_test.reset_index(drop=True)
data_val = data_val.reset_index(drop=True)

# inner
cv_inner = StratifiedKFold(
    n_splits=5,
    shuffle=False  # TODO: set to True
)

# Planned procedure:
# - for each individual in the configuration, run k folds + average its performance
#     -> in each fold, additionally split the training data, train on larger split,
#        use smaller split to determine early stopping
#     -> average early stopping epoch over all folds, report back with average performance
# - find pareto front
# - evaluate pareto front's performance on holdout test set, each model of the front is
#   trained for the average of the epochs determined by early stopping in CV
mu = 3  # TODO: set to 100
la = 2  # TODO: set to 10
monotonicity_clipper = WeightClipper(0, None)  # enforce monotonicity by clipping weights to [0, infty) after each epoch (in def train)
run_eagga_cv(mu, la, cv_inner, data_train_test, epochs=10, batch_size=8, weight_clipper=monotonicity_clipper)

Evolution 1, evaluate 3 individuals
running HPO for individual 1/3: 3 total_layers, 9 nodes per hidden layer
fold 1/5
(0.599502487562189, 0.75, 0.14285714285714285, 0.125)
fold 2/5
(0.5870646766169154, 0.75, 0.14285714285714285, 0.125)
fold 3/5
(0.6148989898989898, 0.75, 0.14285714285714285, 0.125)
fold 4/5
(0.5425084175084176, 0.75, 0.14285714285714285, 0.125)
fold 5/5
(0.6204690831556503, 0.75, 0.14285714285714285, 0.125)
running HPO for individual 2/3: 5 total_layers, 8 nodes per hidden layer
fold 1/5
(0.5182421227197347, 0.75, 0.14285714285714285, 0.125)
fold 2/5
(0.5169983416252073, 0.75, 0.14285714285714285, 0.125)
fold 3/5
(0.5963804713804713, 0.75, 0.14285714285714285, 0.125)
fold 4/5
(0.5420875420875421, 0.75, 0.14285714285714285, 0.125)
fold 5/5
(0.5837953091684436, 0.75, 0.14285714285714285, 0.125)
running HPO for individual 3/3: 3 total_layers, 8 nodes per hidden layer
fold 1/5
(0.5393864013266998, 0.75, 0.14285714285714285, 0.125)
fold 2/5
(0.5646766169154228, 0.75, 0.1428