In [1]:
from openml import tasks

from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif

import numpy as np

from nds import ndomsort

from classes import Dataset, GroupStructure, WeightClipper, Prob
from functions import run_eagga_cv, generate_offspring

In [2]:
# OpenML task id of the diabetes task used throughout this notebook.
DIABETES_TASK_ID = 37
oml_task_diabetes = tasks.get_task(DIABETES_TASK_ID)

In [3]:
# Fetch the dataset behind the task, then unpack its data.
# No target is requested here, so the 'class' label column stays inside X (see the frame displayed below).
oml_dataset_diabetes = oml_task_diabetes.get_dataset()
X, y, categorical_indicator, attribute_names = oml_dataset_diabetes.get_data()

In [4]:
# Inspect the loaded frame: eight numeric feature columns plus the 'class' label column.
X

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,tested_positive
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,tested_negative
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,tested_positive
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,tested_negative
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,tested_positive
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,tested_negative
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,tested_negative
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,tested_negative
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,tested_positive


In [5]:
# Work on a copy so X itself is never modified; the name reflects that the frame
# holds both the features and the 'class' label column.
Xy = X.copy()

In [6]:
# One boolean per column of X, in column order; only the last entry ('class') is True.
# Presumably True marks a categorical column — confirm against the OpenML get_data documentation.
categorical_indicator

[False, False, False, False, False, False, False, False, True]

In [7]:
# Let the detector pick the feature (column) indices to work with; they are used
# below as positional column indices into Xy.
# NOTE(review): the selection criterion lives in classes.GroupStructure.detector_features and is not visible here.
feats_selected = GroupStructure.detector_features(Xy, categorical_indicator)
print(feats_selected)

[7, 4]


In [8]:
# Expand the selected features with their pairwise interaction term.
# interaction_only=True keeps the raw columns plus products of distinct columns (no squares);
# include_bias=False leaves out the constant column.
feats_selected_frame = Xy.iloc[:, [*feats_selected]]
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
# Fitting on a DataFrame makes scikit-learn record the column names itself, so the
# fitted attribute `feature_names_in_` must not be assigned by hand before the fit.
# The transformer is unsupervised: a target passed to fit/fit_transform is ignored,
# so none is passed.
interaction_terms = poly.fit_transform(feats_selected_frame)
print(poly.get_feature_names_out())
print(interaction_terms)

['age' 'insu' 'age insu']
[[  50.    0.    0.]
 [  31.    0.    0.]
 [  32.    0.    0.]
 ...
 [  30.  112. 3360.]
 [  47.    0.    0.]
 [  23.    0.    0.]]


In [9]:
# Ask the detector for interaction groups among the selected features.
# Judging by the output, the result is a list of groups, each a list of feature indices — TODO confirm.
population_interactions = GroupStructure.detector_interactions(Xy, feats_selected)
population_interactions

[[4, 7]]

In [10]:
# Attach monotonicity information to the detected interaction groups.
# Judging by the output, the result is a 2-tuple: presumably ([[group, monotonicity flag], ...],
# one sign per feature) — TODO confirm against classes.GroupStructure.detector_monotonicity.
# NOTE: the name `population` is rebound to a list of individuals in the next cell.
population = GroupStructure.detector_monotonicity(Xy, population_interactions)
population

([[[4, 7], 0]], [1, 1, 1, 1, 1, 1, 1, 1])

In [11]:
# Scratch cell: build a few group structures by hand and exercise offspring generation on them.
# NOTE(review): the meaning of the GroupStructure arguments is inferred from these calls only —
# presumably (feature indices, per-feature signs, unconstrained features, *[member features, monotonicity]);
# confirm against classes.GroupStructure.
gs_1 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, 1, -1],
    [0, 1],
    [[2, 5], 1],
    [[4], 0],
    [[7, 3, 6], 1]
)

gs_2 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, -1, -1],
    [0],
    [[1, 2, 3, 4, 5], 1],
    [[6, 7], 1]
)

# every feature listed in the third argument, no groups
gs_3 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, 1, 1],
    [0, 1, 2, 3, 4, 5, 6, 7]
)

# third argument empty, all features in a single group
gs_4 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, 1, 1],
    [],
    [[0, 1, 2, 3, 4, 5, 6, 7], 1]
)

# Wrap the features/label in the project's Dataset using gs_1.
tmp = Dataset(
    X=Xy.loc[:, Xy.columns != 'class'],
    y=Xy.loc[:, 'class'],
    class_pos='tested_positive',
    group_structure=gs_1
)
# Smoke checks only: these are mid-cell expressions, so their values are not displayed;
# they merely have to run without raising.
len(tmp)
tmp[2]

gs_1.get_unconstrained_features()

print(gs_1.get_feature_signs()[0])
print(gs_1.get_feature_signs()[2])
print(gs_1.get_feature_signs()[6])

# Toy population with made-up metrics. NOTE: this rebinds `population` from the previous cell.
population = [
    {'total_layers': 3, 'nodes_per_hidden_layer': 14, 'group_structure': gs_1, 'metrics': {'mean': (0.75, 0.15, 0.5, 0.4)}},
    {'total_layers': 5, 'nodes_per_hidden_layer': 5, 'group_structure': gs_2, 'metrics': {'mean': (0.8, 0.15, 0.5, 0.4)}},
    {'total_layers': 7, 'nodes_per_hidden_layer': 3, 'group_structure': gs_3, 'metrics': {'mean': (0.6, 0.27, 0.4, 0.6)}},
    {'total_layers': 7, 'nodes_per_hidden_layer': 3, 'group_structure': gs_4, 'metrics': {'mean': (0.6, 0.27, 0.4, 0.6)}}
]
# One front index per individual, in population order. The first metric enters as 1 - value,
# the remaining ones unchanged (presumably: the first is maximised, the sort minimises — TODO confirm).
ranks_nds = ndomsort.non_domin_sort(
    [individual['metrics']['mean'] for individual in population],
    get_objectives=lambda elem: (1 - elem[0], *elem[1:]),
    only_front_indices=True
)
print(f'ranks_nds: {ranks_nds}')
hp_bounds = {
    'total_layers': (3, 10),
    'nodes_per_hidden_layer': (3, 20)
}

# ranks_nds covers all four individuals, so each parent pair must be handed the matching
# slice of ranks; passing the full tuple would pair individuals 2 and 3 with the ranks of 0 and 1.
# (generate_offspring stands in for calling GroupStructure.gga_crossover(gs_1, gs_2) directly.)
os_1, os_2 = generate_offspring(2, population[:2], ranks_nds[:2], hp_bounds)
print('os_1', os_1, os_1['group_structure'])
print('os_2', os_2, os_2['group_structure'])

# (stands in for GroupStructure.gga_crossover(gs_3, gs_4))
os_3, os_4 = generate_offspring(2, population[2:], ranks_nds[2:], hp_bounds)
print('os_3', os_3, os_3['group_structure'])
print('os_4', os_4, os_4['group_structure'])


1
1
1
ranks_nds: (1, 0, 0, 0)
os_1 {'total_layers': 6, 'nodes_per_hidden_layer': 4, 'group_structure': <classes.GroupStructure object at 0x0000007781911040>} ([2, 4, 5], [[[1, 6], 1], [[3, 0, 7], 0]])
os_2 {'total_layers': 5, 'nodes_per_hidden_layer': 5, 'group_structure': <classes.GroupStructure object at 0x00000077819DE580>} ([4, 6, 7, 0], [[[1, 2, 3, 5], 1], [[], 1]])
os_3 {'total_layers': 7, 'nodes_per_hidden_layer': 5, 'group_structure': <classes.GroupStructure object at 0x0000007781AC01F0>} ([1, 3, 5], [[[0, 2, 4, 6, 7], -1]])
os_4 {'total_layers': 8, 'nodes_per_hidden_layer': 3, 'group_structure': <classes.GroupStructure object at 0x000000778197E8D0>} ([0, 1, 3, 4], [[[2, 5, 6, 7], 1]])


In [12]:
# outer split, holdout
# Outer split (holdout): 2/3 train+validation, 1/3 test, stratified on the label.
# A fixed seed keeps the holdout split identical across "Restart & Run All".
SPLIT_SEED = 42
data_train_val, data_test = train_test_split(
    Xy,
    train_size=2/3,
    shuffle=True,
    stratify=Xy.loc[:, 'class'],
    random_state=SPLIT_SEED
)

# reset indices as StratifiedKFold assumes consecutive index
data_train_val = data_train_val.reset_index(drop=True)
data_test = data_test.reset_index(drop=True)

# Evaluation plan:
# - for each individual in the configuration, run k (inner) folds + average its performance
#     -> in each fold, additionally split the training data, train on larger split,
#        use smaller split to determine early stopping
#     -> average early stopping epoch over all folds, report back with average performance
# - find pareto front
# - evaluate pareto front's performance on holdout test set (data_test); each model of the
#   front is trained for the average of the epochs determined by early stopping in CV
cv_k = 5
mu = 3  # TODO: set to 100
la = 2  # TODO: set to 10
patience = 2  # TODO: set higher, e.g. 10
secs_per_fold = 15
# total budget; also breaks mid-generation, will only finish current individual in case of breach
secs_total = secs_per_fold * cv_k * la
# enforce monotonicity by clipping weights to [0, infty) after each epoch (in def train)
monotonicity_clipper = WeightClipper(0, None)
run_eagga_cv(
    mu,
    la,
    cv_k,
    data_train_val,
    categorical_indicator,
    epochs=10,
    batch_size=8,
    patience=patience,
    weight_clipper=monotonicity_clipper,
    secs_per_fold=secs_per_fold,
    secs_total=secs_total
)

initial population
total layers 3, nodes_per_hidden_layer 3, gs: ([0, 2, 3, 4, 5, 6, 7], [[[1], 0]])
total layers 4, nodes_per_hidden_layer 4, gs: ([0, 1, 2, 3, 5, 6, 7], [[[4], 0]])
total layers 3, nodes_per_hidden_layer 4, gs: ([0, 1, 2, 3, 6, 7], [[[4, 5], 0]])
start EA at 2025-02-25T21:45:24.370640
Evolution 1, evaluate 3 individuals
running HPO for individual 1/3: 3 total_layers, 3 nodes per hidden layer
fold 1/5 | (0.7991293532338309, 0.125, 0.0, 0.125) 172
fold 2/5 | (0.7914593698175788, 0.125, 0.0, 0.125) 181
fold 3/5 | stop early: 0.6556266275319187 < 0.6557675274935636, optimal epoch 69 | (0.1811868686868687, 0.125, 0.0, 0.125) 69
fold 4/5 | stop early: 0.6344185010953383 < 0.6348534443161704, optimal epoch 75 | (0.4861111111111111, 0.125, 0.0, 0.125) 75
fold 5/5 | stop early: 0.6549730924042788 < 0.6550161621787332, optimal epoch 1 | (0.4925373134328358, 0.125, 0.0, 0.125) 1
running HPO for individual 2/3: 4 total_layers, 4 nodes per hidden layer
fold 1/5 | stop early: 0.635