In [1]:
from openml import tasks

from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.feature_selection import mutual_info_classif

import numpy as np

from nds import ndomsort

from classes import Dataset, GroupStructure, WeightClipper, Prob
from functions import run_eagga_cv, generate_offspring

In [2]:
oml_task_diabetes = tasks.get_task(37)

In [3]:
X, y, categorical_indicator, attribute_names = oml_task_diabetes.get_dataset().get_data()

In [4]:
X

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,tested_positive
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,tested_negative
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,tested_positive
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,tested_negative
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,tested_positive
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,tested_negative
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,tested_negative
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,tested_negative
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,tested_positive


In [5]:
Xy = X.copy()

In [6]:
categorical_indicator

[False, False, False, False, False, False, False, False, True]

In [7]:
feats_selected = GroupStructure.detector_features(Xy, categorical_indicator)
#feats_selected = {0,1,2,3,4,5,6,7}  # TODO: remove
print(feats_selected)

[2]


In [8]:
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
poly.feature_names_in_=Xy.iloc[:, [*feats_selected]].columns
interaction_terms = poly.fit_transform(
    X=Xy.iloc[:, [*feats_selected]],
    y=Xy.loc[:, 'class']
)
print(poly.get_feature_names_out())
print(interaction_terms)

['pres']
[[ 72.]
 [ 66.]
 [ 64.]
 [ 66.]
 [ 40.]
 [ 74.]
 [ 50.]
 [  0.]
 [ 70.]
 [ 96.]
 [ 92.]
 [ 74.]
 [ 80.]
 [ 60.]
 [ 72.]
 [  0.]
 [ 84.]
 [ 74.]
 [ 30.]
 [ 70.]
 [ 88.]
 [ 84.]
 [ 90.]
 [ 80.]
 [ 94.]
 [ 70.]
 [ 76.]
 [ 66.]
 [ 82.]
 [ 92.]
 [ 75.]
 [ 76.]
 [ 58.]
 [ 92.]
 [ 78.]
 [ 60.]
 [ 76.]
 [ 76.]
 [ 68.]
 [ 72.]
 [ 64.]
 [ 84.]
 [ 92.]
 [110.]
 [ 64.]
 [ 66.]
 [ 56.]
 [ 70.]
 [ 66.]
 [  0.]
 [ 80.]
 [ 50.]
 [ 66.]
 [ 90.]
 [ 66.]
 [ 50.]
 [ 68.]
 [ 88.]
 [ 82.]
 [ 64.]
 [  0.]
 [ 72.]
 [ 62.]
 [ 58.]
 [ 66.]
 [ 74.]
 [ 88.]
 [ 92.]
 [ 66.]
 [ 85.]
 [ 66.]
 [ 64.]
 [ 90.]
 [ 86.]
 [ 75.]
 [ 48.]
 [ 78.]
 [ 72.]
 [  0.]
 [ 66.]
 [ 44.]
 [  0.]
 [ 78.]
 [ 65.]
 [108.]
 [ 74.]
 [ 72.]
 [ 68.]
 [ 70.]
 [ 68.]
 [ 55.]
 [ 80.]
 [ 78.]
 [ 72.]
 [ 82.]
 [ 72.]
 [ 62.]
 [ 48.]
 [ 50.]
 [ 90.]
 [ 72.]
 [ 60.]
 [ 96.]
 [ 72.]
 [ 65.]
 [ 56.]
 [122.]
 [ 58.]
 [ 58.]
 [ 85.]
 [ 72.]
 [ 62.]
 [ 76.]
 [ 62.]
 [ 54.]
 [ 92.]
 [ 74.]
 [ 48.]
 [ 60.]
 [ 76.]
 [ 76.]
 [ 64.]
 [ 74.]
 [ 80.]

In [9]:
population_interactions = GroupStructure.detector_interactions(Xy, feats_selected)
population_interactions

[[2]]

In [10]:
population = GroupStructure.detector_monotonicity(Xy, population_interactions)
population

([[[2], 0]], [1, 1, 1, 1, 1, 1, 1, 1])

In [11]:
gs_1 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, 1, -1],
    [0, 1],
    [[2, 5], 1],
    [[4], 0],
    [[7, 3, 6], 1]
)

gs_2 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, -1, -1],
    [0],
    [[1, 2, 3, 4, 5], 1],
    [[6, 7], 1]
)

gs_3 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, 1, 1],
    [0, 1, 2, 3, 4, 5, 6, 7]
)

gs_4 = GroupStructure(
    [0, 1, 2, 3, 4, 5, 6, 7],
    [1, 1, 1, 1, 1, 1, 1, 1],
    [],
    [[0, 1, 2, 3, 4, 5, 6, 7], 1]
)

tmp = Dataset(
    X=Xy.loc[:, Xy.columns != 'class'],
    y=Xy.loc[:, 'class'],
    class_pos='tested_positive',
    group_structure=gs_1
)
len(tmp)
tmp[2]

gs_1.get_unconstrained_features()

print(gs_1.get_feature_signs()[0])
print(gs_1.get_feature_signs()[2])
print(gs_1.get_feature_signs()[6])

population = [
    {'total_layers': 3, 'nodes_per_hidden_layer': 14, 'group_structure': gs_1, 'metrics': {'mean': (0.75, 0.15, 0.5, 0.4)}},
    {'total_layers': 5, 'nodes_per_hidden_layer': 5, 'group_structure': gs_2, 'metrics': {'mean': (0.8, 0.15, 0.5, 0.4)}},
    {'total_layers': 7, 'nodes_per_hidden_layer': 3, 'group_structure': gs_3, 'metrics': {'mean': (0.6, 0.27, 0.4, 0.6)}},
    {'total_layers': 7, 'nodes_per_hidden_layer': 3, 'group_structure': gs_4, 'metrics': {'mean': (0.6, 0.27, 0.4, 0.6)}}
]
ranks_nds = ndomsort.non_domin_sort(
    [individual['metrics']['mean'] for individual in population],
    get_objectives=lambda elem: (1 - elem[0], *[elem[i] for i in range(1, len(elem))]),
    only_front_indices=True
)
print(f'ranks_nds: {ranks_nds}')
hp_bounds = {
    'total_layers': (3, 10),
    'nodes_per_hidden_layer': (3, 20)
}

os_1, os_2 = generate_offspring(2, population[:2], ranks_nds, hp_bounds)# GroupStructure.gga_crossover(gs_1, gs_2)
print('os_1', os_1, os_1['group_structure'])
print('os_2', os_2, os_2['group_structure'])

os_3, os_4 = generate_offspring(2, population[2:], ranks_nds, hp_bounds)# GroupStructure.gga_crossover(gs_3, gs_4)
print('os_3', os_3, os_3['group_structure'])
print('os_4', os_4, os_4['group_structure'])


1
1
1
ranks_nds: (1, 0, 0, 0)
os_1 {'total_layers': 6, 'nodes_per_hidden_layer': 4, 'group_structure': <classes.GroupStructure object at 0x00000065C577D150>} ([3, 5], [[[7, 1, 4, 0], 0], [[6, 2], -1]])
os_2 {'total_layers': 4, 'nodes_per_hidden_layer': 3, 'group_structure': <classes.GroupStructure object at 0x00000065C584E580>} ([1, 0], [[[3, 4, 5], -1], [[2, 6, 7], 1]])
os_3 {'total_layers': 7, 'nodes_per_hidden_layer': 4, 'group_structure': <classes.GroupStructure object at 0x00000065C5930390>} ([3], [[[0, 1, 2, 4, 5, 6, 7], 1]])
os_4 {'total_layers': 7, 'nodes_per_hidden_layer': 3, 'group_structure': <classes.GroupStructure object at 0x00000065C581A750>} ([2, 3, 4, 5, 7], [[[0, 1, 6], 0]])


In [12]:
# outer
data_train_test, data_val = train_test_split(
    Xy,
    train_size=2/3,
    shuffle=True,
    stratify=Xy.loc[:, 'class']
)

# reset indices as StratifiedKFold assumes consecutive index
data_train_test = data_train_test.reset_index(drop=True)
data_val = data_val.reset_index(drop=True)

# inner
cv_inner = StratifiedKFold(
    n_splits=5,
    shuffle=False  # TODO: set to True
)

'''
- for each individual in the configuration, run k folds + average its performance
    -> in each fold, additionally split the training data, train on larger split, use smaller split to determine early stopping
    -> average early stopping epoch over all folds, report back with average performance
- find pareto front
- evaluate pareto front's performance on holdout test set, each model of the front is trained for the average of the epochs determined by early stopping in CV
'''
mu = 3  # TODO: set to 100
la = 2  # TODO: set to 10
monotonicity_clipper = WeightClipper(0, None)  # enforce monotonicity by clipping weights to [0, infty) after each epoch (in def train)
run_eagga_cv(mu, la, cv_inner, data_train_test, categorical_indicator, epochs=10, batch_size=8, weight_clipper=monotonicity_clipper)

initial population
total layers 4, nodes_per_hidden_layer 3, gs: ([0, 1, 2, 5, 6, 7], [[[3, 4], 0]])
total layers 3, nodes_per_hidden_layer 3, gs: ([0, 2, 3, 4, 5, 6, 7], [[[1], 1]])
total layers 6, nodes_per_hidden_layer 3, gs: ([1, 2, 3, 4, 6, 7], [[[0, 5], 0]])
Evolution 1, evaluate 3 individuals
running HPO for individual 1/3: 4 total_layers, 3 nodes per hidden layer
fold 1/5 | (0.5296434494195689, 0.25, 0.03571428571428571, 0.25)
fold 2/5 | (0.47346600331674965, 0.25, 0.03571428571428571, 0.25)
fold 3/5 | (0.5, 0.25, 0.03571428571428571, 0.25)
fold 4/5 | (0.6574074074074074, 0.25, 0.03571428571428571, 0.25)
fold 5/5 | (0.5, 0.25, 0.03571428571428571, 0.25)
running HPO for individual 2/3: 3 total_layers, 3 nodes per hidden layer
fold 1/5 | (0.5, 0.125, 0.0, 0.0)
fold 2/5 | (0.8009950248756219, 0.125, 0.0, 0.0)
fold 3/5 | (0.19907407407407407, 0.125, 0.0, 0.0)
fold 4/5 | (0.26557239057239057, 0.125, 0.0, 0.0)
fold 5/5 | (0.2260127931769723, 0.125, 0.0, 0.0)
running HPO for individua