In [1]:
from openml import tasks

from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.feature_selection import mutual_info_classif

import numpy as np

from nds import ndomsort

from classes import Dataset, GroupStructure, WeightClipper, Prob
from functions import run_eagga_cv, generate_offspring

In [2]:
# OpenML task id used throughout this notebook; judging by the variable name and
# the columns previewed below, it refers to the diabetes classification task.
OML_TASK_ID_DIABETES = 37
oml_task_diabetes = tasks.get_task(OML_TASK_ID_DIABETES)

In [3]:
# Fetch the dataset behind the task and unpack its contents.
# No target name is passed to get_data(), so the label column `class` stays
# inside X (visible in the preview of X further down).
# NOTE(review): y is presumably None in that case — confirm against the OpenML docs.
oml_dataset_diabetes = oml_task_diabetes.get_dataset()
X, y, categorical_indicator, attribute_names = oml_dataset_diabetes.get_data()

In [4]:
# Preview the raw frame; note that the label column `class` is part of it.
X

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,tested_positive
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,tested_negative
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,tested_positive
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,tested_negative
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,tested_positive
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,tested_negative
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,tested_negative
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,tested_negative
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,tested_positive


In [5]:
# Work on an independent copy so the frame returned by OpenML stays untouched.
# The name reflects that this frame carries both the features and the `class` label.
Xy = X.copy(deep=True)

In [6]:
# One flag per column of X; only the last column (`class`) is marked categorical.
categorical_indicator

[False, False, False, False, False, False, False, False, True]

In [7]:
# Let the project's feature detector decide which columns to keep.
# The result is used further down as positional column indices into Xy.
feats_selected = GroupStructure.detector_features(Xy, categorical_indicator)
print(feats_selected)

[1, 5, 6, 4, 7, 0, 2]


In [8]:
# Expand the selected features with all pairwise products (degree-2 interaction
# terms only: no squares, no bias column).
#
# Fitting on a DataFrame lets scikit-learn record the column names itself, so
# there is no need to assign `feature_names_in_` by hand on the unfitted
# estimator. PolynomialFeatures is unsupervised and ignores `y`, so the label
# column is not passed either.
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
interaction_terms = poly.fit_transform(Xy.iloc[:, [*feats_selected]])

# Names of the generated columns (original features first, then the products),
# followed by the transformed matrix itself.
print(poly.get_feature_names_out())
print(interaction_terms)

['plas' 'mass' 'pedi' 'insu' 'age' 'preg' 'pres' 'plas mass' 'plas pedi'
 'plas insu' 'plas age' 'plas preg' 'plas pres' 'mass pedi' 'mass insu'
 'mass age' 'mass preg' 'mass pres' 'pedi insu' 'pedi age' 'pedi preg'
 'pedi pres' 'insu age' 'insu preg' 'insu pres' 'age preg' 'age pres'
 'preg pres']
[[1.480e+02 3.360e+01 6.270e-01 ... 3.000e+02 3.600e+03 4.320e+02]
 [8.500e+01 2.660e+01 3.510e-01 ... 3.100e+01 2.046e+03 6.600e+01]
 [1.830e+02 2.330e+01 6.720e-01 ... 2.560e+02 2.048e+03 5.120e+02]
 ...
 [1.210e+02 2.620e+01 2.450e-01 ... 1.500e+02 2.160e+03 3.600e+02]
 [1.260e+02 3.010e+01 3.490e-01 ... 4.700e+01 2.820e+03 6.000e+01]
 [9.300e+01 3.040e+01 3.150e-01 ... 2.300e+01 1.610e+03 7.000e+01]]


In [9]:
# Partition the selected features into interaction groups (lists of column indices).
# NOTE(review): the solver warnings in the output link to scikit-learn's logistic
# regression docs, so the detector presumably fits a logistic regression that hits
# its iteration limit — scaling the features or raising max_iter inside the
# detector would likely silence them. Confirm in classes.GroupStructure.
population_interactions = GroupStructure.detector_interactions(Xy, feats_selected)
population_interactions

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

[[1, 2], [5], [6], [4], [7], [0]]

In [10]:
# Run the monotonicity detector on the interaction groups found above.
# As the output shows, the result is a 2-tuple: each group paired with a 0/1 flag,
# followed by a flat list of eight values (presumably one per feature column).
# NOTE(review): the exact meaning of the flag values is defined in
# classes.GroupStructure — confirm there.
population = GroupStructure.detector_monotonicity(Xy, population_interactions)
population

([[[1, 2], 1], [[5], 0], [[6], 1], [[4], 0], [[7], 1], [[0], 1]],
 [1, 1, 1, 1, 1, 1, 1, 1])

In [11]:
# --- Hand-built group structures used to smoke-test the genetic operators ---
# Every structure covers the eight feature columns 0..7. The roles of the
# positional arguments are defined in classes.GroupStructure; as used here they
# are: all feature indices, one sign per feature, a list of feature indices, and
# then any number of [feature group, flag] pairs.

# Mixed structure: three groups, last feature carries a negative sign.
gs_1 = GroupStructure(
    list(range(8)),
    [1] * 7 + [-1],
    [0, 1],
    [[2, 5], 1],
    [[4], 0],
    [[7, 3, 6], 1]
)

# Two groups, last two features carry a negative sign.
gs_2 = GroupStructure(
    list(range(8)),
    [1] * 6 + [-1] * 2,
    [0],
    [[1, 2, 3, 4, 5], 1],
    [[6, 7], 1]
)

# Edge case: every feature sits in the third argument, no groups at all.
gs_3 = GroupStructure(
    list(range(8)),
    [1] * 8,
    list(range(8))
)

# Edge case: third argument empty, all features in one single group.
gs_4 = GroupStructure(
    list(range(8)),
    [1] * 8,
    [],
    [list(range(8)), 1]
)

# Wrap the data in the project's Dataset type and poke at it once; the two bare
# expressions below only verify that length and item access do not raise.
tmp = Dataset(
    X=Xy.loc[:, Xy.columns != 'class'],
    y=Xy.loc[:, 'class'],
    class_pos='tested_positive',
    group_structure=gs_1
)
len(tmp)
tmp[2]

# Same idea for the accessor: call it once, result intentionally unused.
gs_1.get_unconstrained_features()

# Spot-check the sign reported for a few individual features.
for feature_idx in (0, 2, 6):
    print(gs_1.get_feature_signs()[feature_idx])

# Toy population: hyperparameters, a group structure and made-up mean metrics.
population = [
    {
        'total_layers': 3,
        'nodes_per_hidden_layer': 14,
        'group_structure': gs_1,
        'metrics': {'mean': (0.75, 0.15, 0.5, 0.4)}
    },
    {
        'total_layers': 5,
        'nodes_per_hidden_layer': 5,
        'group_structure': gs_2,
        'metrics': {'mean': (0.8, 0.15, 0.5, 0.4)}
    },
    {
        'total_layers': 7,
        'nodes_per_hidden_layer': 3,
        'group_structure': gs_3,
        'metrics': {'mean': (0.6, 0.27, 0.4, 0.6)}
    },
    {
        'total_layers': 7,
        'nodes_per_hidden_layer': 3,
        'group_structure': gs_4,
        'metrics': {'mean': (0.6, 0.27, 0.4, 0.6)}
    }
]

# Non-dominated sorting over the metric tuples. The first metric is flipped to
# 1 - value before sorting; the remaining metrics are passed through unchanged.
ranks_nds = ndomsort.non_domin_sort(
    [individual['metrics']['mean'] for individual in population],
    get_objectives=lambda elem: (1 - elem[0], *elem[1:]),
    only_front_indices=True
)
print(f'ranks_nds: {ranks_nds}')

# Inclusive-looking (low, high) bounds handed to the offspring generator.
hp_bounds = {'total_layers': (3, 10), 'nodes_per_hidden_layer': (3, 20)}

# Offspring of the first two individuals
# (earlier direct call: GroupStructure.gga_crossover(gs_1, gs_2)).
os_1, os_2 = generate_offspring(2, population[:2], ranks_nds, hp_bounds)
print('os_1', os_1, os_1['group_structure'])
print('os_2', os_2, os_2['group_structure'])

# Offspring of the two edge-case individuals
# (earlier direct call: GroupStructure.gga_crossover(gs_3, gs_4)).
os_3, os_4 = generate_offspring(2, population[2:], ranks_nds, hp_bounds)
print('os_3', os_3, os_3['group_structure'])
print('os_4', os_4, os_4['group_structure'])


1
1
1
ranks_nds: (1, 0, 0, 0)
os_1 {'total_layers': 5, 'nodes_per_hidden_layer': 4, 'group_structure': <classes.GroupStructure object at 0x0000005222CD9150>} ([1, 2, 5], [[[], -1], [[3, 4, 0, 6, 7], 0]])
os_2 {'total_layers': 6, 'nodes_per_hidden_layer': 9, 'group_structure': <classes.GroupStructure object at 0x0000005222DAEBE0>} ([2, 3, 0], [[[1, 5], -1], [[4, 6, 7], 1]])
os_3 {'total_layers': 6, 'nodes_per_hidden_layer': 3, 'group_structure': <classes.GroupStructure object at 0x0000005226ED9300>} ([1, 2, 5, 7], [[[0, 3, 4, 6], -1]])
os_4 {'total_layers': 8, 'nodes_per_hidden_layer': 4, 'group_structure': <classes.GroupStructure object at 0x0000005222D4E510>} ([0, 3, 5, 6], [[[1, 2, 4, 7], -1]])


In [12]:
# Outer split: two thirds of the rows feed the inner cross-validation, the
# remaining third is kept aside (presumably the holdout set of the plan below).
# The seed pins the stratified shuffle so that a restarted kernel reproduces
# exactly the same partition.
SPLIT_SEED = 42
data_train_test, data_val = train_test_split(
    Xy,
    train_size=2/3,
    shuffle=True,
    stratify=Xy.loc[:, 'class'],
    random_state=SPLIT_SEED
)

# reset indices as StratifiedKFold assumes consecutive index
# NOTE(review): StratifiedKFold.split itself yields positional indices; the reset
# presumably matters for label-based lookups downstream — confirm in run_eagga_cv.
data_train_test = data_train_test.reset_index(drop=True)
data_val = data_val.reset_index(drop=True)

# Inner resampling used to score every individual.
cv_inner = StratifiedKFold(
    n_splits=5,
    shuffle=False  # TODO: set to True (and pass a random_state as well once shuffling is on)
)

# Plan:
# - for each individual in the configuration, run k folds + average its performance
#     -> in each fold, additionally split the training data, train on the larger
#        split, use the smaller split to determine early stopping
#     -> average the early stopping epoch over all folds, report back with the
#        average performance
# - find the Pareto front
# - evaluate the Pareto front's performance on the holdout test set; each model
#   of the front is trained for the average of the epochs determined by early
#   stopping in CV
mu = 3  # TODO: set to 100
la = 2  # TODO: set to 10
patience = 2

# Runtime budget in seconds (small value for quick experiments; the original note
# suggests scaling it by * 60 * 60 for real runs). This is not a hard stopping
# limit: once we're running, all offspring will be evaluated across all folds
# before actually stopping.
max_runtime_secs = 15

# enforce monotonicity by clipping weights to [0, infty) after each epoch (in def train)
monotonicity_clipper = WeightClipper(0, None)

run_eagga_cv(
    mu,
    la,
    cv_inner,
    data_train_test,
    categorical_indicator,
    epochs=10,
    batch_size=8,
    patience=patience,
    weight_clipper=monotonicity_clipper,
    secs=max_runtime_secs
)

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

initial population
total layers 4, nodes_per_hidden_layer 5, gs: ([2, 3, 5, 6, 7], [[[0, 1], 1], [[4], 1]])
total layers 4, nodes_per_hidden_layer 3, gs: ([0, 2, 3, 5], [[[4, 7], 0], [[6], 0], [[1], 1]])
total layers 3, nodes_per_hidden_layer 4, gs: ([0, 2, 3, 6, 7], [[[1, 4, 5], 1]])
start EA at 2025-02-25T00:28:20.403318
Evolution 1, evaluate 3 individuals
running HPO for individual 1/3: 4 total_layers, 5 nodes per hidden layer
fold 1/5 | stop early: 0.47648696736855944 < 0.476510841738094, optimal epoch 88 | (0.8271144278606967, 0.375, 0.03571428571428571, 0.0) 88
fold 2/5 | (0.8314676616915423, 0.375, 0.03571428571428571, 0.0) 97
fold 3/5 | stop early: 0.5373477461663159 < 0.5379447747360576, optimal epoch 48 | (0.7535181236673775, 0.375, 0.03571428571428571, 0.0) 48
fold 4/5 | stop early: 0.649298375303095 < 0.6512377749789845, optimal epoch 3 | (0.5117845117845118, 0.375, 0.03571428571428571, 0.0) 3
fold 5/5 | stop early: 0.643987853418697 < 0.6504439603198658, optimal epoch 4 | 