In [1]:
import pickle
import pandas as pd

import tensorflow as tf
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer

from src.MonoFADLModel import MonoFADLModel
from src.MultiFADLModelOvR import MultiFADLModelOvR
from src.NoSelectionModel import NoSelectionModel

# Seed for neural network executions
SEED = 1234
np.random.seed(SEED)
tf.random.set_seed(SEED)


In [2]:
comparative_results = pd.DataFrame(columns=['Accuracy', 'Number of selected features', 'Selected Features'])

* Preprocesamiento

In [16]:
# Load dataset and preprocess it
# https://www.kaggle.com/datasets/andrewmvd/fetal-health-classification

fetalHealth = pd.read_csv('data/fetal_health.csv')
fetalHealth['fetal_health'] = fetalHealth['fetal_health']-1
fetalHealth['fetal_health'] = fetalHealth['fetal_health'].astype('int32').astype('category')

# Picke store 
with open('data/fetal_health.pkl', 'wb') as f:
    pickle.dump(fetalHealth, f)

fetalHealth = pickle.load(open('data/fetal_health.pkl', 'rb'))
fetalHealth

Unnamed: 0,baseline value,accelerations,fetal_movement,uterine_contractions,light_decelerations,severe_decelerations,prolongued_decelerations,abnormal_short_term_variability,mean_value_of_short_term_variability,percentage_of_time_with_abnormal_long_term_variability,...,histogram_min,histogram_max,histogram_number_of_peaks,histogram_number_of_zeroes,histogram_mode,histogram_mean,histogram_median,histogram_variance,histogram_tendency,fetal_health
0,120.0,0.000,0.000,0.000,0.000,0.0,0.0,73.0,0.5,43.0,...,62.0,126.0,2.0,0.0,120.0,137.0,121.0,73.0,1.0,1
1,132.0,0.006,0.000,0.006,0.003,0.0,0.0,17.0,2.1,0.0,...,68.0,198.0,6.0,1.0,141.0,136.0,140.0,12.0,0.0,0
2,133.0,0.003,0.000,0.008,0.003,0.0,0.0,16.0,2.1,0.0,...,68.0,198.0,5.0,1.0,141.0,135.0,138.0,13.0,0.0,0
3,134.0,0.003,0.000,0.008,0.003,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,11.0,0.0,137.0,134.0,137.0,13.0,1.0,0
4,132.0,0.007,0.000,0.008,0.000,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,9.0,0.0,137.0,136.0,138.0,11.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2121,140.0,0.000,0.000,0.007,0.000,0.0,0.0,79.0,0.2,25.0,...,137.0,177.0,4.0,0.0,153.0,150.0,152.0,2.0,0.0,1
2122,140.0,0.001,0.000,0.007,0.000,0.0,0.0,78.0,0.4,22.0,...,103.0,169.0,6.0,0.0,152.0,148.0,151.0,3.0,1.0,1
2123,140.0,0.001,0.000,0.007,0.000,0.0,0.0,79.0,0.4,20.0,...,103.0,170.0,5.0,0.0,153.0,148.0,152.0,4.0,1.0,1
2124,140.0,0.001,0.000,0.006,0.000,0.0,0.0,78.0,0.4,27.0,...,103.0,169.0,6.0,0.0,152.0,147.0,151.0,4.0,1.0,1


In [17]:
XfetalHealth = fetalHealth.drop(['fetal_health'], axis=1)
yfetalHealth = fetalHealth['fetal_health']

XfetalHealth_trainval, XfetalHealth_test, yfetalHealth_trainval, yfetalHealth_test = train_test_split(
    XfetalHealth, 
    yfetalHealth, test_size=0.2, 
    random_state=SEED)

XfetalHealth_train, XfetalHealth_val, yfetalHealth_train, yfetalHealth_val = train_test_split(
    XfetalHealth_trainval, yfetalHealth_trainval, test_size=0.25, 
    random_state=SEED)

In [18]:
yfetalHealth_train.value_counts(), yfetalHealth_val.value_counts(), yfetalHealth_test.value_counts()

(fetal_health
 0    993
 1    170
 2    112
 Name: count, dtype: int64,
 fetal_health
 0    338
 1     56
 2     31
 Name: count, dtype: int64,
 fetal_health
 0    324
 1     69
 2     33
 Name: count, dtype: int64)

In [19]:
# Normalize numerical variables
def categorize_variables(df):

    categorical = []
    numerical = []

    for column in df.columns:
        unique_values = df[column].unique()
        n_unique = len(unique_values)

        if n_unique <= 10:
            categorical.append((column, unique_values.tolist()))
        else:
            numerical.append(column)

    return {
        'categorical': categorical,
        'numerical': numerical
    }
variables_numericas = categorize_variables(fetalHealth.drop('fetal_health', axis=1))['numerical']

scaler = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), variables_numericas)
    ],
    remainder='passthrough'  # No escalar las demás variables
)

# Normalize train set
XfetalHealth_train_scaled = scaler.fit_transform(XfetalHealth_train)

# Normalize val and test set
XfetalHealth_val_scaled = scaler.transform(XfetalHealth_val)
XfetalHealth_test_scaled = scaler.transform(XfetalHealth_test)

XfetalHealth_train_scaled = pd.DataFrame(XfetalHealth_train_scaled, columns=XfetalHealth_train.columns)
XfetalHealth_val_scaled = pd.DataFrame(XfetalHealth_val_scaled, columns=XfetalHealth_val.columns)
XfetalHealth_test_scaled = pd.DataFrame(XfetalHealth_test_scaled, columns=XfetalHealth_test.columns)


* Noselection results

In [20]:
model = NoSelectionModel(
    n_inputs=XfetalHealth_train_scaled.columns.values.shape[0],
    n_class=yfetalHealth_train.unique().shape[0]
)       

model.fit(
    XfetalHealth_train_scaled,
    yfetalHealth_train,
    XfetalHealth_val_scaled,
    yfetalHealth_val,
    epochs=50
)

model.evaluate(XfetalHealth_test_scaled, yfetalHealth_test)


# Pickle store
with open('results/fetalHealth_NoSelection.pkl', 'wb') as f:
    pickle.dump(model, f)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


In [21]:
verbose1 = model.get_verbose()
verbose1

{'model': <keras.src.engine.functional.Functional at 0x1f13d473a90>,
 'selected_features': array(['baseline value', 'accelerations', 'fetal_movement',
        'uterine_contractions', 'light_decelerations',
        'severe_decelerations', 'prolongued_decelerations',
        'abnormal_short_term_variability',
        'mean_value_of_short_term_variability',
        'percentage_of_time_with_abnormal_long_term_variability',
        'mean_value_of_long_term_variability', 'histogram_width',
        'histogram_min', 'histogram_max', 'histogram_number_of_peaks',
        'histogram_number_of_zeroes', 'histogram_mode', 'histogram_mean',
        'histogram_median', 'histogram_variance', 'histogram_tendency'],
       dtype=object),
 'predictionsproba': array([[9.9944705e-01, 5.5290404e-04, 2.7133705e-08],
        [9.9997818e-01, 2.1712964e-05, 1.4948013e-07],
        [9.9999964e-01, 4.1476207e-07, 8.9565395e-12],
        ...,
        [2.0556824e-04, 9.6953762e-01, 3.0256731e-02],
        [8.8017970

In [22]:
comparative_results.loc['NoSelection'] = [verbose1['results'][1], verbose1['selected_features'].shape[0], verbose1['selected_features']]
comparative_results

Unnamed: 0,Accuracy,Number of selected features,Selected Features
NoSelection,0.915493,21,"[baseline value, accelerations, fetal_movement..."


* MonoFADL results

In [23]:
model2 = MonoFADLModel(
    n_inputs=XfetalHealth_train_scaled.columns.values.shape[0],
    n_class=yfetalHealth_train.unique().shape[0]
)

model2.fit(
    XfetalHealth_train_scaled,
    yfetalHealth_train,
    XfetalHealth_val_scaled,
    yfetalHealth_val,
    epochs=50
)

model2.evaluate(XfetalHealth_test_scaled, yfetalHealth_test)

model2.get_verbose()

# Pickle store
with open('results/fetalHealth_MonoFADL.pkl', 'wb') as f:
    pickle.dump(model2, f)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50


In [24]:
verbose2 = model2.get_verbose()
verbose2

{'model': <keras.src.engine.functional.Functional at 0x1f14511f010>,
 'selected_features': array(['baseline value', 'accelerations', 'uterine_contractions',
        'light_decelerations', 'severe_decelerations',
        'abnormal_short_term_variability', 'histogram_width',
        'histogram_max', 'histogram_number_of_peaks',
        'histogram_number_of_zeroes', 'histogram_mode',
        'histogram_variance'], dtype=object),
 'predictionsproba': array([[4.1674799e-01, 5.8324796e-01, 4.0452314e-06],
        [9.9957544e-01, 4.2120714e-04, 3.3428025e-06],
        [9.9998188e-01, 1.8109533e-05, 3.0906119e-08],
        ...,
        [2.8483808e-01, 5.5359155e-01, 1.6157028e-01],
        [7.8438121e-01, 1.4272135e-01, 7.2897442e-02],
        [7.9916894e-01, 1.2450719e-01, 7.6323837e-02]], dtype=float32),
 'results': [0.27361956238746643, 0.8873239159584045]}

In [25]:
comparative_results.loc['MonoFADL'] = [verbose2['results'][1], verbose2['selected_features'].shape[0], verbose2['selected_features']]
comparative_results

Unnamed: 0,Accuracy,Number of selected features,Selected Features
NoSelection,0.915493,21,"[baseline value, accelerations, fetal_movement..."
MonoFADL,0.887324,12,"[baseline value, accelerations, uterine_contra..."


* MultiFADL One-versus-Rest results

In [26]:
model3 = MultiFADLModelOvR(
)

model3.fit(
    XfetalHealth_train_scaled,
    yfetalHealth_train,
    XfetalHealth_val_scaled,
    yfetalHealth_val,
    epochs=50
)

model3.evaluate(XfetalHealth_test_scaled, yfetalHealth_test)

model3.get_verbose()

# Pickle store
with open('results/fetalHealth_MultiFADL.pkl', 'wb') as f:
    pickle.dump(model3, f)

--> Training model class 0 vs rest
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
--> Training model class 2 vs rest
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
--> Training model class 1 vs rest
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


In [27]:
verbose3 = model3.get_verbose()
verbose3

{'models': {0: <src.MonoFADLModel.MonoFADLModel at 0x1f146789a10>,
  2: <src.MonoFADLModel.MonoFADLModel at 0x1f1489647d0>,
  1: <src.MonoFADLModel.MonoFADLModel at 0x1f14a34d610>},
 'selected_features_per_class': {0: array(['baseline value', 'accelerations', 'fetal_movement',
         'uterine_contractions', 'light_decelerations',
         'severe_decelerations', 'abnormal_short_term_variability',
         'histogram_width', 'histogram_max', 'histogram_number_of_peaks',
         'histogram_mode', 'histogram_tendency'], dtype=object),
  2: array(['baseline value', 'accelerations', 'fetal_movement',
         'uterine_contractions', 'severe_decelerations',
         'abnormal_short_term_variability',
         'mean_value_of_long_term_variability', 'histogram_max',
         'histogram_number_of_peaks', 'histogram_tendency'], dtype=object),
  1: array(['baseline value', 'accelerations', 'light_decelerations',
         'abnormal_short_term_variability',
         'mean_value_of_short_term_var

In [28]:
comparative_results.loc['MultiFADL'] = [verbose3['acc_global'], 
                                        {clas: verbose3['selected_features_per_class'][clas].shape[0] for clas in verbose3['selected_features_per_class']},
                                        verbose3['selected_features_per_class']]
comparative_results

Unnamed: 0,Accuracy,Number of selected features,Selected Features
NoSelection,0.915493,21,"[baseline value, accelerations, fetal_movement..."
MonoFADL,0.887324,12,"[baseline value, accelerations, uterine_contra..."
MultiFADL,0.908451,"{0: 12, 2: 10, 1: 8}","{0: ['baseline value', 'accelerations', 'fetal..."


In [29]:
comparative_results.to_csv('results/fetalHealth_ComparativeResults.csv')