# Modelo de Red Neuronal según los principios de Federated Learning

### Importaciones

In [8]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential, load_model, save_model, Model
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model

### Importación de los datos

In [9]:
# Leer el archivo CSV
df = pd.read_csv('diabetic_data.csv')

# Mostrar las primeras filas del dataset
print(df.head())

# Resumen de la información del dataset
print(df.info())

# Mostrar la cantidad de valores nulos por columna
print(df.isnull().sum())

   encounter_id  patient_nbr             race  gender      age weight  \
0       2278392      8222157        Caucasian  Female   [0-10)      ?   
1        149190     55629189        Caucasian  Female  [10-20)      ?   
2         64410     86047875  AfricanAmerican  Female  [20-30)      ?   
3        500364     82442376        Caucasian    Male  [30-40)      ?   
4         16680     42519267        Caucasian    Male  [40-50)      ?   

   admission_type_id  discharge_disposition_id  admission_source_id  \
0                  6                        25                    1   
1                  1                         1                    7   
2                  1                         1                    7   
3                  1                         1                    7   
4                  1                         1                    7   

   time_in_hospital  ... citoglipton insulin  glyburide-metformin  \
0                 1  ...          No      No                   No

### Procesamiento de los datos

In [10]:
# Leer el archivo CSV, tratando '?' como valores nulos
df = pd.read_csv('diabetic_data.csv', na_values='?')

# Mostrar la cantidad de valores nulos por columna
print(df.isnull().sum())

# Guardar el DataFrame modificado en un nuevo archivo CSV
df.to_csv('diabetic_data_modified.csv', index=False)

print("El archivo modificado ha sido guardado como 'diabetic_data_modified.csv'.")


  df = pd.read_csv('diabetic_data.csv', na_values='?')


encounter_id                    0
patient_nbr                     0
race                         2273
gender                          0
age                             0
weight                      98569
admission_type_id               0
discharge_disposition_id        0
admission_source_id             0
time_in_hospital                0
payer_code                  40256
medical_specialty           49949
num_lab_procedures              0
num_procedures                  0
num_medications                 0
number_outpatient               0
number_emergency                0
number_inpatient                0
diag_1                         21
diag_2                        358
diag_3                       1423
number_diagnoses                0
max_glu_serum                   0
A1Cresult                       0
metformin                       0
repaglinide                     0
nateglinide                     0
chlorpropamide                  0
glimepiride                     0
acetohexamide 

In [11]:
# Cargar el dataset
df = pd.read_csv('diabetic_data_modified.csv')

# Asegurarse de manejar correctamente los valores nulos
# Identificar las columnas con valores nulos según la descripción proporcionada
cols_with_missing = ['race', 'weight', 'payer_code', 'medical_specialty', 'diag_1', 'diag_2', 'diag_3']

# Imputar los valores nulos con la moda para variables categóricas
categorical_imputer = SimpleImputer(strategy='most_frequent')
df[cols_with_missing] = categorical_imputer.fit_transform(df[cols_with_missing])

# Limpiar y convertir 'weight' a valores numéricos y luego categorizarlo
def clean_weight(weight_str):
    if weight_str == "?":
        return None
    elif weight_str.startswith(">"):
        return float(weight_str[1:]) + 1  # Incrementar en 1 para asegurar que los límites sean correctos
    elif weight_str.startswith("["):
        return float(weight_str.strip("[]").split("-")[0])
    elif weight_str == "Unknown":
        return None
    else:
        return float(weight_str)

df['weight'] = df['weight'].apply(clean_weight)

# Definir los rangos de peso y codificar 'weight'
weight_ranges = ['[0-25)', '[25-50)', '[50-75)', '[75-100)', '[100-125)', '[125-150)', '[150-175)', '[175-200)', '>200']
df['weight_category'] = pd.cut(df['weight'], bins=[0, 25, 50, 75, 100, 125, 150, 175, 200, float('inf')], labels=weight_ranges, right=False)

# Eliminar la columna original 'weight'
df.drop(columns=['weight'], inplace=True)

# Función para asignar categorías a los códigos ICD-9
def assign_icd_category(icd_code):
    if pd.isnull(icd_code):
        return 'Unknown'
    if icd_code.startswith(('E', 'V')):
        return 'E-V codes'
    else:
        code_number = int(icd_code.split('.')[0])  # Tomar solo el número de código ICD-9
        if 1 <= code_number <= 139:
            return '001-139'
        elif 140 <= code_number <= 239:
            return '140-239'
        elif 240 <= code_number <= 279:
            return '240-279'
        elif 280 <= code_number <= 289:
            return '280-289'
        elif 290 <= code_number <= 319:
            return '290-319'
        elif 320 <= code_number <= 389:
            return '320-389'
        elif 390 <= code_number <= 459:
            return '390-459'
        elif 460 <= code_number <= 519:
            return '460-519'
        elif 520 <= code_number <= 579:
            return '520-579'
        elif 580 <= code_number <= 629:
            return '580-629'
        elif 630 <= code_number <= 679:
            return '630-679'
        elif 680 <= code_number <= 709:
            return '680-709'
        elif 710 <= code_number <= 739:
            return '710-739'
        elif 740 <= code_number <= 759:
            return '740-759'
        elif 760 <= code_number <= 779:
            return '760-779'
        elif 780 <= code_number <= 799:
            return '780-799'
        elif 800 <= code_number <= 999:
            return '800-999'
        else:
            return 'Other'  # En caso de no encontrar una categoría válida

# Aplicar la función a cada columna de diagnóstico
for col in ['diag_1', 'diag_2', 'diag_3']:
    df[col + '_category'] = df[col].apply(assign_icd_category)

# Eliminar las columnas originales de diagnóstico
df.drop(columns=['diag_1', 'diag_2', 'diag_3'], inplace=True)

# Función para asignar valores únicos a las franjas de edad
def age_to_value(age_str):
    age_mapping = {
        '[0-10)': 5,
        '[10-20)': 15,
        '[20-30)': 25,
        '[30-40)': 35,
        '[40-50)': 45,
        '[50-60)': 55,
        '[60-70)': 65,
        '[70-80)': 75,
        '[80-90)': 85,
        '[90-100)': 95
    }
    return age_mapping.get(age_str, None)

# Aplicar la función de agrupamiento de edades
df['age'] = df['age'].apply(age_to_value)

# Aplicar codificación one-hot a las variables categóricas, excluyendo 'age' ya que está mapeada a valores únicos
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
if 'age' in categorical_cols:
    categorical_cols.remove('age')
encoder = OneHotEncoder(drop='first', sparse=False)
encoded_cols = pd.DataFrame(encoder.fit_transform(df[categorical_cols]))

# Sustituir las columnas originales con las nuevas codificadas
encoded_cols.columns = encoder.get_feature_names_out(categorical_cols)
df.drop(columns=categorical_cols, inplace=True)
df = pd.concat([df, encoded_cols], axis=1)

# Guardar el resultado en un nuevo archivo CSV
df.to_csv('processed_data.csv', index=False)

print("Datos procesados y guardados en 'processed_data.csv'.")


  df = pd.read_csv('diabetic_data_modified.csv')


Datos procesados y guardados en 'processed_data.csv'.


### Creación del modelo de red neuronal, obtención de bias, pesos y métricas

In [12]:
df = pd.read_csv('processed_data.csv')
X = df.drop(columns=['diabetesMed_Yes']) 
Y = df['diabetesMed_Yes']


X_subset = X.iloc[50883: , :]
y_subset = Y.iloc[50883: ]

# Dividir en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_subset,y_subset, test_size=0.2, random_state=42)

# Normalizar los datos de entrada
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Inicializar el modelo
model = Sequential()

# Añadir capas densas
model.add(Dense(258, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))  
model.add(Dropout(0.3)) 
model.add(Dense(32, activation='relu'))  
model.add(Dropout(0.3)) 
model.add(Dense(1, activation='sigmoid'))

# Compilar el modelo
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])

# Función para obtener y mostrar pesos y sesgos
def print_layer_weights(model):
    for i, layer in enumerate(model.layers):
        weights = layer.get_weights()
        if len(weights) > 0: 
            weights, biases = weights
            print(f"Layer {i} weights:\n{weights}\n")
            print(f"Layer {i} biases:\n{biases}\n")
        else:
            print(f"Layer {i} ({layer.__class__.__name__}) no tiene pesos ni bias.\n")


# Mostrar pesos y sesgos antes del entrenamiento
print("Pesos y sesgos antes del entrenamiento:")
print_layer_weights(model)

# Entrenar el modelo
parametrosModelo = model.fit(X_train_scaled, y_train, epochs=10, batch_size=64, validation_split=0.1)

# Mostrar precisión después del entrenamiento
print()
print("Precisión durante el entrenamiento:")
print("Precisión en el conjunto de entrenamiento:")
print([acc * 100 for acc in parametrosModelo.history['accuracy']])
print()
print("Precisión en el conjunto de validación:")
print([acc * 100 for acc in parametrosModelo.history['val_accuracy']]) 

# Calcular accuracy promedio
avg_accuracy_train = np.mean(parametrosModelo.history['accuracy']) * 100
avg_accuracy_val = np.mean(parametrosModelo.history['val_accuracy']) * 100

print()
print(f"Accuracy promedio en entrenamiento: {avg_accuracy_train:.2f}%")
print(f"Accuracy promedio en validación: {avg_accuracy_val:.2f}%")

# Mostrar pesos y sesgos después del entrenamiento
print()
print("Pesos y sesgos después del entrenamiento:")
print_layer_weights(model)


model.save('diabetes_model2.h5')

test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Loss en el conjunto de prueba: {test_loss}")
print(f"Accuracy en el conjunto de prueba: {test_accuracy}")

# Hacer predicciones
predictions = model.predict(X_test_scaled)
predicted_classes = (predictions > 0.5).astype("int32")

# Evaluar métricas adicionales
print("Matriz de Confusión:")
print(confusion_matrix(y_test, predicted_classes))

print("\nReporte de Clasificación:")
print(classification_report(y_test, predicted_classes))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Pesos y sesgos antes del entrenamiento:
Layer 0 weights:
[[-0.03503197 -0.07515648 -0.00513428 ... -0.06696251 -0.03175566
   0.03766366]
 [-0.00125447  0.05520108  0.03046366 ...  0.0960763  -0.05543126
   0.06987789]
 [-0.07501873 -0.09705907 -0.00146549 ...  0.0348407  -0.04024605
  -0.06422001]
 ...
 [ 0.0397206   0.10499511  0.09925058 ... -0.09617956 -0.05967144
   0.04502701]
 [ 0.08002459 -0.06073426  0.05849119 ... -0.01215655  0.00570613
  -0.00833569]
 [ 0.08311635 -0.02588409 -0.09078434 ...  0.06704467  0.07918165
   0.01881223]]

Layer 0 biases:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.




Precisión durante el entrenamiento:
Precisión en el conjunto de entrenamiento:
[95.56162357330322, 99.54960942268372, 99.61512088775635, 99.68063235282898, 99.7215747833252, 99.7215747833252, 99.84986782073975, 99.85805749893188, 99.84168410301208, 99.74887371063232]

Precisión en el conjunto de validación:
[99.58240985870361, 99.82805252075195, 99.77892637252808, 99.80348944664001, 99.77892637252808, 99.82805252075195, 99.95087385177612, 99.85261559486389, 99.90174174308777, 99.80348944664001]

Accuracy promedio en entrenamiento: 99.31%
Accuracy promedio en validación: 99.81%

Pesos y sesgos después del entrenamiento:
Layer 0 weights:
[[ 1.98230948e-02 -5.59530444e-02  5.91089651e-02 ... -1.01457917e-05
   5.31534962e-02  1.08917661e-01]
 [ 5.92324473e-02  1.00996986e-01  1.09652855e-01 ...  6.75360188e-02
  -7.30945095e-02  1.46710560e-01]
 [-6.80574626e-02 -6.30808547e-02  3.97472316e-03 ...  4.97060120e-02
   7.91861713e-02 -1.87372327e-01]
 ...
 [ 6.91070557e-02  8.07609782e-02  

[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9969 - loss: 0.0028
Loss en el conjunto de prueba: 0.0026125183794647455
Accuracy en el conjunto de prueba: 0.9973469376564026
[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
Matriz de Confusión:
[[2087   15]
 [  12 8063]]

Reporte de Clasificación:
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99      2102
         1.0       1.00      1.00      1.00      8075

    accuracy                           1.00     10177
   macro avg       1.00      1.00      1.00     10177
weighted avg       1.00      1.00      1.00     10177



In [13]:
# Evaluar el modelo
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Loss en el conjunto de prueba: {test_loss}")
print(f"Accuracy en el conjunto de prueba: {test_accuracy}")

# Hacer predicciones
predictions = model.predict(X_test_scaled)
predicted_classes = (predictions > 0.5).astype("int32")

# Evaluar métricas adicionales
print("Matriz de Confusión:")
print(confusion_matrix(y_test, predicted_classes))

print("\nReporte de Clasificación:")
print(classification_report(y_test, predicted_classes))

[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.9969 - loss: 0.0028
Loss en el conjunto de prueba: 0.0026125183794647455
Accuracy en el conjunto de prueba: 0.9973469376564026
[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
Matriz de Confusión:
[[2087   15]
 [  12 8063]]

Reporte de Clasificación:
              precision    recall  f1-score   support

         0.0       0.99      0.99      0.99      2102
         1.0       1.00      1.00      1.00      8075

    accuracy                           1.00     10177
   macro avg       1.00      1.00      1.00     10177
weighted avg       1.00      1.00      1.00     10177



### Separación de los datos en 2 modelos y 1 test y creación de los mismos

In [14]:
# Cargar el dataset
df = pd.read_csv('processed_data.csv')

# Separar características y variable objetivo
X = df.drop(columns=['diabetesMed_Yes'])
Y = df['diabetesMed_Yes']

# Escalar las características
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Separar los datos para los dos modelos
X_train1 = X_scaled[:40706, :]
y_train1 = Y.iloc[:40706]

X_train2 = X_scaled[40707:81412, :]
y_train2 = Y.iloc[40707:81412]

# Datos para prueba
X_test = X_scaled[81412:, :]
y_test = Y.iloc[81412:]

def create_model(input_shape):
    model = Sequential()
    model.add(Dense(258, activation='relu', input_shape=(input_shape,)))
    model.add(Dropout(0.3))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Crear y compilar el modelo para el primer conjunto de datos
model1 = create_model(X_train1.shape[1])
model1.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
model1.fit(X_train1, y_train1, epochs=10, batch_size=32, validation_split=0.2)

# Guardar el modelo 1
model1.save('model1.h5')

# Crear y compilar el modelo para el segundo conjunto de datos
model2 = create_model(X_train2.shape[1])
model2.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
model2.fit(X_train2, y_train2, epochs=10, batch_size=32, validation_split=0.2)

# Guardar el modelo 2
model2.save('model2.h5')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 8ms/step - accuracy: 0.9226 - loss: 0.1933 - val_accuracy: 0.9985 - val_loss: 0.0068
Epoch 2/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9973 - loss: 0.0091 - val_accuracy: 0.9971 - val_loss: 0.0067
Epoch 3/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9976 - loss: 0.0091 - val_accuracy: 0.9974 - val_loss: 0.0089
Epoch 4/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9985 - loss: 0.0046 - val_accuracy: 0.9988 - val_loss: 0.0080
Epoch 5/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9984 - loss: 0.0043 - val_accuracy: 0.9977 - val_loss: 0.0057
Epoch 6/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 8ms/step - accuracy: 0.9994 - loss: 0.0025 - val_accuracy: 0.9968 - val_loss: 0.0063
Epoch 7/10
[1m



Epoch 1/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 8ms/step - accuracy: 0.9096 - loss: 0.2046 - val_accuracy: 0.9993 - val_loss: 0.0045
Epoch 2/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9977 - loss: 0.0112 - val_accuracy: 0.9983 - val_loss: 0.0048
Epoch 3/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 7ms/step - accuracy: 0.9984 - loss: 0.0056 - val_accuracy: 0.9990 - val_loss: 0.0018
Epoch 4/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 7ms/step - accuracy: 0.9991 - loss: 0.0043 - val_accuracy: 0.9995 - val_loss: 0.0023
Epoch 5/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - accuracy: 0.9987 - loss: 0.0047 - val_accuracy: 0.9990 - val_loss: 0.0022
Epoch 6/10
[1m1018/1018[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 7ms/step - accuracy: 0.9993 - loss: 0.0025 - val_accuracy: 0.9991 - val_loss: 0.0029
Epoch 7/10
[



### Obtención de predicciones y su media mediante bagging y evaluarlo

In [15]:
# Cargar los modelos entrenados
model1 = load_model('model1.h5')
model2 = load_model('model2.h5')

# Función para hacer predicciones con Bagging
def bagging_predict(models, data):
    predictions = [model.predict(data) for model in models]
    avg_prediction = np.mean(predictions, axis=0)
    return avg_prediction

# Hacer predicciones con Bagging en los datos de prueba
models = [model1, model2]
y_pred_bagging = bagging_predict(models, X_test)
y_pred_bagging = (y_pred_bagging > 0.5).astype(int)

# Evaluar el rendimiento del modelo combinado
conf_matrix = confusion_matrix(y_test, y_pred_bagging)
class_report = classification_report(y_test, y_pred_bagging)

print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)




[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
[1m637/637[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
Confusion Matrix:
[[ 4176     3]
 [   12 16163]]

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      4179
         1.0       1.00      1.00      1.00     16175

    accuracy                           1.00     20354
   macro avg       1.00      1.00      1.00     20354
weighted avg       1.00      1.00      1.00     20354



### Separación de los datos en 5 modelos y 1 test y creación de los mismos

In [16]:
# Cargar el dataset
df = pd.read_csv('processed_data.csv')

# Separar características y variable objetivo
X = df.drop(columns=['diabetesMed_Yes'])
Y = df['diabetesMed_Yes']

# Escalar las características
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Dividir los datos en 2 conjuntos de entrenamiento de 16961 filas cada uno
train_size = 40706
num_models = 2

# Dividir los datos en 5 conjuntos de entrenamiento de 16961 filas cada uno
train_size = 16961
num_models = 5

X_trains = []
y_trains = []

for i in range(num_models):
    start_index = i * train_size
    end_index = start_index + train_size
    X_trains.append(X_scaled[start_index:end_index, :])
    y_trains.append(Y.iloc[start_index:end_index])

# Datos para prueba
X_test = X_scaled[-train_size:, :]
y_test = Y.iloc[-train_size:]

def create_model(input_shape):
    model = Sequential()
    model.add(Dense(258, activation='relu', input_shape=(input_shape,)))
    model.add(Dropout(0.3))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Entrenar y guardar los modelos
for i in range(num_models):
    model = create_model(X_trains[i].shape[1])
    model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    print()
    print(f"Modelo {i}")
    model.fit(X_trains[i], y_trains[i], epochs=6, batch_size=32, validation_split=0.2)
    model.save(f'modelNuevo{i+1}.h5')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Modelo 0
Epoch 1/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 10ms/step - accuracy: 0.8572 - loss: 0.3295 - val_accuracy: 0.9965 - val_loss: 0.0263
Epoch 2/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9940 - loss: 0.0244 - val_accuracy: 0.9979 - val_loss: 0.0051
Epoch 3/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9979 - loss: 0.0137 - val_accuracy: 0.9973 - val_loss: 0.0060
Epoch 4/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9984 - loss: 0.0110 - val_accuracy: 0.9968 - val_loss: 0.0096
Epoch 5/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9979 - loss: 0.0073 - val_accuracy: 0.9973 - val_loss: 0.0073
Epoch 6/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9993 - loss: 0.0037 - val_accuracy: 0.9973 - val_loss: 0.0123





Modelo 1
Epoch 1/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - accuracy: 0.8485 - loss: 0.3214 - val_accuracy: 0.9973 - val_loss: 0.0271
Epoch 2/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9940 - loss: 0.0371 - val_accuracy: 0.9982 - val_loss: 0.0098
Epoch 3/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9975 - loss: 0.0120 - val_accuracy: 0.9976 - val_loss: 0.0122
Epoch 4/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9982 - loss: 0.0121 - val_accuracy: 0.9985 - val_loss: 0.0081
Epoch 5/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9986 - loss: 0.0114 - val_accuracy: 0.9982 - val_loss: 0.0090
Epoch 6/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9990 - loss: 0.0056 - val_accuracy: 0.9976 - val_loss: 0.0135





Modelo 2
Epoch 1/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.8557 - loss: 0.2982 - val_accuracy: 0.9962 - val_loss: 0.0197
Epoch 2/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9951 - loss: 0.0229 - val_accuracy: 0.9991 - val_loss: 0.0104
Epoch 3/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9974 - loss: 0.0154 - val_accuracy: 0.9985 - val_loss: 0.0182
Epoch 4/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9982 - loss: 0.0080 - val_accuracy: 0.9988 - val_loss: 0.0125
Epoch 5/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - accuracy: 0.9978 - loss: 0.0096 - val_accuracy: 0.9979 - val_loss: 0.0238
Epoch 6/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9980 - loss: 0.0072 - val_accuracy: 0.9965 - val_loss: 0.0127





Modelo 3
Epoch 1/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 8ms/step - accuracy: 0.8607 - loss: 0.3148 - val_accuracy: 0.9962 - val_loss: 0.0290
Epoch 2/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.9956 - loss: 0.0303 - val_accuracy: 0.9985 - val_loss: 0.0139
Epoch 3/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9968 - loss: 0.0156 - val_accuracy: 0.9982 - val_loss: 0.0044
Epoch 4/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9988 - loss: 0.0087 - val_accuracy: 0.9985 - val_loss: 0.0045
Epoch 5/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9975 - loss: 0.0088 - val_accuracy: 0.9988 - val_loss: 0.0096
Epoch 6/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9986 - loss: 0.0088 - val_accuracy: 0.9982 - val_loss: 0.0132





Modelo 4
Epoch 1/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 9ms/step - accuracy: 0.8612 - loss: 0.2983 - val_accuracy: 0.9971 - val_loss: 0.0191
Epoch 2/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9943 - loss: 0.0312 - val_accuracy: 0.9985 - val_loss: 0.0128
Epoch 3/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9970 - loss: 0.0128 - val_accuracy: 0.9991 - val_loss: 0.0064
Epoch 4/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9988 - loss: 0.0042 - val_accuracy: 0.9979 - val_loss: 0.0125
Epoch 5/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9990 - loss: 0.0035 - val_accuracy: 0.9991 - val_loss: 0.0108
Epoch 6/6
[1m424/424[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 7ms/step - accuracy: 0.9993 - loss: 0.0020 - val_accuracy: 0.9973 - val_loss: 0.0188




### Obtención de predicciones y su media mediante bagging y evaluarlo

In [17]:
# Cargar los modelos entrenados
models = [load_model(f'modelNuevo{i+1}.h5') for i in range(5)]

# Función para hacer predicciones con Bagging
def bagging_predict(models, data):
    predictions = [model.predict(data) for model in models]
    avg_prediction = np.mean(predictions, axis=0)
    return avg_prediction

# Hacer predicciones con Bagging en los datos de prueba
y_pred_bagging = bagging_predict(models, X_test)
y_pred_bagging = (y_pred_bagging > 0.5).astype(int)

# Evaluar el rendimiento del modelo combinado
conf_matrix = confusion_matrix(y_test, y_pred_bagging)
class_report = classification_report(y_test, y_pred_bagging)

print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)




[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
Confusion Matrix:
[[ 3541     8]
 [   29 13383]]

Classification Report:
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99      3549
         1.0       1.00      1.00      1.00     13412

    accuracy                           1.00     16961
   macro avg       1.00      1.00      1.00     16961
weighted avg       1.00      1.00      1.00     16961



### Segmentación de los datos en distintos modelos y test y evaluación

##### Siguiendo los estándares de federated larning es como si existiesen varios dispositivos entrenando un modelo

In [18]:
def load_and_preprocess_data(filepath, target_column):
    df = pd.read_csv(filepath)
    X = df.drop(columns=[target_column])
    Y = df[target_column]
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, Y

def split_data(X_scaled, Y, num_models, train_size):
    X_trains = []
    y_trains = []
    for i in range(num_models):
        start_index = i * train_size
        end_index = start_index + train_size
        X_trains.append(X_scaled[start_index:end_index, :])
        y_trains.append(Y.iloc[start_index:end_index])
    X_test = X_scaled[-train_size:, :]
    y_test = Y.iloc[-train_size:]
    return X_trains, y_trains, X_test, y_test

def create_model(input_shape):
    # Definir la capa de entrada como un objeto Input
    inputs = Input(shape=(input_shape,))
    
    # Definir las capas del modelo
    x = Dense(258, activation='relu')(inputs)
    x = Dropout(0.3)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.3)(x)
    
    # Capa de salida
    outputs = Dense(1, activation='sigmoid')(x)
    
    # Crear el modelo
    model = Model(inputs=inputs, outputs=outputs)
    
    return model

def train_and_save_models(X_trains, y_trains, X_test, y_test, num_models, epochs, batch_size, model_prefix):
    metrics = []
    for i in range(num_models):
        model = create_model(X_trains[i].shape[1])
        model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
        print(f"\nEntrenando Modelo {i + 1}...")
        model.fit(X_trains[i], y_trains[i], epochs=epochs, batch_size=batch_size, validation_split=0.2, verbose=0)
        print(f"Guardando Modelo {i + 1}...")
        save_model(model, f'{model_prefix}{i + 1}.keras')  # Guardar en formato Keras nativo
        print(f"Evaluando Modelo {i + 1}...")
        loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
        metrics.append((loss, accuracy))
        print(f"Modelo {i + 1}: Pérdida = {loss:.4f}, Precisión = {accuracy:.4f}")
    return metrics

def run_example(filepath, target_column, num_models, train_sizes, epochs, batch_size, model_prefix):
    X_scaled, Y = load_and_preprocess_data(filepath, target_column)
    metrics = []
    for i, train_size in enumerate(train_sizes):
        print(f"\nEjemplo {i + 1}: Separación en {num_models[i]} modelos y 1 test")
        X_trains, y_trains, X_test, y_test = split_data(X_scaled, Y, num_models[i], train_size)
        example_metrics = train_and_save_models(X_trains, y_trains, X_test, y_test, num_models[i], epochs[i], batch_size[i], model_prefix[i])
        metrics.append(example_metrics)
    return metrics

# Definición de parámetros para cada ejemplo
examples_params = [
    {
        'num_models': [2],
        'train_sizes': [40706],
        'epochs': [5],
        'batch_size': [32],
        'model_prefix': ['model']
    },
    {
        'num_models': [5],
        'train_sizes': [16961],
        'epochs': [5],
        'batch_size': [32],
        'model_prefix': ['modelNuevo']
    }
]

# Ejecutar cada ejemplo
filepath = 'processed_data.csv'
target_column = 'diabetesMed_Yes'

for params in examples_params:
    run_example(filepath, target_column, **params)



Ejemplo 1: Separación en 2 modelos y 1 test

Entrenando Modelo 1...
Guardando Modelo 1...
Evaluando Modelo 1...
Modelo 1: Pérdida = 0.0396, Precisión = 0.9761

Entrenando Modelo 2...
Guardando Modelo 2...
Evaluando Modelo 2...
Modelo 2: Pérdida = 0.0040, Precisión = 0.9985

Ejemplo 1: Separación en 5 modelos y 1 test

Entrenando Modelo 1...
Guardando Modelo 1...
Evaluando Modelo 1...
Modelo 1: Pérdida = 0.0707, Precisión = 0.9721

Entrenando Modelo 2...
Guardando Modelo 2...
Evaluando Modelo 2...
Modelo 2: Pérdida = 0.0284, Precisión = 0.9961

Entrenando Modelo 3...
Guardando Modelo 3...
Evaluando Modelo 3...
Modelo 3: Pérdida = 0.0357, Precisión = 0.9876

Entrenando Modelo 4...
Guardando Modelo 4...
Evaluando Modelo 4...
Modelo 4: Pérdida = 0.0148, Precisión = 0.9971

Entrenando Modelo 5...
Guardando Modelo 5...
Evaluando Modelo 5...
Modelo 5: Pérdida = 0.0183, Precisión = 0.9977


In [19]:
from tensorflow.keras.models import load_model
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# Función para cargar modelos guardados
def load_models(model_prefixes):
    models = []
    for prefix in model_prefixes:
        model = load_model(f'{prefix}.keras')
        models.append(model)
    return models

# Función para hacer predicciones con Bagging
def bagging_predict(models, data):
    predictions = np.zeros((len(data), 1))
    for model in models:
        prediction = model.predict(data)
        predictions += prediction
    avg_prediction = predictions / len(models)
    return avg_prediction

# Ejemplo de cómo usarlo con tus datos y modelos
# Suponiendo que 'modelNuevo' es el prefijo para los modelos guardados
model_prefixes = [
    'modelNuevo1', 'modelNuevo2', 'modelNuevo3', 'modelNuevo4', 'modelNuevo5'
]
models = load_models(model_prefixes)

y_pred_bagging = bagging_predict(models, X_test)
y_pred_bagging = (y_pred_bagging > 0.5).astype(int)

# Evaluar el rendimiento del modelo combinado
conf_matrix = confusion_matrix(y_test, y_pred_bagging)
class_report = classification_report(y_test, y_pred_bagging)

print("Confusion Matrix:")
print(conf_matrix)
print("\nClassification Report:")
print(class_report)


[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step
[1m531/531[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step
Confusion Matrix:
[[ 3543     6]
 [    6 13406]]

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      3549
         1.0       1.00      1.00      1.00     13412

    accuracy                           1.00     16961
   macro avg       1.00      1.00      1.00     16961
weighted avg       1.00      1.00      1.00     16961

