In [6]:
import pandas as pd

# Leer el archivo CSV
df = pd.read_csv('./diabetic_data.csv')

# Mostrar las primeras filas del dataset
print(df.head())

# Resumen de la información del dataset
print(df.info())

# Mostrar la cantidad de valores nulos por columna
print(df.isnull().sum())

   encounter_id  patient_nbr             race  gender      age weight  \
0       2278392      8222157        Caucasian  Female   [0-10)      ?   
1        149190     55629189        Caucasian  Female  [10-20)      ?   
2         64410     86047875  AfricanAmerican  Female  [20-30)      ?   
3        500364     82442376        Caucasian    Male  [30-40)      ?   
4         16680     42519267        Caucasian    Male  [40-50)      ?   

   admission_type_id  discharge_disposition_id  admission_source_id  \
0                  6                        25                    1   
1                  1                         1                    7   
2                  1                         1                    7   
3                  1                         1                    7   
4                  1                         1                    7   

   time_in_hospital  ... citoglipton insulin  glyburide-metformin  \
0                 1  ...          No      No                   No

In [7]:
# Leer el archivo CSV, tratando '?' como valores nulos
df = pd.read_csv('./diabetic_data.csv', na_values='?')

# Mostrar la cantidad de valores nulos por columna
print(df.isnull().sum())

# Guardar el DataFrame modificado en un nuevo archivo CSV
df.to_csv('diabetic_data_modified.csv', index=False)

print("El archivo modificado ha sido guardado como 'diabetic_data_modified.csv'.")


  df = pd.read_csv('./diabetic_data.csv', na_values='?')


encounter_id                    0
patient_nbr                     0
race                         2273
gender                          0
age                             0
weight                      98569
admission_type_id               0
discharge_disposition_id        0
admission_source_id             0
time_in_hospital                0
payer_code                  40256
medical_specialty           49949
num_lab_procedures              0
num_procedures                  0
num_medications                 0
number_outpatient               0
number_emergency                0
number_inpatient                0
diag_1                         21
diag_2                        358
diag_3                       1423
number_diagnoses                0
max_glu_serum                   0
A1Cresult                       0
metformin                       0
repaglinide                     0
nateglinide                     0
chlorpropamide                  0
glimepiride                     0
acetohexamide 

In [8]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Cargar el dataset
df = pd.read_csv('./diabetic_data_modified.csv')

# Asegurarse de manejar correctamente los valores nulos
# Identificar las columnas con valores nulos según la descripción proporcionada
cols_with_missing = ['race', 'weight', 'payer_code', 'medical_specialty', 'diag_1', 'diag_2', 'diag_3']

# Imputar los valores nulos con la moda para variables categóricas
categorical_imputer = SimpleImputer(strategy='most_frequent')
df[cols_with_missing] = categorical_imputer.fit_transform(df[cols_with_missing])

# Limpiar y convertir 'weight' a valores numéricos y luego categorizarlo
def clean_weight(weight_str):
    if weight_str == "?":
        return None
    elif weight_str.startswith(">"):
        return float(weight_str[1:]) + 1  # Incrementar en 1 para asegurar que los límites sean correctos
    elif weight_str.startswith("["):
        return float(weight_str.strip("[]").split("-")[0])
    elif weight_str == "Unknown":
        return None
    else:
        return float(weight_str)

df['weight'] = df['weight'].apply(clean_weight)

# Función para asignar valores únicos a las franjas de peso
def weight_to_value(weight_str):
    weight_mapping = {
        '[0-25)': 12.5,
        '[25-50)': 37.5,
        '[50-75)': 62.5,
        '[75-100)': 87.5,
        '[100-125)': 112.5,
        '[125-150)': 137.5,
        '[150-175)': 162.5,
        '[175-200)': 187.5,
        '>200': 225
    }
    return weight_mapping.get(weight_str, None)

# Definir los rangos de peso y codificar 'weight'
weight_ranges = ['[0-25)', '[25-50)', '[50-75)', '[75-100)', '[100-125)', '[125-150)', '[150-175)', '[175-200)', '>200']
df['weight_category'] = pd.cut(df['weight'], bins=[0, 25, 50, 75, 100, 125, 150, 175, 200, float('inf')], labels=weight_ranges, right=False)
df['weight_category_value'] = df['weight_category'].apply(weight_to_value)

# Eliminar la columna original 'weight' y 'weight_category'
df.drop(columns=['weight', 'weight_category'], inplace=True)

# Función para asignar categorías a los códigos ICD-9
def assign_icd_category(icd_code):
    if pd.isnull(icd_code):
        return 'Unknown'
    if icd_code.startswith(('E', 'V')):
        return 'E-V codes'
    else:
        code_number = int(icd_code.split('.')[0])  # Tomar solo el número de código ICD-9
        if 1 <= code_number <= 139:
            return '001-139'
        elif 140 <= code_number <= 239:
            return '140-239'
        elif 240 <= code_number <= 279:
            return '240-279'
        elif 280 <= code_number <= 289:
            return '280-289'
        elif 290 <= code_number <= 319:
            return '290-319'
        elif 320 <= code_number <= 389:
            return '320-389'
        elif 390 <= code_number <= 459:
            return '390-459'
        elif 460 <= code_number <= 519:
            return '460-519'
        elif 520 <= code_number <= 579:
            return '520-579'
        elif 580 <= code_number <= 629:
            return '580-629'
        elif 630 <= code_number <= 679:
            return '630-679'
        elif 680 <= code_number <= 709:
            return '680-709'
        elif 710 <= code_number <= 739:
            return '710-739'
        elif 740 <= code_number <= 759:
            return '740-759'
        elif 760 <= code_number <= 779:
            return '760-779'
        elif 780 <= code_number <= 799:
            return '780-799'
        elif 800 <= code_number <= 999:
            return '800-999'
        else:
            return 'Other'  # En caso de no encontrar una categoría válida

# Aplicar la función a cada columna de diagnóstico
for col in ['diag_1', 'diag_2', 'diag_3']:
    df[col + '_category'] = df[col].apply(assign_icd_category)

# Eliminar las columnas originales de diagnóstico
df.drop(columns=['diag_1', 'diag_2', 'diag_3'], inplace=True)

# Función para asignar valores únicos a las franjas de edad
def age_to_value(age_str):
    age_mapping = {
        '[0-10)': 5,
        '[10-20)': 15,
        '[20-30)': 25,
        '[30-40)': 35,
        '[40-50)': 45,
        '[50-60)': 55,
        '[60-70)': 65,
        '[70-80)': 75,
        '[80-90)': 85,
        '[90-100)': 95
    }
    return age_mapping.get(age_str, None)

# Aplicar la función a la columna de edad
df['age_value'] = df['age'].apply(age_to_value)

# Eliminar la columna original 'age'
df.drop(columns=['age'], inplace=True)

# Aplicar codificación one-hot a las variables categóricas, excluyendo 'age_value' y 'weight_category_value' ya que están mapeadas a valores únicos
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
encoder = OneHotEncoder(drop='first', sparse=False)
encoded_cols = pd.DataFrame(encoder.fit_transform(df[categorical_cols]))

# Sustituir las columnas originales con las nuevas codificadas
encoded_cols.columns = encoder.get_feature_names_out(categorical_cols)
df.drop(columns=categorical_cols, inplace=True)
df = pd.concat([df, encoded_cols], axis=1)

# Guardar el resultado en un nuevo archivo CSV
df.to_csv('processed_data.csv', index=False)

print("Datos procesados y guardados en 'processed_data.csv'.")


  df = pd.read_csv('./diabetic_data_modified.csv')


Datos procesados y guardados en 'processed_data.csv'.


In [9]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

X = df.drop(columns=['diabetesMed_Yes']) 
Y = df['diabetesMed_Yes']

X_subset = X.iloc[:50883, :]
y_subset = Y.iloc[:50883]

# Dividir en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_subset,y_subset, test_size=0.2, random_state=42)

# Normalizar los datos de entrada
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Inicializar el modelo
model = Sequential()

# Añadir capas densas
model.add(Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))  
model.add(Dropout(0.3)) 
model.add(Dense(32, activation='relu'))  
model.add(Dropout(0.3)) 
model.add(Dense(1, activation='sigmoid'))

# Compilar el modelo
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])

# Función para obtener y mostrar pesos y sesgos
def print_layer_weights(model):
    for i, layer in enumerate(model.layers):
        weights = layer.get_weights()
        if len(weights) > 0: 
            weights, biases = weights
            print(f"Layer {i} weights:\n{weights}\n")
            print(f"Layer {i} biases:\n{biases}\n")
        else:
            print(f"Layer {i} ({layer.__class__.__name__}) no tiene pesos ni bias.\n")


# Mostrar pesos y sesgos antes del entrenamiento
print("Pesos y sesgos antes del entrenamiento:")
print_layer_weights(model)

# Entrenar el modelo
parametrosModelo = model.fit(X_train_scaled, y_train, epochs=30, batch_size=64, validation_split=0.1)

# Mostrar precisión después del entrenamiento
print()
print("Precisión durante el entrenamiento:")
print("Precisión en el conjunto de entrenamiento:")
print([acc * 100 for acc in parametrosModelo.history['accuracy']])
print()
print("Precisión en el conjunto de validación:")
print([acc * 100 for acc in parametrosModelo.history['val_accuracy']]) 

# Calcular accuracy promedio
avg_accuracy_train = np.mean(parametrosModelo.history['accuracy']) * 100
avg_accuracy_val = np.mean(parametrosModelo.history['val_accuracy']) * 100

print()
print(f"Accuracy promedio en entrenamiento: {avg_accuracy_train:.2f}%")
print(f"Accuracy promedio en validación: {avg_accuracy_val:.2f}%")

# Mostrar pesos y sesgos después del entrenamiento
print()
print("Pesos y sesgos después del entrenamiento:")
print_layer_weights(model)

model.save('diabetes_model2.h5')

test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Loss en el conjunto de prueba: {test_loss}")
print(f"Accuracy en el conjunto de prueba: {test_accuracy}")

# Hacer predicciones
predictions = model.predict(X_test_scaled)
predicted_classes = (predictions > 0.5).astype("int32")

# Evaluar métricas adicionales
print("Matriz de Confusión:")
print(confusion_matrix(y_test, predicted_classes))

print("\nReporte de Clasificación:")
print(classification_report(y_test, predicted_classes))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Pesos y sesgos antes del entrenamiento:
Layer 0 weights:
[[ 0.04063994 -0.05072993  0.13651276 ...  0.11302426 -0.04847663
   0.08240421]
 [ 0.08878954 -0.1389369   0.01815511 ...  0.02413775 -0.02868775
   0.07922462]
 [-0.11575004  0.04836859  0.04187793 ... -0.13163708  0.06376694
  -0.05258293]
 ...
 [ 0.00498389  0.11721215 -0.12215046 ... -0.00060919 -0.09973914
  -0.12021938]
 [-0.10433164 -0.02308767  0.11986297 ... -0.08981217 -0.12512484
  -0.03728616]
 [-0.02509236 -0.13230301  0.05776756 ... -0.06344934 -0.12879084
  -0.08239449]]

Layer 0 biases:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

Layer 1 (Dropout) no tiene pesos ni bias.

Layer 2 weights:
[[ 0.13763924 -0.05362898  0.1975135  ... -0.04624817  0.06065185
  -0.17811276]
 [-0.2085174   0.08905359 -0.18728955 ...  0.15063412  0.18036987
  -0.12660904]
 [-0.06148267 -

[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.9991 - loss: 8.4935e-04 - val_accuracy: 0.9978 - val_loss: 0.0020





Precisión durante el entrenamiento:
Precisión en el conjunto de entrenamiento:
[92.23966002464294, 99.26300048828125, 99.53323602676392, 99.60966110229492, 99.63696002960205, 99.69974160194397, 99.68882203102112, 99.71066117286682, 99.75160360336304, 99.78981614112854, 99.7434139251709, 99.83076453208923, 99.82803463935852, 99.8444139957428, 99.81438517570496, 99.8444139957428, 99.83076453208923, 99.8034656047821, 99.81438517570496, 99.78435635566711, 99.84168410301208, 99.80073571205139, 99.84168410301208, 99.86351728439331, 99.8444139957428, 99.86351728439331, 99.87170696258545, 99.82530474662781, 99.87443685531616, 99.89900588989258]

Precisión en el conjunto de validación:
[99.68066811561584, 99.68066811561584, 99.70523118972778, 99.6561050415039, 99.70523118972778, 99.6561050415039, 99.60697889328003, 99.6561050415039, 99.63154196739197, 99.53328371047974, 99.70523118972778, 99.6561050415039, 99.63154196739197, 99.63154196739197, 99.72979426383972, 99.70523118972778, 99.656105041

In [10]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Concatenate
from tensorflow.keras.models import load_model

# Cargar los modelos preentrenados
model1 = load_model('diabetes_model1.h5')
model2 = load_model('diabetes_model2.h5')

# Definir una nueva entrada para el modelo combinado
input_tensor = Input(shape=(X_train_scaled.shape[1],))

# Obtener las salidas de ambos modelos
output1 = model1(input_tensor)
output2 = model2(input_tensor)

# Concatenar las salidas
concatenated_output = Concatenate()([output1, output2])

# Añadir una capa densa final para producir la salida combinada
final_output = Dense(1, activation='sigmoid')(concatenated_output)

# Crear el nuevo modelo combinado
combined_model = Model(inputs=input_tensor, outputs=final_output)

# Compilar el modelo combinado
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Guardar el modelo combinado
combined_model.save('combined_diabetes_model.h5')

print("Modelo combinado guardado como 'combined_diabetes_model.h5'.")

# Cargar el modelo combinado
combined_model = load_model('combined_diabetes_model.h5')

# Evaluar el modelo combinado
loss, accuracy = combined_model.evaluate(X_test_scaled, y_test)
print(f"Pérdida: {loss:.2f}")
print(f"Precisión: {accuracy * 100:.2f}%")

# Predecir con el modelo combinado
predictions = combined_model.predict(X_test_scaled)
print(predictions)
print(predictions.shape)


FileNotFoundError: [Errno 2] Unable to synchronously open file (unable to open file: name = 'diabetes_model1.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

#### Cargar el CSV y separar los datos

In [7]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Cargar el dataset
df = pd.read_csv('processed_data.csv')

# Separar características y variable objetivo
X = df.drop(columns=['diabetesMed_Yes'])
Y = df['diabetesMed_Yes']

# Separar los datos para los dos modelos
X_train1 = X.iloc[:40706, :]
y_train1 = Y.iloc[:40706]

X_train2 = X.iloc[40707:81412, :]
y_train2 = Y.iloc[40707:81412]

# Datos para prueba
X_test = X.iloc[81412:, :]
y_test = Y.iloc[81412:]


#### Función para crear el modelo

In [8]:
def create_model(input_shape):
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(input_shape,)))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))
    return model

#### Entrenar y guardar los modelos

In [9]:
# Normalizar los datos de entrada
scaler = StandardScaler()
X_train1_scaled = scaler.fit_transform(X_train1)
X_train2_scaled = scaler.fit_transform(X_train2)
X_test_scaled = scaler.transform(X_test)

# Crear y compilar los modelos
model1 = create_model(X_train1_scaled.shape[1])
model2 = create_model(X_train2_scaled.shape[1])

# Entrenar el primer modelo
optimizer1 = Adam(learning_rate=0.001)
model1.compile(optimizer=optimizer1, loss='binary_crossentropy', metrics=['accuracy'])
model1.fit(X_train1_scaled, y_train1, epochs=5, batch_size=64, validation_split=0.1)
model1.save('diabetes_model1.h5')

# Crear un nuevo optimizador y compilar el segundo modelo
optimizer2 = Adam(learning_rate=0.001)
model2.compile(optimizer=optimizer2, loss='binary_crossentropy', metrics=['accuracy'])
#optimizer2.build(model2.trainable_variables)
model2.fit(X_train2_scaled, y_train2, epochs=5, batch_size=64, validation_split=0.1)
model2.save('diabetes_model2.h5')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5


  saving_api.save_model(


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


#### Bagging y evaluación del modelo combinado
Para hacer el bagging, promediamos las predicciones de ambos modelos y evaluamos el rendimiento del modelo combinado.

In [10]:
from tensorflow.keras.models import load_model
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# Cargar los modelos preentrenados
model1 = load_model('diabetes_model1.h5')
model2 = load_model('diabetes_model2.h5')

# Evaluar ambos modelos
predictions1 = model1.predict(X_test_scaled)
predictions2 = model2.predict(X_test_scaled)

# Promediar las predicciones
combined_predictions = (predictions1 + predictions2) / 2
predicted_classes = (combined_predictions > 0.5).astype("int32")

# Evaluar métricas adicionales
print("Matriz de Confusión:")
print(confusion_matrix(y_test, predicted_classes))

print("\nReporte de Clasificación:")
print(classification_report(y_test, predicted_classes))

# Evaluar la precisión del modelo combinado
accuracy = np.mean(predicted_classes.flatten() == y_test)
print(f"Precisión del modelo combinado: {accuracy * 100:.2f}%")


Matriz de Confusión:
[[ 4162    17]
 [    3 16172]]

Reporte de Clasificación:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      4179
         1.0       1.00      1.00      1.00     16175

    accuracy                           1.00     20354
   macro avg       1.00      1.00      1.00     20354
weighted avg       1.00      1.00      1.00     20354

Precisión del modelo combinado: 99.90%


#### Crear un nuevo modelo combinado
Finalmente, para crear un nuevo modelo combinado que tenga las mismas capas que los modelos entrenados, podemos concatenar sus capas de salida antes de la última capa de salida:

In [11]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Average

# Definir una nueva entrada para el modelo combinado
input_tensor = Input(shape=(X_train1_scaled.shape[1],))

# Obtener las salidas de ambos modelos
output1 = model1(input_tensor)
output2 = model2(input_tensor)

# Promediar las salidas
averaged_output = Average()([output1, output2])

# Crear el nuevo modelo combinado
combined_model = Model(inputs=input_tensor, outputs=averaged_output)

# Compilar el modelo combinado
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Guardar el modelo combinado
combined_model.save('combined_diabetes_model.h5')

print("Modelo combinado guardado como 'combined_diabetes_model.h5'.")

# Evaluar el modelo combinado
loss, accuracy = combined_model.evaluate(X_test_scaled, y_test)
print(f"Pérdida: {loss:.2f}")
print(f"Precisión: {accuracy * 100:.2f}%")

# Predecir con el modelo combinado
predictions = combined_model.predict(X_test_scaled)
print(predictions)


  saving_api.save_model(


Modelo combinado guardado como 'combined_diabetes_model.h5'.
Pérdida: 0.01
Precisión: 99.90%
[[2.845340e-07]
 [1.000000e+00]
 [2.196625e-07]
 ...
 [1.000000e+00]
 [1.000000e+00]
 [3.842231e-07]]


#### Ver pesos y sesgos de los modelos entrenados y combinados

In [12]:
import numpy as np
from keras.models import load_model
from keras.layers import Dense, Dropout, Input
from keras.models import Sequential, Model
from keras.optimizers import Adam

# Función para imprimir pesos y biases
def print_layer_weights(models):
    max_layers = max(len(model.layers) for model in models)
    
    for layer_idx in range(max_layers):
        for model_idx, model in enumerate(models):
            if layer_idx < len(model.layers):
                layer = model.layers[layer_idx]
                weights = layer.get_weights()
                if len(weights) > 0:
                    weights, biases = weights
                    print(f"Model {model_idx + 1}, Layer {layer_idx} weights:\n{weights}\n")
                    print(f"Model {model_idx + 1}, Layer {layer_idx} biases:\n{biases}\n")
                else:
                    print(f"Model {model_idx + 1}, Layer {layer_idx} ({layer.__class__.__name__}) no tiene pesos ni bias.\n")
            else:
                print(f"Model {model_idx + 1}, Layer {layer_idx} no existe en este modelo.\n")
        print("-" * 80)

# Cargar modelos entrenados
model1 = load_model('diabetes_model1.h5')
model2 = load_model('diabetes_model2.h5')

# Crear el modelo combinado
input_shape = model1.input_shape[1]

combined_model = Sequential()
combined_model.add(Dense(64, activation='relu', input_shape=(input_shape,)))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(64, activation='relu'))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(32, activation='relu'))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(32, activation='relu'))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(1, activation='sigmoid'))

# Copiar pesos del primer modelo al modelo combinado
for layer_idx in range(len(model1.layers)):
    combined_model.layers[layer_idx].set_weights(model1.layers[layer_idx].get_weights())

# Ajustar pesos del segundo modelo al modelo combinado
for layer_idx in range(len(model2.layers)):
    weights1 = combined_model.layers[layer_idx].get_weights()
    weights2 = model2.layers[layer_idx].get_weights()
    combined_weights = [(w1 + w2) / 2 for w1, w2 in zip(weights1, weights2)]
    combined_model.layers[layer_idx].set_weights(combined_weights)

# Compilar el modelo combinado
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Imprimir pesos y biases de los modelos
print("Comparación de pesos y biases entre los modelos:")
print_layer_weights([model1, model2, combined_model])


Comparación de pesos y biases entre los modelos:
Model 1, Layer 0 weights:
[[-0.06191757 -0.08098182 -0.04410366 ... -0.07940952 -0.1017052
  -0.00617711]
 [-0.00965148 -0.04772649 -0.05453945 ...  0.03646876 -0.09333761
  -0.00939844]
 [ 0.13227929  0.00579428 -0.05832147 ...  0.0772763  -0.05367774
   0.06223681]
 ...
 [-0.06324483 -0.08523989 -0.02103165 ...  0.08674185 -0.05076953
  -0.00318669]
 [ 0.04394023 -0.10408543 -0.01934743 ...  0.00875132 -0.08530727
   0.10320699]
 [-0.12368436 -0.00354468  0.02977188 ... -0.16457503  0.01976795
  -0.0600509 ]]

Model 1, Layer 0 biases:
[ 0.21775964  0.17917053  0.09818418 -0.20169424  0.18850051  0.02824012
  0.1683878  -0.09930179 -0.04008741  0.18484135  0.2947634   0.0105831
 -0.11476576  0.1109532  -0.16854668  0.19045074  0.10019996 -0.08791085
  0.04309104 -0.20266037 -0.15516436 -0.07187007  0.05590915  0.06042239
  0.00512924  0.18720196  0.06794354 -0.04299501  0.17256473 -0.1665
  0.2110014  -0.02771115  0.15851715 -0.1849355 

# Para 5 modelos

#### Cargar el CSV y separar los datos

In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import confusion_matrix, classification_report

# Función para subdividir los datos
def subdivide_data(X, y, num_parts):
    size = len(X) // num_parts
    X_parts = [X.iloc[i*size:(i+1)*size, :] for i in range(num_parts)]
    y_parts = [y.iloc[i*size:(i+1)*size] for i in range(num_parts)]
    return X_parts, y_parts

# Cargar el dataset
df = pd.read_csv('processed_data.csv')

# Separar características y variable objetivo
X = df.drop(columns=['diabetesMed_Yes'])
y = df['diabetesMed_Yes']

# Subdividir los datos en 6 partes
num_parts = 6
X_parts, y_parts = subdivide_data(X, y, num_parts)

# Crear diccionarios para almacenar las partes de entrenamiento
X_train_parts = {}
y_train_parts = {}

# Asignar las subdivisiones a las variables correspondientes
for i in range(num_parts - 1):
    X_train_parts[f'X_train_{i+1}'] = X_parts[i]
    y_train_parts[f'y_train_{i+1}'] = y_parts[i]

# Guardar la última parte para pruebas
X_test = X_parts[-1]
y_test = y_parts[-1]

# Imprimir las formas para verificar
for i in range(num_parts - 1):
    print(f"X_train_{i+1} shape: {X_train_parts[f'X_train_{i+1}'].shape}, y_train_{i+1} shape: {y_train_parts[f'y_train_{i+1}'].shape}")

print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

X_train_1 shape: (16961, 213), y_train_1 shape: (16961,)
X_train_2 shape: (16961, 213), y_train_2 shape: (16961,)
X_train_3 shape: (16961, 213), y_train_3 shape: (16961,)
X_train_4 shape: (16961, 213), y_train_4 shape: (16961,)
X_train_5 shape: (16961, 213), y_train_5 shape: (16961,)
X_test shape: (16961, 213), y_test shape: (16961,)


#### Función para crear el modelo

In [31]:
def create_model(input_shape):
    model = Sequential()
    model.add(Dense(8, activation='relu', input_shape=(input_shape,)))
    model.add(Dropout(0.3))
    model.add(Dense(8, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(4, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(4, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(1, activation='sigmoid'))
    return model

#### Entrenar y guardar modelos

In [32]:
# Normalizar los datos de entrada
scalers = {}
for i in range(num_parts - 1):
    scalers[f'scaler_{i+1}'] = StandardScaler()
    X_train_parts[f'X_train_{i+1}_scaled'] = scalers[f'scaler_{i+1}'].fit_transform(X_train_parts[f'X_train_{i+1}'])

X_test_scaled = StandardScaler().fit_transform(X_test)

# Crear y entrenar los modelos
models = {}
optimizers = {}

for i in range(num_parts - 1):
    models[f'model_{i+1}'] = create_model(X_train_parts[f'X_train_{i+1}_scaled'].shape[1])
    optimizers[f'optimizer_{i+1}'] = Adam(learning_rate=0.001)
    models[f'model_{i+1}'].compile(optimizer=optimizers[f'optimizer_{i+1}'], loss='binary_crossentropy', metrics=['accuracy'])
    models[f'model_{i+1}'].fit(X_train_parts[f'X_train_{i+1}_scaled'], y_train_parts[f'y_train_{i+1}'], epochs=5, batch_size=64, validation_split=0.1)
    models[f'model_{i+1}'].save(f'diabetes_model_{i+1}.h5')


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5


  saving_api.save_model(


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


#### Bagging y evaluación del modelo combinado
Para hacer el bagging, promediamos las predicciones de ambos modelos y evaluamos el rendimiento del modelo combinado.

In [33]:
# Inicializar una lista para almacenar las predicciones de los modelos
all_predictions = []

# Cargar y predecir con cada modelo, luego almacenar las predicciones
for i in range(1, 6):
    model = load_model(f'diabetes_model_{i}.h5')
    predictions = model.predict(X_test_scaled)
    all_predictions.append(predictions)

# Promediar las predicciones de todos los modelos
combined_predictions = np.mean(all_predictions, axis=0)
predicted_classes = (combined_predictions > 0.5).astype("int32")

# Evaluar métricas adicionales
print("Matriz de Confusión:")
print(confusion_matrix(y_test, predicted_classes))

print("\nReporte de Clasificación:")
print(classification_report(y_test, predicted_classes))

# Evaluar la precisión del modelo combinado
accuracy = np.mean(predicted_classes.flatten() == y_test)
print(f"Precisión del modelo combinado: {accuracy * 100:.2f}%")

Matriz de Confusión:
[[ 3525    24]
 [    1 13411]]

Reporte de Clasificación:
              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00      3549
         1.0       1.00      1.00      1.00     13412

    accuracy                           1.00     16961
   macro avg       1.00      1.00      1.00     16961
weighted avg       1.00      1.00      1.00     16961

Precisión del modelo combinado: 99.85%


#### Crear un nuevo modelo combinado
Finalmente, para crear un nuevo modelo combinado que tenga las mismas capas que los modelos entrenados, podemos concatenar sus capas de salida antes de la última capa de salida:

In [40]:
# Cargar los modelos preentrenados
models = [load_model(f'diabetes_model_{i}.h5') for i in range(1, 6)]

# Definir una nueva entrada para el modelo combinado
input_tensor = Input(shape=(X_test_scaled.shape[1],))

# Obtener las salidas de los cinco modelos
outputs = [model(input_tensor) for model in models]

# Promediar las salidas
averaged_output = Average()(outputs)

# Crear el nuevo modelo combinado
combined_model = Model(inputs=input_tensor, outputs=averaged_output)

# Compilar el modelo combinado
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Guardar el modelo combinado
combined_model.save('combined_diabetes_model.h5')

print("Modelo combinado guardado como 'combined_diabetes_model5.h5'.")

# Evaluar el modelo combinado
loss, accuracy = combined_model.evaluate(X_test_scaled, y_test)
print(f"Pérdida: {loss:.2f}")
print(f"Precisión: {accuracy * 100:.2f}%")

# Predecir con el modelo combinado
predictions = combined_model.predict(X_test_scaled)
predicted_classes = (predictions > 0.5).astype("int32")

# Evaluar métricas adicionales
print("Matriz de Confusión:")
print(confusion_matrix(y_test, predicted_classes))

print("\nReporte de Clasificación:")
print(classification_report(y_test, predicted_classes))

  saving_api.save_model(


Modelo combinado guardado como 'combined_diabetes_model5.h5'.
Pérdida: 0.10
Precisión: 99.85%
Matriz de Confusión:
[[ 3525    24]
 [    1 13411]]

Reporte de Clasificación:
              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00      3549
         1.0       1.00      1.00      1.00     13412

    accuracy                           1.00     16961
   macro avg       1.00      1.00      1.00     16961
weighted avg       1.00      1.00      1.00     16961



#### Ver pesos y sesgos de los modelos entrenados y combinados

In [35]:
import numpy as np
from keras.models import load_model
from keras.layers import Dense, Dropout, Input
from keras.models import Sequential, Model
from keras.optimizers import Adam

# Función para imprimir pesos y biases
def print_layer_weights(models):
    max_layers = max(len(model.layers) for model in models)
    
    for layer_idx in range(max_layers):
        for model_idx, model in enumerate(models):
            if layer_idx < len(model.layers):
                layer = model.layers[layer_idx]
                weights = layer.get_weights()
                if len(weights) > 0:
                    weights, biases = weights
                    print(f"Model {model_idx + 1}, Layer {layer_idx} weights:\n{weights}\n")
                    print(f"Model {model_idx + 1}, Layer {layer_idx} biases:\n{biases}\n")
                else:
                    print(f"Model {model_idx + 1}, Layer {layer_idx} ({layer.__class__.__name__}) no tiene pesos ni bias.\n")
            else:
                print(f"Model {model_idx + 1}, Layer {layer_idx} no existe en este modelo.\n")
        print("-" * 80)

# Cargar modelos entrenados
models = [load_model(f'diabetes_model_{i}.h5') for i in range(1, 6)]

# Crear el modelo combinado
input_shape = models[0].input_shape[1]

combined_model = Sequential()
combined_model.add(Dense(8, activation='relu', input_shape=(input_shape,)))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(8, activation='relu'))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(4, activation='relu'))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(4, activation='relu'))
combined_model.add(Dropout(0.3))
combined_model.add(Dense(1, activation='sigmoid'))

# Copiar y promediar pesos de los cinco modelos al modelo combinado
for layer_idx in range(len(models[0].layers)):
    combined_weights = [np.zeros_like(weights) for weights in models[0].layers[layer_idx].get_weights()]
    for model in models:
        weights = model.layers[layer_idx].get_weights()
        for i in range(len(combined_weights)):
            combined_weights[i] += weights[i] / len(models)
    combined_model.layers[layer_idx].set_weights(combined_weights)

# Compilar el modelo combinado
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Imprimir pesos y biases de los modelos
print("Comparación de pesos y biases entre los modelos:")
print_layer_weights(models + [combined_model])

Comparación de pesos y biases entre los modelos:
Model 1, Layer 0 weights:
[[ 0.08754154  0.01318073  0.03278391 ...  0.12642643  0.12055822
  -0.08726312]
 [ 0.12308843  0.00311712  0.02473778 ...  0.07963285  0.04734337
  -0.08975643]
 [-0.05077969 -0.02989954 -0.06240862 ...  0.06857093 -0.16566604
  -0.1063541 ]
 ...
 [ 0.11018789 -0.02110373 -0.02447482 ...  0.02369241  0.02779322
  -0.12462737]
 [-0.19528095 -0.01349453 -0.03513342 ... -0.02802291 -0.0658963
   0.00754821]
 [ 0.06712667  0.00684241 -0.02309349 ...  0.13124083 -0.01194782
   0.03623368]]

Model 1, Layer 0 biases:
[ 0.17086786 -0.2858687  -0.26896858 -0.2406739   0.30710623 -0.23466906
 -0.16809085  0.03685481]

Model 2, Layer 0 weights:
[[-0.05055476  0.01312563 -0.05933542 ... -0.15204592 -0.01512105
   0.00504706]
 [-0.08810122 -0.03644716  0.04598607 ... -0.01449659  0.04549363
  -0.08985673]
 [ 0.02020413 -0.13084243 -0.03867763 ... -0.05034899 -0.05781659
  -0.01544108]
 ...
 [-0.00465415  0.02983462  0.08561