In [6]:
import numpy as np
import pandas as pd

df = pd.read_csv('diabetic_data.csv')

print(df.head())
print(df.info())

   encounter_id  patient_nbr             race  gender      age weight  \
0       2278392      8222157        Caucasian  Female   [0-10)      ?   
1        149190     55629189        Caucasian  Female  [10-20)      ?   
2         64410     86047875  AfricanAmerican  Female  [20-30)      ?   
3        500364     82442376        Caucasian    Male  [30-40)      ?   
4         16680     42519267        Caucasian    Male  [40-50)      ?   

   admission_type_id  discharge_disposition_id  admission_source_id  \
0                  6                        25                    1   
1                  1                         1                    7   
2                  1                         1                    7   
3                  1                         1                    7   
4                  1                         1                    7   

   time_in_hospital  ... citoglipton insulin  glyburide-metformin  \
0                 1  ...          No      No                   No

In [13]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Crear una copia del DataFrame original para trabajar
df_copy = df.copy()

# Reemplazar '?' por NaN en la copia
df_copy.replace("?", np.nan, inplace=True)

# Verificar y limpiar la variable objetivo 'readmitted'
if 'readmitted' in df_copy.columns:
    y = df_copy['readmitted'].copy()
    df_copy.drop(columns=['readmitted'], inplace=True)
    y = y.apply(lambda x: 0 if x == 'NO' else 1)
else:
    raise ValueError("La columna 'readmitted' no se encontró en el DataFrame.")

# Convertir variables categóricas a variables dummy
cat_cols = df_copy.select_dtypes(include=['object']).columns
df_copy = pd.get_dummies(df_copy, columns=cat_cols)

# Estandarizar variables numéricas
num_cols = df_copy.select_dtypes(include=['int64', 'float64']).columns
scaler = StandardScaler()
df_copy[num_cols] = scaler.fit_transform(df_copy[num_cols])

# Asignar X después de preprocesamiento
X = df_copy

# Guardar el DataFrame limpio en un nuevo archivo CSV
#df_copy.to_csv('DiabetesLimpio.csv', index=False)

# Verificar la forma y las columnas de X y y para asegurarnos de que todo esté correcto
print("Forma de X:", X.shape)
print("Forma de y:", y.shape)
print("Número de columnas en X:", len(X.columns))

# Información sobre el DataFrame preprocesado
print()
print("Información del DataFrame limpio:")
print(df_copy.info())

Forma de X: (101766, 2465)
Forma de y: (101766,)
Número de columnas en X: 2465

Información del DataFrame limpio:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101766 entries, 0 to 101765
Columns: 2465 entries, encounter_id to diabetesMed_Yes
dtypes: float64(13), uint8(2452)
memory usage: 248.1 MB
None


In [14]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split

X_subset = X.iloc[50883:, :]
y_subset = y.iloc[50883:]

# Dividir en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_subset,y_subset, test_size=0.2, random_state=42)

# Normalizar los datos de entrada
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Inicializar el modelo
model = Sequential()

# Añadir capas densas
model.add(Dense(258, activation='relu', input_shape=(X_train_scaled.shape[1],)))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))  
model.add(Dropout(0.3)) 
model.add(Dense(32, activation='relu'))  
model.add(Dropout(0.3)) 
model.add(Dense(1, activation='sigmoid'))

# Compilar el modelo
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['accuracy'])

# Función para obtener y mostrar pesos y sesgos
def print_layer_weights(model):
    for i, layer in enumerate(model.layers):
        weights = layer.get_weights()
        if len(weights) > 0: 
            weights, biases = weights
            print(f"Layer {i} weights:\n{weights}\n")
            print(f"Layer {i} biases:\n{biases}\n")
        else:
            print(f"Layer {i} ({layer.__class__.__name__}) no tiene pesos ni bias.\n")


# Mostrar pesos y sesgos antes del entrenamiento
print("Pesos y sesgos antes del entrenamiento:")
print_layer_weights(model)

# Entrenar el modelo
parametrosModelo = model.fit(X_train_scaled, y_train, epochs=30, batch_size=64, validation_split=0.1)

# Mostrar precisión después del entrenamiento
print()
print("Precisión durante el entrenamiento:")
print("Precisión en el conjunto de entrenamiento:")
print([acc * 100 for acc in parametrosModelo.history['accuracy']])
print()
print("Precisión en el conjunto de validación:")
print([acc * 100 for acc in parametrosModelo.history['val_accuracy']]) 

# Calcular accuracy promedio
avg_accuracy_train = np.mean(parametrosModelo.history['accuracy']) * 100
avg_accuracy_val = np.mean(parametrosModelo.history['val_accuracy']) * 100

print()
print(f"Accuracy promedio en entrenamiento: {avg_accuracy_train:.2f}%")
print(f"Accuracy promedio en validación: {avg_accuracy_val:.2f}%")

# Mostrar pesos y sesgos después del entrenamiento
print()
print("Pesos y sesgos después del entrenamiento:")
print_layer_weights(model)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Pesos y sesgos antes del entrenamiento:
Layer 0 weights:
[[-1.7782612e-02  1.6994663e-03  4.2044688e-02 ...  3.3657428e-02
   2.0636659e-02  2.6258003e-02]
 [ 1.7070446e-02 -3.0590205e-02  1.6307686e-02 ... -2.8216915e-02
   4.2368259e-02  1.9513667e-03]
 [-3.3753216e-02  3.4142535e-02  1.0396037e-02 ... -3.8298782e-02
   1.7113242e-02  1.0874223e-02]
 ...
 [ 4.3874983e-02  1.6085047e-02  2.0299185e-02 ... -2.7538681e-02
  -2.1521002e-05 -1.3466179e-03]
 [-3.2429341e-02  1.9418631e-02  7.0847198e-04 ... -4.3750733e-02
   2.6270907e-02 -4.1971169e-03]
 [-6.7501068e-03 -4.2376965e-02 -3.9947908e-02 ...  4.1013759e-02
  -2.6290238e-02 -3.8821395e-02]]

Layer 0 biases:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 

[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.8674 - loss: 0.0975 - val_accuracy: 0.5996 - val_loss: 0.2906
Epoch 25/30
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.8759 - loss: 0.0926 - val_accuracy: 0.6092 - val_loss: 0.3010
Epoch 26/30
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.8784 - loss: 0.0910 - val_accuracy: 0.6050 - val_loss: 0.2985
Epoch 27/30
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 10ms/step - accuracy: 0.8818 - loss: 0.0888 - val_accuracy: 0.5976 - val_loss: 0.3044
Epoch 28/30
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 9ms/step - accuracy: 0.8851 - loss: 0.0856 - val_accuracy: 0.5994 - val_loss: 0.3045
Epoch 29/30
[1m573/573[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 8ms/step - accuracy: 0.8942 - loss: 0.0809 - val_accuracy: 0.5981 - val_loss: 0.2988
Epoch 30/30
[1m573/573[0m [3

In [10]:
# Evaluar el modelo
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test)
print(f"Loss en el conjunto de prueba: {test_loss}")
print(f"Accuracy en el conjunto de prueba: {test_accuracy}")

# Hacer predicciones
predictions = model.predict(X_test_scaled)
predicted_classes = (predictions > 0.5).astype("int32")

# Evaluar métricas adicionales
print("Matriz de Confusión:")
print(confusion_matrix(y_test, predicted_classes))

print("\nReporte de Clasificación:")
print(classification_report(y_test, predicted_classes))

[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6115 - loss: 0.3047
Loss en el conjunto de prueba: 0.30572623014450073
Accuracy en el conjunto de prueba: 0.6082342267036438
[1m319/319[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
Matriz de Confusión:
[[3504 2043]
 [1944 2686]]

Reporte de Clasificación:
              precision    recall  f1-score   support

           0       0.64      0.63      0.64      5547
           1       0.57      0.58      0.57      4630

    accuracy                           0.61     10177
   macro avg       0.61      0.61      0.61     10177
weighted avg       0.61      0.61      0.61     10177

