## Taller 7

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras

In [2]:
# Fix the NumPy and TensorFlow global seeds so random draws repeat across runs
np.random.seed(42)
tf.random.set_seed(42)
# Request deterministic TensorFlow op implementations
# (this is a runtime configuration call, not an environment variable)
tf.config.experimental.enable_op_determinism()

In [3]:
df = pd.read_csv('heart.csv')

In [None]:
df.head()

Descripción de las variables:

https://archive.ics.uci.edu/dataset/45/heart+disease


Exploremos el tamaño del df

In [None]:
df.shape

Identificamos NAs en los datos

In [None]:
df.isna().sum()

Definimos listas para las variables categóricas enteras, categóricas string y numéricas.

In [7]:
# Integer-coded categorical variables; the selection is limited to these 4
cat_int_feats = ['sex', 'cp', 'fbs', 'exang']

In [8]:
cat_str_feats = ['thal']

In [9]:
# Variables treated as numerical; the selection is limited to these 4
# NOTE(review): `slope` looks like a coded category in the dataset description —
# confirm that normalising it as a number is intended.
num_feats = ['age', 'chol', 'oldpeak', 'slope']

Concatenamos las tres listas de variables en una sola lista ordenada

In [10]:
feats_ordered = cat_int_feats+cat_str_feats+num_feats

Reordenamos el dataframe de acuerdo con el tipo de variable

In [11]:
df = df[feats_ordered+['target']]

In [None]:
df.head()

In [None]:
df.columns

In [None]:
df["target"].unique()

In [None]:
df.dtypes

Separamos los datos en entrenamiento, validación y prueba

In [None]:
train = df.sample(frac=0.8, random_state=100)
train.head()

In [None]:
train.shape

In [None]:
test = df.drop(train.index)
test.head()

In [19]:
val = train.sample(frac=0.2, random_state=100)

In [None]:
val.shape

In [21]:
train = train.drop(val.index)

In [None]:
print(train.shape)
print(val.shape)
print(test.shape)

Calculamos estadísticas de cada variable numérica

In [None]:
train.describe()

Función para convertir de dataframe (pandas) a dataset (tensorflow), separando características y etiquetas

In [24]:
def dataframe_to_dataset(dataframe):
    """Turn a pandas DataFrame into a shuffled ``tf.data.Dataset``.

    The "target" column becomes the label and every remaining column becomes an
    entry of a feature dictionary keyed by column name. The caller's DataFrame is
    left untouched.

    Args:
        dataframe: DataFrame that contains a "target" column.

    Returns:
        An unbatched dataset of ``(features_dict, label)`` pairs, shuffled with a
        buffer as large as the number of rows.
    """
    features = dataframe.drop(columns="target")
    target = dataframe["target"]
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), target))
    return dataset.shuffle(buffer_size=len(features))

In [25]:
train_ds = dataframe_to_dataset(train)
val_ds = dataframe_to_dataset(val)
test_ds = dataframe_to_dataset(test)

In [None]:
type(train_ds)

Ejemplo de cómo queda el tf.dataset

In [None]:
for x, y in train_ds.take(1):
    print("Input:", x)
    print("Target:", y)

Separamos los datos de entrenamiento, validación y prueba en lotes

In [28]:
# Group each split into mini-batches of `batch_size` examples.
# NOTE: every line rebinds the same name to its batched version, so re-running this
# cell without re-creating the datasets would batch the already-batched data again.
batch_size = 32
train_ds = train_ds.batch(batch_size)
test_ds = test_ds.batch(batch_size)
val_ds = val_ds.batch(batch_size)

Función para codificar variables numéricas (Keras docs)

In [29]:
def encode_numerical_feature(feature, name, dataset):
    """Normalise one numeric input using statistics learned from ``dataset``.

    Args:
        feature: Keras input tensor for the variable.
        name: Key of the variable inside the dataset's feature dictionary.
        dataset: ``tf.data.Dataset`` of ``(features_dict, label)`` elements from
            which the normalisation statistics are learned.

    Returns:
        The result of applying the adapted ``Normalization`` layer to ``feature``.
    """
    # Keep only the requested variable and append a trailing dimension to it.
    column_ds = dataset.map(
        lambda features, label: tf.expand_dims(features[name], -1)
    )

    # Learn the data statistics (mean, variance) for this variable.
    scaler = keras.layers.Normalization()
    scaler.adapt(column_ds)

    return scaler(feature)

Función para codificar variables categóricas (Keras docs)

In [30]:
def encode_categorical_feature(feature, name, dataset, is_string):
    """Encode one categorical input as 0/1 indicator (dummy) variables.

    Args:
        feature: Keras input tensor for the variable.
        name: Key of the variable inside the dataset's feature dictionary.
        dataset: ``tf.data.Dataset`` of ``(features_dict, label)`` elements from
            which the set of category values is learned.
        is_string: True to use ``StringLookup``, False to use ``IntegerLookup``.

    Returns:
        The result of applying the adapted lookup layer to ``feature``.
    """
    if is_string:
        lookup_cls = keras.layers.StringLookup
    else:
        lookup_cls = keras.layers.IntegerLookup

    # Keep only the requested variable and append a trailing dimension to it.
    column_ds = dataset.map(
        lambda features, label: tf.expand_dims(features[name], -1)
    )

    # The lookup layer learns the values the variable takes and, with
    # output_mode="binary", emits 0/1 indicators rather than integer indices.
    encoder = lookup_cls(output_mode="binary")
    encoder.adapt(column_ds)

    return encoder(feature)

Creamos una lista de inputs para el modelo, de acuerdo con cada tipo de variable

In [None]:
# One int64 Input of shape (1,) per integer-coded categorical variable, named after its column.
inputs = [
    keras.Input(shape=(1,), name=col, dtype="int64")
    for col in cat_int_feats
]

In [32]:
# String-valued categorical variables are appended after the integer-coded ones.
inputs.extend(
    keras.Input(shape=(1,), name=col, dtype="string")
    for col in cat_str_feats
)

In [33]:
# Numerical variables come last and use the default Input dtype.
inputs.extend(
    keras.Input(shape=(1,), name=col)
    for col in num_feats
)

In [None]:
for i in inputs:
   print(i)

Creamos una lista de variables codificadas/normalizadas de acuerdo con su tipo, empleando las funciones de codificación/normalización

In [35]:
feats_encoded=[]

In [None]:
# The integer-coded categorical inputs are the first entries of `inputs`,
# in the same order as `cat_int_feats`.
for feat_input, feat in zip(inputs, cat_int_feats):
    encoded = encode_categorical_feature(feat_input, feat, train_ds, False)
    feats_encoded.append(encoded)

In [None]:
len_feats = len(feats_encoded)
len_feats

In [38]:
# `len_feats` (number of features encoded so far) is used as the position of the
# first string input inside `inputs`.
for idx, feat in enumerate(cat_str_feats, start=len_feats):
    encoded = encode_categorical_feature(inputs[idx], feat, train_ds, True)
    feats_encoded.append(encoded)

In [None]:
len_feats = len(feats_encoded)
len_feats

In [40]:
# `len_feats` (number of features encoded so far) is used as the position of the
# first numerical input inside `inputs`.
for idx, feat in enumerate(num_feats, start=len_feats):
    encoded = encode_numerical_feature(inputs[idx], feat, train_ds)
    feats_encoded.append(encoded)

In [None]:
for i in feats_encoded:
  print(i)

Creamos una capa concatenando todas las variables codificadas

In [42]:
all_feats = keras.layers.concatenate(feats_encoded)

In [None]:
type(all_feats)

Agregamos una capa densa con 32 neuronas y función de activación relu

In [44]:
model_layers = keras.layers.Dense(32, activation='relu')(all_feats)

Agregamos la capa de salida con 1 neurona (probabilidad de sufrir la enfermedad cardíaca) y función de activación sigmoide

In [45]:
model_layers = keras.layers.Dense(1, activation='sigmoid')(model_layers)

Creamos el modelo con las capas ya creadas y las variables de entrada

In [46]:
model = keras.Model(inputs, model_layers)

Compilamos el modelo, definiendo optimizador, función de pérdida y métricas adicionales a capturar

In [None]:
# Baseline set-up: Adam with binary cross-entropy, tracking accuracy,
# precision, recall and AUC (reported as 'roc_auc').
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)


In [None]:
keras.utils.plot_model(model, show_shapes=True, rankdir="LR")

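Aseguramos que Keras use TensorFlow como backend, para asegurar que el modelo pueda usar strings como entradas. Nota: la variable de entorno `KERAS_BACKEND` solo tiene efecto si se define antes de importar `keras` por primera vez.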

In [49]:
import os
# NOTE(review): Keras reads KERAS_BACKEND when `keras` is first imported. This notebook
# already imports keras in its first code cell, so setting the variable here most likely
# has no effect on the running kernel — consider moving it above that import.
os.environ["KERAS_BACKEND"] = "tensorflow"

In [None]:
model.summary()

Entrenamos el modelo con los datos en el formato tf.Dataset

In [None]:
history = model.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch for the baseline model.
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Modelo base")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch for the baseline model.
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Modelo base")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model.metrics_names` lining up with the result list
# (depending on the Keras version, that attribute may not list each compiled metric).
metrics_base = model.evaluate(test_ds, verbose=0, return_dict=True)
results = list(metrics_base.values())  # metric values, in the order Keras reports them

print("Resultados para modelo base")
for name, value in metrics_base.items():
    print(f"{name}: {value}")

## Punto 3 - 1 (optimizador = SGD)

In [55]:
from tensorflow.keras.optimizers import SGD, Adam, RMSprop

In [56]:
optimizer_exp1 = SGD(learning_rate=0.01, momentum=0.9)

In [57]:
# Build fresh Dense layers for this experiment. Wrapping `model_layers` again would
# reuse the very same layer objects (and weights) already trained by the base model,
# so the SGD run would start from fitted weights instead of from scratch.
# The adapted preprocessing layers behind `all_feats` are reused on purpose.
hidden_exp1 = keras.layers.Dense(32, activation='relu')(all_feats)
output_exp1 = keras.layers.Dense(1, activation='sigmoid')(hidden_exp1)
model_exp1 = keras.Model(inputs, output_exp1)

Compilamos el modelo, definiendo optimizador, función de pérdida y métricas adicionales a capturar

In [58]:
# Same loss and metrics as the baseline; only the optimizer changes.
model_exp1.compile(
    optimizer=optimizer_exp1,
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)


In [None]:
keras.utils.plot_model(model_exp1, show_shapes=True, rankdir="LR")

In [None]:
model_exp1.summary()

In [None]:
history_exp1 = model_exp1.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch for the SGD experiment.
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_exp1.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 3 - 1 SGD")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch for the SGD experiment.
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_exp1.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 3 - 1 SGD")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model_exp1.metrics_names` lining up with the result list.
metrics_exp1 = model_exp1.evaluate(test_ds, verbose=0, return_dict=True)
results_exp1 = list(metrics_exp1.values())  # kept as a list of metric values

print("Resultados Punto 3 - 1 SGD")
for name, value in metrics_exp1.items():
    print(f"{name}: {value}")

## Punto 3 - 2 (optimizador = RMSprop)

In [65]:
optimizer_exp2 = RMSprop(learning_rate=0.001)

In [66]:
# Build fresh Dense layers for this experiment. Wrapping `model_layers` again would
# reuse the layer objects (and weights) already trained by earlier models, so the
# RMSprop run would start from fitted weights instead of from scratch.
# The adapted preprocessing layers behind `all_feats` are reused on purpose.
hidden_exp2 = keras.layers.Dense(32, activation='relu')(all_feats)
output_exp2 = keras.layers.Dense(1, activation='sigmoid')(hidden_exp2)
model_exp2 = keras.Model(inputs, output_exp2)

Compilamos el modelo, definiendo optimizador, función de pérdida y métricas adicionales a capturar

In [67]:
# Same loss and metrics as the baseline; only the optimizer changes.
model_exp2.compile(
    optimizer=optimizer_exp2,
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)


In [None]:
model_exp2.summary()

In [None]:
history_exp2 = model_exp2.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch for the RMSprop experiment.
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_exp2.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 3 - 2 RMS")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch for the RMSprop experiment.
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_exp2.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 3 - 2 RMS")
ax.legend()

In [None]:
# Report this experiment's own test metrics. The previous loop iterated over
# `results_exp1`, so it printed the SGD numbers under the RMSprop heading.
# return_dict=True also makes Keras pair every metric value with its own name.
metrics_exp2 = model_exp2.evaluate(test_ds, verbose=0, return_dict=True)
results_exp2 = list(metrics_exp2.values())  # kept as a list of metric values

print("Resultados Punto 3 - 2 RMS")
for name, value in metrics_exp2.items():
    print(f"{name}: {value}")

## Punto 4 - 1 (tasa = 0.01)

In [85]:
# Build fresh Dense layers for this experiment. Wrapping `model_layers` again would
# reuse the layer objects (and weights) already trained by earlier models, so the
# learning-rate comparison would start from fitted weights instead of from scratch.
# The adapted preprocessing layers behind `all_feats` are reused on purpose.
hidden_4_1 = keras.layers.Dense(32, activation='relu')(all_feats)
output_4_1 = keras.layers.Dense(1, activation='sigmoid')(hidden_4_1)
model_4_1 = keras.Model(inputs, output_4_1)

In [86]:
# Adam with a learning rate of 0.01; loss and metrics match the baseline.
learning_rate_1 = 0.01
optimizer = Adam(learning_rate=learning_rate_1)
model_4_1.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)

In [None]:
model_4_1.summary()

In [None]:
history_4_1 = model_4_1.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch (learning rate 0.01).
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_4_1.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 4 - 1 (0.01)")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch (learning rate 0.01).
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_4_1.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 4 - 1 (0.01)")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model_4_1.metrics_names` lining up with the result list.
metrics_4_1 = model_4_1.evaluate(test_ds, verbose=0, return_dict=True)
results_4_1 = list(metrics_4_1.values())  # kept as a list of metric values

print("Resultados Punto 4 - 1 (0.01)")
for name, value in metrics_4_1.items():
    print(f"{name}: {value}")

## Punto 4 - 2 (tasa = 0.0001)

In [94]:
# Build fresh Dense layers for this experiment. Wrapping `model_layers` again would
# reuse the layer objects (and weights) already trained by earlier models, so the
# learning-rate comparison would start from fitted weights instead of from scratch.
# The adapted preprocessing layers behind `all_feats` are reused on purpose.
hidden_4_2 = keras.layers.Dense(32, activation='relu')(all_feats)
output_4_2 = keras.layers.Dense(1, activation='sigmoid')(hidden_4_2)
model_4_2 = keras.Model(inputs, output_4_2)

In [95]:
# Adam with a learning rate of 0.0001; loss and metrics match the baseline.
learning_rate_2 = 0.0001
optimizer = Adam(learning_rate=learning_rate_2)
model_4_2.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)

In [None]:
model_4_2.summary()

In [None]:
history_4_2 = model_4_2.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch (learning rate 0.0001).
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_4_2.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 4 - 2 (0.0001)")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch (learning rate 0.0001).
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_4_2.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 4 - 2 (0.0001)")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model_4_2.metrics_names` lining up with the result list.
metrics_4_2 = model_4_2.evaluate(test_ds, verbose=0, return_dict=True)
results_4_2 = list(metrics_4_2.values())  # kept as a list of metric values

print("Resultados Punto 4 - 2 (0.0001)")
for name, value in metrics_4_2.items():
    print(f"{name}: {value}")

## Punto 4 - 3 (tasa = 0.005)

In [93]:
# Build fresh Dense layers for this experiment. Wrapping `model_layers` again would
# reuse the layer objects (and weights) already trained by earlier models, so the
# learning-rate comparison would start from fitted weights instead of from scratch.
# The adapted preprocessing layers behind `all_feats` are reused on purpose.
hidden_4_3 = keras.layers.Dense(32, activation='relu')(all_feats)
output_4_3 = keras.layers.Dense(1, activation='sigmoid')(hidden_4_3)
model_4_3 = keras.Model(inputs, output_4_3)

In [96]:
# Adam with a learning rate of 0.005; loss and metrics match the baseline.
learning_rate_3 = 0.005
optimizer = Adam(learning_rate=learning_rate_3)
model_4_3.compile(
    optimizer=optimizer,
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)

In [None]:
model_4_3.summary()

In [None]:
history_4_3 = model_4_3.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch (learning rate 0.005).
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_4_3.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 4 - 3 (0.005)")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch (learning rate 0.005).
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_4_3.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 4 - 3 (0.005)")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model_4_3.metrics_names` lining up with the result list.
metrics_4_3 = model_4_3.evaluate(test_ds, verbose=0, return_dict=True)
results_4_3 = list(metrics_4_3.values())  # kept as a list of metric values

print("Resultados Punto 4 - 3 (0.005)")
for name, value in metrics_4_3.items():
    print(f"{name}: {value}")

## Punto 6 - 1 (Tanh-sigmoid)

In [None]:
# Hidden layer: 32 tanh units on top of the concatenated features.
hidden_6_1 = keras.layers.Dense(32, activation='tanh')(all_feats)
# Output layer: a single sigmoid unit.
model_layers1 = keras.layers.Dense(1, activation='sigmoid')(hidden_6_1)

In [None]:
model_6_1 = keras.Model(inputs, model_layers1)

In [None]:
# Same optimizer, loss and metrics as the baseline; only the activations differ.
model_6_1.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)

In [None]:
model_6_1.summary()

In [None]:
history_6_1 = model_6_1.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch (tanh hidden layer, sigmoid output).
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_6_1.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 6 - 1 (tanh-sigmoid)")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch (tanh hidden layer, sigmoid output).
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_6_1.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 6 - 1 (tanh-sigmoid)")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model_6_1.metrics_names` lining up with the result list.
metrics_6_1 = model_6_1.evaluate(test_ds, verbose=0, return_dict=True)
results_6_1 = list(metrics_6_1.values())  # kept as a list of metric values

print("Resultados Punto 6 - 1 (tanh-sigmoid)")
for name, value in metrics_6_1.items():
    print(f"{name}: {value}")

## Punto 6 - 2 (sigmoid-relu)

In [None]:
# Hidden layer: 32 sigmoid units on top of the concatenated features.
hidden_6_2 = keras.layers.Dense(32, activation='sigmoid')(all_feats)
# Output layer: a single ReLU unit, as named in this section's heading.
# NOTE(review): a ReLU output is not confined to [0, 1], yet this model is compiled
# with binary cross-entropy and probability-based metrics — read its results with
# that in mind.
model_layers2 = keras.layers.Dense(1, activation='relu')(hidden_6_2)

In [None]:
model_6_2 = keras.Model(inputs, model_layers2)

In [None]:
# Same optimizer, loss and metrics as the baseline; only the activations differ.
model_6_2.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)

In [None]:
model_6_2.summary()

In [None]:
history_6_2 = model_6_2.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
# Training vs. validation loss per epoch (sigmoid hidden layer, ReLU output).
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_6_2.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 6 - 2 (sigmoid-relu)")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch (sigmoid hidden layer, ReLU output).
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_6_2.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 6 - 2 (sigmoid-relu)")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model_6_2.metrics_names` lining up with the result list.
metrics_6_2 = model_6_2.evaluate(test_ds, verbose=0, return_dict=True)
results_6_2 = list(metrics_6_2.values())  # kept as a list of metric values

print("Resultados Punto 6 - 2 (sigmoid-relu)")
for name, value in metrics_6_2.items():
    print(f"{name}: {value}")

## Punto 6 - 3 (relu-tanh)

In [None]:
# Hidden layer: 32 ReLU units on top of the concatenated features.
hidden_6_3 = keras.layers.Dense(32, activation='relu')(all_feats)
# Output layer: a single tanh unit, as named in this section's heading.
# NOTE(review): a tanh output ranges over (-1, 1), so it can be negative, yet this
# model is compiled with binary cross-entropy and probability-based metrics — read
# its results with that in mind.
model_layers3 = keras.layers.Dense(1, activation='tanh')(hidden_6_3)

In [None]:
model_6_3 = keras.Model(inputs, model_layers3)

In [None]:
# Same optimizer, loss and metrics as the baseline; only the activations differ.
model_6_3.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        "accuracy",
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
        tf.keras.metrics.AUC(name='roc_auc'),
    ],
)

In [None]:
model_6_3.summary()

In [None]:
history_6_3 = model_6_3.fit(train_ds, epochs=50, validation_data=val_ds)

In [None]:
# Training vs. validation loss per epoch (ReLU hidden layer, tanh output).
fig, ax = plt.subplots()
for key in ('loss', 'val_loss'):
    ax.plot(history_6_3.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss Function')
ax.set_title("Punto 6 - 3 (relu-tanh)")
ax.legend()

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch (ReLU hidden layer, tanh output).
fig, ax = plt.subplots()
for key in ('accuracy', 'val_accuracy'):
    ax.plot(history_6_3.history[key], label=key)
ax.set_ylim([0, 1])
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title("Punto 6 - 3 (relu-tanh)")
ax.legend()

In [None]:
# return_dict=True makes Keras pair every metric value with its own name, so the
# report does not depend on `model_6_3.metrics_names` lining up with the result list.
metrics_6_3 = model_6_3.evaluate(test_ds, verbose=0, return_dict=True)
results_6_3 = list(metrics_6_3.values())  # kept as a list of metric values

print("Resultados Punto 6 - 3(relu-tanh)")
for name, value in metrics_6_3.items():
    print(f"{name}: {value}")