## Pretrained Model Resnet50 for Transfer

In [1]:
!pip install optuna
import urllib
import numpy as np
import tensorflow as tf
import optuna
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.metrics import AUC
from tensorflow.keras.preprocessing.image import img_to_array, load_img

Collecting optuna
  Downloading optuna-4.0.0-py3-none-any.whl.metadata (16 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.2-py3-none-any.whl.metadata (7.4 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.5-py3-none-any.whl.metadata (2.9 kB)
Downloading optuna-4.0.0-py3-none-any.whl (362 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m362.8/362.8 kB[0m [31m16.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.13.2-py3-none-any.whl (232 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.0/233.0 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading Mako-1.3.5-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: M

In [2]:
import sys
assert sys.version_info >= (3, 7)

from packaging import version
import sklearn
assert version.parse(sklearn.__version__) >= version.parse("1.0.1")

import tensorflow as tf
assert version.parse(tf.__version__) >= version.parse("2.8.0")

import numpy as np

In [3]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam, RMSprop
import optuna
from sklearn.model_selection import train_test_split
import urllib

In [4]:
urllib.request.urlretrieve(
    'https://raw.githubusercontent.com/sergiomora03/AdvancedTopicsAnalytics/main/notebooks/img/ImportImagenesURL.py',
    'ImportImagenes.py'
)

('ImportImagenes.py', <http.client.HTTPMessage at 0x79dd724f7c70>)

In [5]:
from ImportImagenes import *

In [6]:
X, Y = import_imagenes()

In [7]:
print("X shape:", X.shape)
print("Y shape:", Y.shape)

X shape: (14700, 250)
Y shape: (1, 250)


In [8]:
CE_x, CV_x, CE_y, CV_y = train_test_split(X.T, Y.T, test_size=0.3, random_state=100)

In [9]:
#Redimensionar las imágenes para el modelo
def resize_images(images, target_size):
    resized_images = np.empty((images.shape[0], *target_size, 3))
    for i in range(images.shape[0]):
        img = images[i].reshape((70, 70, 3))  # Suponiendo que las imágenes están en 70x70
        img = tf.image.resize(img, target_size)
        resized_images[i] = img
    return resized_images

In [10]:
# Redimensionar imágenes a (224, 224) para VGG16
target_size = (224, 224)
CE_x_resized = resize_images(CE_x, target_size)
CV_x_resized = resize_images(CV_x, target_size)

In [11]:
# Verificar tamaños de las imágenes redimensionadas
print("Forma de CE_x_resized:", CE_x_resized.shape)
print("Forma de CV_x_resized:", CV_x_resized.shape)
print("Forma de CE_y:", CE_y.shape)
print("Forma de CV_y:", CV_y.shape)

Forma de CE_x_resized: (175, 224, 224, 3)
Forma de CV_x_resized: (75, 224, 224, 3)
Forma de CE_y: (175, 1)
Forma de CV_y: (75, 1)


In [23]:
# Función objetivo para Optuna
def objective(trial):
    # Valores de hiperparámetros sugeridos
    num_dense_units = trial.suggest_int('num_dense_units', 128, 512)
    activation = trial.suggest_categorical('activation', ['relu', 'tanh'])
    l2_reg = trial.suggest_loguniform('l2_reg', 1e-6, 1e-2)
    optimizer_name = trial.suggest_categorical('optimizer', ['adam', 'rmsprop'])
    epochs = trial.suggest_int('epochs', 5, 10)  # Número de épocas

    # Cargar el modelo base sin la parte superior (top)
    base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

    # Agregar nuevas capas
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(num_dense_units, activation=activation, kernel_regularizer=l2(l2_reg))(x)
    predictions = Dense(1, activation='sigmoid')(x)  # 1 salida para clasificación binaria

    model = Model(inputs=base_model.input, outputs=predictions)

    # Congelar las capas base del modelo
    for layer in base_model.layers:
        layer.trainable = False

    # Configurar el optimizador
    lr = trial.suggest_loguniform('lr', 1e-6, 1e-1)  # learning rate
    if optimizer_name == 'adam':
        optimizer = Adam(learning_rate=lr)
    else:
        optimizer = RMSprop(learning_rate=lr)

    # Compilar el modelo con AUC como métrica
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[tf.keras.metrics.AUC(name='auc')])


    # Entrenar el modelo
    history = model.fit(CE_x_resized, CE_y,
                        epochs=epochs,
                        batch_size=32,  # Puedes ajustar esto también si es necesario
                        validation_data=(CV_x_resized, CV_y),
                        verbose=0)  # 'verbose=0' para evitar salida de entrenamiento

    # Evaluar el modelo en el conjunto de validación
    loss, accuracy = model.evaluate(CV_x_resized, CV_y, verbose=0)

    # Optuna busca maximizar la precisión, así que devolvemos la precisión
    return accuracy


In [24]:
# Crear un estudio de Optuna y optimizar
study = optuna.create_study(direction='maximize')  # Maximizar la precisión
study.optimize(objective, n_trials=10)  # Número de pruebas reducido para pruebas rápidas


[I 2024-09-04 02:28:15,196] A new study created in memory with name: no-name-94958fda-e7fd-4a38-8212-66bd20c6edd0
  l2_reg = trial.suggest_loguniform('l2_reg', 1e-6, 1e-2)
  lr = trial.suggest_loguniform('lr', 1e-6, 1e-1)  # learning rate
[I 2024-09-04 02:28:35,055] Trial 0 finished with value: 0.7347384095191956 and parameters: {'num_dense_units': 341, 'activation': 'relu', 'l2_reg': 0.003058754019102797, 'optimizer': 'adam', 'epochs': 7, 'lr': 1.7689818223001673e-06}. Best is trial 0 with value: 0.7347384095191956.
[I 2024-09-04 02:28:52,098] Trial 1 finished with value: 0.7605377435684204 and parameters: {'num_dense_units': 443, 'activation': 'relu', 'l2_reg': 3.6268752692834254e-05, 'optimizer': 'adam', 'epochs': 8, 'lr': 2.0525894584753237e-06}. Best is trial 1 with value: 0.7605377435684204.
[I 2024-09-04 02:29:16,179] Trial 2 finished with value: 0.8208575248718262 and parameters: {'num_dense_units': 369, 'activation': 'tanh', 'l2_reg': 9.801110583858983e-05, 'optimizer': 'adam'

In [26]:
# Imprimir los mejores hiperparámetros encontrados
print("Mejores hiperparámetros encontrados:")
print(study.best_params)

# Mostrar el mejor resultado
print(f"Mejor AUC encontrado: {study.best_value}")

Mejores hiperparámetros encontrados:
{'num_dense_units': 502, 'activation': 'tanh', 'l2_reg': 9.507299889625621e-05, 'optimizer': 'adam', 'epochs': 7, 'lr': 0.000528435615539255}
Mejor AUC encontrado: 0.8702760934829712


In [27]:
# Definir el mejor conjunto de hiperparámetros
best_params = study.best_params

In [28]:
# Cargar el modelo base sin la parte superior (top) con los mejores hiperparámetros
base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))


In [29]:
# Agregar nuevas capas con los mejores hiperparámetros
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(best_params['num_dense_units'], activation=best_params['activation'], kernel_regularizer=l2(best_params['l2_reg']))(x)
predictions = Dense(1, activation='sigmoid')(x)  # 1 salida para clasificación binaria


In [30]:
# Crear el modelo
model = Model(inputs=base_model.input, outputs=predictions)

# Congelar las capas base del modelo
for layer in base_model.layers:
    layer.trainable = False

In [31]:
# Configurar el optimizador con el mejor learning rate
if best_params['optimizer'] == 'adam':
    optimizer = Adam(learning_rate=best_params['lr'])
else:
    optimizer = RMSprop(learning_rate=best_params['lr'])

In [35]:
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[tf.keras.metrics.AUC(name='auc')])


# Entrenar el modelo con el conjunto completo de entrenamiento
model.fit(CE_x_resized, CE_y,
          epochs=best_params['epochs'],
          batch_size=32,  # Puedes ajustar esto también si es necesario
          validation_data=(CV_x_resized, CV_y))

# Evaluar el modelo en el conjunto de validación
loss, accuracy = model.evaluate(CE_x_resized, CE_y)
print(f'Pérdida en el conjunto de entrenamiento: {loss}')
print(f'Precisión en el conjunto de entrenamiento: {accuracy}')

Epoch 1/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 891ms/step - auc: 0.9703 - loss: 0.3437 - val_auc: 0.9480 - val_loss: 0.3778
Epoch 2/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 202ms/step - auc: 0.9737 - loss: 0.3308 - val_auc: 0.9506 - val_loss: 0.3665
Epoch 3/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 259ms/step - auc: 0.9707 - loss: 0.3210 - val_auc: 0.9517 - val_loss: 0.3654
Epoch 4/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 256ms/step - auc: 0.9592 - loss: 0.3419 - val_auc: 0.9535 - val_loss: 0.3599
Epoch 5/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 201ms/step - auc: 0.9776 - loss: 0.2933 - val_auc: 0.9546 - val_loss: 0.3726
Epoch 6/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 259ms/step - auc: 0.9728 - loss: 0.2976 - val_auc: 0.9557 - val_loss: 0.3511
Epoch 7/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 262ms/step - auc: 0.9779 - loss: 0

In [32]:
# Compilar el modelo
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=[tf.keras.metrics.AUC(name='auc')])


# Entrenar el modelo con el conjunto completo de entrenamiento
model.fit(CE_x_resized, CE_y,
          epochs=best_params['epochs'],
          batch_size=32,  # Puedes ajustar esto también si es necesario
          validation_data=(CV_x_resized, CV_y))

# Evaluar el modelo en el conjunto de validación
loss, accuracy = model.evaluate(CV_x_resized, CV_y)
print(f'Pérdida en el conjunto de validación: {loss}')
print(f'Precisión en el conjunto de validación: {accuracy}')

Epoch 1/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 982ms/step - auc: 0.5412 - loss: 0.7597 - val_auc: 0.8165 - val_loss: 0.6767
Epoch 2/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 217ms/step - auc: 0.7609 - loss: 0.6783 - val_auc: 0.8219 - val_loss: 0.6306
Epoch 3/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 218ms/step - auc: 0.8161 - loss: 0.6236 - val_auc: 0.8190 - val_loss: 0.5989
Epoch 4/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 222ms/step - auc: 0.7791 - loss: 0.6119 - val_auc: 0.8296 - val_loss: 0.5778
Epoch 5/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 270ms/step - auc: 0.8448 - loss: 0.5544 - val_auc: 0.8365 - val_loss: 0.5581
Epoch 6/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 266ms/step - auc: 0.8182 - loss: 0.5644 - val_auc: 0.8499 - val_loss: 0.5464
Epoch 7/7
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 279ms/step - auc: 0.8117 - loss: 0