# Hyperparameter Search

Hyperparameter tuning is crucial in machine learning because it allows us to find the hyperparameter settings that maximize the model's performance. By exploring different combinations of hyperparameters, we can fine-tune the model's behavior, improve its accuracy, and prevent issues such as overfitting or underfitting.

### Configure the GPU

In [None]:
import tensorflow as tf
# List the GPUs TensorFlow can see and report how many there are.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print("Number of GPUs Available: ", len(physical_devices))
# Only touch the first GPU when one exists; indexing an empty device list
# would raise IndexError on CPU-only machines.
if physical_devices:
    # Restrict TensorFlow to the first GPU.
    tf.config.experimental.set_visible_devices(physical_devices[0], 'GPU')
    # Let GPU memory be allocated on demand instead of reserving it all up front.
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [1]:

import optuna
from optuna import Trial
from optuna.samplers import TPESampler
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from sklearn.utils import shuffle
import glob
import random
import numpy as np
from PIL import Image
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from keras.preprocessing.image import ImageDataGenerator
from preprocess import *

#### Settings for loading the images

In [3]:
# Glob pattern for the images treated as real (labelled 0 by the loading loop).
real_images_path = '../data/dataset/data/CASIA2/Au/*.*'
# Glob pattern for the images treated as forged (labelled 1 by the loading loop).
fake_images_path = '../data/dataset/data/CASIA2/Tp/*.*'
# Target image size; its two entries become the first two dimensions of the
# network input (a third dimension of 3 channels is added by the model).
image_size = (128, 128)

In [4]:
def build_model(filters, dropout, learning_rate, image_size):
    """Assemble the uncompiled convolutional classifier.

    The network has three convolutional stages. Each stage is two 3x3,
    same-padded ReLU convolutions followed by 2x2 max-pooling and dropout;
    the stages use ``filters``, ``2*filters`` and ``4*filters`` channels.
    A 512-unit ReLU dense layer with dropout and a 2-unit softmax output
    complete the model.

    Args:
        filters: Number of convolution filters in the first stage.
        dropout: Dropout rate used after every stage and after the dense layer.
        learning_rate: Not used in this function; kept in the signature for callers.
        image_size: Pair giving the first two input dimensions; a third
            dimension of 3 is appended for the input shape.

    Returns:
        The ``Sequential`` model, not yet compiled.
    """
    conv_options = dict(kernel_size=(3, 3), padding='same', activation='relu')
    net = Sequential()

    for stage_index, multiplier in enumerate((1, 2, 4)):
        stage_filters = filters if multiplier == 1 else multiplier * filters
        if stage_index == 0:
            # Only the very first layer declares the input shape.
            first_conv = Conv2D(
                filters=stage_filters,
                input_shape=(image_size[0], image_size[1], 3),
                **conv_options,
            )
        else:
            first_conv = Conv2D(filters=stage_filters, **conv_options)
        net.add(first_conv)
        net.add(Conv2D(filters=stage_filters, **conv_options))
        net.add(MaxPooling2D(pool_size=(2, 2)))
        net.add(Dropout(dropout))

    # Classification head.
    net.add(Flatten())
    net.add(Dense(512, activation='relu'))
    net.add(Dropout(dropout))
    net.add(Dense(2, activation='softmax'))

    return net

In [6]:
def train_model(model, train_generator,val_generator, batch_size, epochs, learning_rate):
    """Compile ``model`` with Adam and fit it on the given batch generators.

    Training stops early when ``val_accuracy`` has not improved for 5 epochs,
    and the best model seen so far (by ``val_accuracy``) is written to
    ``../models/model.h5``.

    Args:
        model: Uncompiled Keras model to train.
        train_generator: Iterator yielding training batches; its length is
            used as the number of steps per epoch.
        val_generator: Iterator yielding validation batches.
        batch_size: Kept for backward compatibility and not forwarded to
            ``fit``; the generators already produce batches, and Keras asks
            that ``batch_size`` not be given for such inputs.
        epochs: Maximum number of epochs to run.
        learning_rate: Learning rate for the Adam optimizer.

    Returns:
        The same ``model`` object after fitting.
    """
    early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=5, verbose=0, mode='auto')
    checkpoint = ModelCheckpoint('../models/model.h5', monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
    # NOTE(review): the model ends in a 2-unit softmax trained on one-hot labels;
    # 'categorical_crossentropy' would be the conventional loss - confirm before changing.
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])
    # NOTE(review): EarlyStopping is created without restore_best_weights, so the
    # returned model presumably keeps the weights of the last epoch run; the
    # best-scoring weights live only in the checkpoint file - confirm.
    model.fit(
        train_generator,
        steps_per_epoch=len(train_generator),
        epochs=epochs,
        validation_data=val_generator,
        callbacks=[early_stopping, checkpoint],
    )
    return model

In [None]:
def objective(trial: Trial, X, y):
    """Optuna objective: train one model and return its test accuracy.

    Samples ``filters``, ``dropout`` and ``learning_rate`` from the trial,
    splits the data 80/10/10 into train/validation/test with a fixed seed,
    trains the model and appends one CSV row per trial to
    ``../models/results.txt``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.
        X: Images; converted to an array and reshaped to
            ``(-1, image_size[0], image_size[1], 3)``.
        y: Integer class labels; one-hot encoded here with 2 classes.

    Returns:
        The second value returned by ``model.evaluate`` on the test split,
        i.e. the accuracy metric the model is compiled with.
    """
    # Hyperparameters to optimize. suggest_float replaces the deprecated
    # suggest_uniform / suggest_loguniform and samples the same ranges.
    filters = trial.suggest_categorical('filters', [16, 32, 64])
    dropout = trial.suggest_float('dropout', 0.1, 0.5)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True)

    batch_size = 32
    epochs = 50

    X = np.array(X).reshape(-1, image_size[0], image_size[1], 3)
    y = to_categorical(y, num_classes=2)

    # 80% train; the remaining 20% is halved into validation and test.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=5)
    X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=5)

    train_generator = ImageDataGenerator().flow(X_train, y_train, batch_size=batch_size)
    val_generator = ImageDataGenerator().flow(X_val, y_val, batch_size=batch_size)

    model = build_model(filters, dropout, learning_rate, image_size)
    model = train_model(model, train_generator, val_generator, batch_size, epochs, learning_rate)

    # Measure the model's accuracy on the held-out test data.
    score = model.evaluate(X_test, y_test)

    # Append the results to a text file as the search progresses; the header
    # row is written on the first trial only.
    with open('../models/results.txt', 'a') as f:
        if trial.number == 0:
            f.write('iteration,filters,dropout,learning_rate,accuracy\n')
        f.write(f'{trial.number},{filters},{dropout},{learning_rate},{score[1]}\n')

    # Release the Keras/TensorFlow session state (and with it GPU memory)
    # before the next trial.
    tf.keras.backend.clear_session()

    return score[1]  # Model accuracy

In [None]:
if __name__ == '__main__':

    # Load the images: label 0 for the real set, label 1 for the forged set.
    print("Cargando datos de imágenes reales...")
    X = [preparete_image_ela(file_path, image_size) for file_path in glob.glob(real_images_path)]
    y = [0] * len(X)
    # Only the real images are in X here and all of their labels are 0, so
    # reordering X alone keeps images and labels aligned.
    random.shuffle(X)

    print("Cargando datos de imágenes falsificadas...")
    for file_path in glob.glob(fake_images_path):
        X.append(preparete_image_ela(file_path, image_size))
    # One label 1 for every image appended by the loop above.
    y += [1] * (len(X) - len(y))

    # Run the search with a seeded TPE sampler, maximizing the objective.
    sampler = TPESampler(seed=0)
    n_trials = 40
    study = optuna.create_study(direction='maximize', sampler=sampler)
    study.optimize(lambda trial: objective(trial, X, y), n_trials=n_trials)

    best_params = study.best_params
    best_value = study.best_value

    # Report the best hyperparameters and the objective value they reached.
    print("Mejores hiperparámetros encontrados:", best_params, sep="\n")
    print("Valor óptimo de la métrica objetivo:", best_value, sep="\n")