In [1]:
import torch
from torch import nn
from torchvision import datasets,transforms
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import optuna
import os

#direccion donde se almacenaran los datos
DIR = os.getcwd()

#estructura red neuronal y otras funciones
from CNN_network import CNNNetwork,train,test,get_mnist

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Run-wide configuration.
# Device used for training and evaluation; this notebook runs on the CPU.
DEVICE = torch.device("cpu")
# Batch size handed to get_mnist when the data loaders are built.
BATCHSIZE = 128
# Number of epochs each Optuna trial trains for inside objective().
EPOCHS = 10
# Number of epochs used when retraining the model with the best hyperparameters.
EPOCHS_RETRAIN = 5

In [3]:
# Load the dataset: get_mnist (imported from CNN_network) is called with the batch size and its
# result is unpacked into the train, validation and test loaders, in that order.
# NOTE(review): despite the helper's name, the download log of this cell shows Fashion-MNIST
# files being fetched — confirm that this is the intended dataset.
train_loader,valid_loader, test_loader = get_mnist(BATCHSIZE)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 26.4M/26.4M [00:40<00:00, 660kB/s] 


Extracting c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\train-images-idx3-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 29.5k/29.5k [00:00<00:00, 169kB/s]


Extracting c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\train-labels-idx1-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 4.42M/4.42M [00:07<00:00, 564kB/s] 


Extracting c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\t10k-images-idx3-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 5.15k/5.15k [00:00<?, ?B/s]

Extracting c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw\t10k-labels-idx1-ubyte.gz to c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\FashionMNIST\raw






In [4]:
# Objective function: holds the whole model-instantiation and training logic for one trial.
def objective(trial):
    """Build, train and validate one CNN configuration sampled by Optuna.

    The trial samples the number of convolutional and fully connected layers,
    the filter count of every convolutional layer, the neuron count of every
    fully connected layer, the optimizer class and the learning rate.

    Args:
        trial (optuna.trial.Trial): Trial used to sample the hyperparameters,
            report intermediate values and query the pruner.

    Returns:
        float: The first value returned by ``test`` on ``valid_loader`` after
        the last epoch (treated here as the validation accuracy).

    Raises:
        optuna.exceptions.TrialPruned: If the pruner asks to stop the trial
            after any epoch.
    """
    # Network depth is sampled first; the per-layer parameters depend on it.
    n_conv_layers = trial.suggest_int("num_conv_layers", 1, 3)
    n_fully_layers = trial.suggest_int("num_fully_layers", 1, 3)

    # One filter count per convolutional layer (16..128 in steps of 16).
    num_filters = [
        trial.suggest_int(f"num_filter_cl{layer}", 16, 128, step=16)
        for layer in range(n_conv_layers)
    ]
    # One neuron count per fully connected layer (20..50).
    num_neurons = [
        trial.suggest_int(f"n_neurons_l{layer}", 20, 50)
        for layer in range(n_fully_layers)
    ]

    # Build the model from the sampled architecture and place it on the target device.
    model = CNNNetwork(n_conv_layers, num_filters, n_fully_layers, num_neurons)
    model.to(DEVICE)

    # The optimizer class is looked up by name in torch.optim; lr is sampled on a log scale.
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_cls = getattr(optim, optimizer_name)
    optimizer = optimizer_cls(model.parameters(), lr=lr)

    loss_fn = nn.CrossEntropyLoss()

    for epoch in range(1, EPOCHS + 1):
        # The values returned by train() and the loss returned by test() are not used here.
        train(train_loader, model, loss_fn, optimizer, DEVICE)
        accuracy, _ = test(valid_loader, model, loss_fn, DEVICE)

        # Report the validation metric so the pruner can compare this trial with others.
        trial.report(accuracy, epoch)

        # Abort early when the pruner considers the trial unpromising.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [5]:
# Name under which the Optuna study is registered.
study_name = "CNN-training"

# load_if_exists only takes effect with a persistent storage: without `storage`, Optuna
# creates a fresh in-memory study on every run and nothing can ever be reloaded.
# The study is therefore persisted to a SQLite file inside DIR, so that re-running this
# cell resumes the existing study instead of starting from scratch.
# Note: every execution of this cell adds 10 more trials to the stored study.
storage_url = "sqlite:///" + os.path.join(DIR, "optuna_cnn_study.db")

# Maximize the value returned by objective() (the validation accuracy).
study = optuna.create_study(
    direction="maximize",
    study_name=study_name,
    storage=storage_url,
    load_if_exists=True,
)
study.optimize(objective, n_trials=10)

[I 2024-10-23 14:59:07,556] A new study created in memory with name: CNN-training
[W 2024-10-23 14:59:11,420] Trial 0 failed with parameters: {'num_conv_layers': 2, 'num_fully_layers': 1, 'num_filter_cl0': 16, 'num_filter_cl1': 96, 'n_neurons_l0': 24, 'optimizer': 'RMSprop', 'lr': 6.802493661596039e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\User\anaconda3\envs\opti-env\lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\User\AppData\Local\Temp\ipykernel_31436\2497502543.py", line 41, in objective
    train_loss          = train(train_loader, model, loss_fn, optimizer,DEVICE)
  File "c:\Users\User\Desktop\universidad\optimizacion-de-hiperpaametros-de-redes-neuronales-utilizando-optuna\CNN\CNN_network.py", line 111, in train
    for batch, (X, y) in enumerate(dataloader):
  File "c:\Users\User\anaconda3\envs\opti-env\lib\site-packages\torch\utils\data\da

KeyboardInterrupt: 

In [None]:
# Print the value and the hyperparameters of the best trial of the study.
best = study.best_trial
print("Best trial:")
print("  Value: ", best.value)
print("  Params: ")
for param in best.params.items():
    # Each item is a (name, value) pair; unpack it straight into the format call.
    print("    {}: {}".format(*param))

In [None]:
# Rebuild the network described by the best trial and retrain it from scratch.
best_params = study.best_trial.params

n_conv_layers = best_params["num_conv_layers"]
n_fully_layers = best_params["num_fully_layers"]

# Per-layer sizes are stored under indexed keys, one per layer that was actually sampled.
num_filters = [best_params[f"num_filter_cl{i}"] for i in range(n_conv_layers)]
num_neurons = [best_params[f"n_neurons_l{i}"] for i in range(n_fully_layers)]

model_re = CNNNetwork(n_conv_layers, num_filters, n_fully_layers, num_neurons)
# Place the model on the training device, exactly as objective() does; without this the
# cell only works while DEVICE happens to be the CPU.
model_re.to(DEVICE)

# Same optimizer class and learning rate as the best trial.
optimizer_name_re = best_params["optimizer"]
lr_re = best_params["lr"]
optimizer_re = getattr(optim, optimizer_name_re)(model_re.parameters(), lr=lr_re)
loss_fn_re = nn.CrossEntropyLoss()

# Retraining loop. Per-epoch metrics are computed on the validation loader so that the
# values printed as "val" really are validation metrics; the test loader stays untouched
# until the final evaluation cell.
for epoch in range(1, EPOCHS_RETRAIN + 1):
    train_loss = train(train_loader, model_re, loss_fn_re, optimizer_re, DEVICE)
    accuracy_val, val_loss = test(valid_loader, model_re, loss_fn_re, DEVICE)
    print(f"Epoch {epoch}/{EPOCHS_RETRAIN} - val loss: {val_loss:.4f} - val Accuracy: {accuracy_val:.4f}")

In [None]:
# Final evaluation: run the retrained model once over the test loader and print
# the resulting loss and accuracy.
accuracy_test, test_loss = test(test_loader, model_re, loss_fn_re, DEVICE)
print(f"Test loss: {test_loss:.4f} - Test Accuracy: {accuracy_test:.4f}")