In [1]:
import torch
from torch import nn
from torchvision import datasets,transforms
from torch.utils.data import DataLoader, random_split
import torch.optim as optim
import optuna
import os

#direccion donde se almacenaran los datos
DIR = os.getcwd()

#estructura red neuronal y otras funciones
from MLP_network import MLPNetwork,train,test,get_mnist

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Device used for training and evaluation; this notebook runs on the CPU.
DEVICE = torch.device("cpu")

# Mini-batch size handed to get_mnist when the data loaders are built.
BATCHSIZE = 128

# Number of epochs per Optuna trial, and for the final retraining run.
EPOCHS = 50
EPOCHS_RETRAIN = 50

In [3]:
# Unpack the four values returned by get_mnist for the configured batch size:
# the train / validation / test loaders and the number of target classes.
(
    train_loader,
    validation_loader,
    test_loader,
    num_classes,
) = get_mnist(BATCHSIZE)

In [4]:
# Objective function: holds all the model-instantiation and training logic
def objective(trial):
    """Optuna objective that builds, trains and scores one MLP candidate.

    Args:
    trial (optuna.trial): Optuna trial object used to sample the
        hyperparameters, report intermediate values and decide on pruning.

    Returns:
    float: Accuracy measured on the validation loader after the last epoch.

    Raises:
    optuna.exceptions.TrialPruned: When the trial reports that it should
        be pruned.
    """
    # Number of hidden layers explored by the search
    n_layers = trial.suggest_int('n_layers', 1, 3)

    # One (width, activation) pair per layer. The tuple is evaluated left to
    # right, so each layer samples its width first and its activation second.
    layer_specs = [
        (
            trial.suggest_int(f'n_neurons_l{idx}', 1, 50),
            trial.suggest_categorical(f'activation_l{idx}', ['relu', 'tanh', 'sigmoid']),
        )
        for idx in range(n_layers)
    ]
    neurons_per_layer = [width for width, _ in layer_specs]
    activation_per_layer = [act for _, act in layer_specs]

    # Build the network from the sampled architecture and move it to the
    # training device
    model = MLPNetwork(n_layers, neurons_per_layer, activation_per_layer, num_classes)
    model.to(DEVICE)

    # Optimizer class and learning rate are hyperparameters as well; the
    # class is looked up by name in torch.optim
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer_cls = getattr(optim, optimizer_name)
    optimizer = optimizer_cls(model.parameters(), lr=lr)

    # Loss function
    loss_fn = nn.CrossEntropyLoss()

    # Train for EPOCHS epochs, validating after each one
    for epoch in range(1, EPOCHS + 1):
        train(train_loader, model, loss_fn, optimizer, DEVICE)
        accuracy, _ = test(validation_loader, model, loss_fn, DEVICE)

        # Report the validation accuracy so the pruner can compare trials
        trial.report(accuracy, epoch)

        # Stop early when the trial does not look promising
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return accuracy

In [None]:
# Name under which the Optuna study is created
study_name = "MLP-training"

# Create the study; the objective returns an accuracy, so it is maximized.
# NOTE(review): no `storage` argument is passed, so load_if_exists presumably
# has no previously saved study to reload — confirm whether persistence was intended.
study = optuna.create_study(
    study_name=study_name,
    direction='maximize',
    load_if_exists=True,
)

# Run 10 trials of `objective`; n_jobs=-1 lets Optuna pick the degree of
# parallelism from the CPU count.
study.optimize(objective, n_trials=10, n_jobs=-1)

In [None]:
# Print the value and the hyperparameters of the best trial in the study
trial = study.best_trial

print("Best trial:")
print("  Value: ", trial.value)
print("  Params: ")
for param_name, param_value in trial.params.items():
    print(f"    {param_name}: {param_value}")

In [None]:
# Rebuild the best architecture found by the study and retrain it from scratch.
best_params                 = study.best_trial.params
n_layers                    = best_params["n_layers"]

# Per-layer width and activation, read back with the same keys the objective
# used when sampling them.
neurons_per_layer_re        = [best_params[f"n_neurons_l{i}"] for i in range(n_layers)]
activation_per_layer_re     = [best_params[f"activation_l{i}"] for i in range(n_layers)]

# Use the best trial's depth (not a hard-coded 2) so the layer count always
# matches the length of the per-layer lists.
model_re            = MLPNetwork(n_layers,neurons_per_layer_re,activation_per_layer_re,num_classes)

# Move the model to the training device, as the objective function does.
model_re.to(DEVICE)

optimizer_name_re   = best_params["optimizer"]
lr_re               = best_params["lr"]
optimizer_re        = getattr(optim, optimizer_name_re)(model_re.parameters(), lr=lr_re)
loss_fn_re          = nn.CrossEntropyLoss()

# Retrain for EPOCHS_RETRAIN epochs, validating after each one
for epoch in range(1, EPOCHS_RETRAIN+1):
    train_loss      = train(train_loader, model_re, loss_fn_re , optimizer_re,DEVICE)
    # test() returns an (accuracy, loss) pair; unpack it so the loss can be
    # formatted as a float below.
    validation_accuracy, validation_loss = test(validation_loader, model_re, loss_fn_re ,DEVICE)
    print(f"Epoch {epoch}/{EPOCHS_RETRAIN} - val loss: {validation_loss:.4f}")

In [None]:
# Final evaluation of the retrained model on the test loader
accuracy, test_loss = test(
    test_loader,
    model_re,
    loss_fn_re,
    DEVICE,
)
print(f"Test loss: {test_loss:.4f} - Test Accuracy: {accuracy:.4f}")