In [21]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
import pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib
from torch import Tensor
import torch.nn.functional as F
from torch.utils.data import DataLoader
import optuna
from tqdm import tqdm
from py.utilities import train_model
from optuna.samplers import TPESampler
import os
from optuna.study import StudyDirection
from math import sqrt

In [7]:
class PixelDataset(Dataset):

    def __init__(self, mode):
        #data loading in base alla modalità
        if mode == 'train':
            data = pd.read_csv(r'..\pixelwise\dataset\px_train_set.csv', header=0)
            scaler = StandardScaler()
            scaler.fit(data.drop('SWP', axis=1))
            joblib.dump(scaler, r'..\pixelwise\dataset\scaler.pkl')
        elif mode == 'val':
            data = pd.read_csv(r'..\pixelwise\dataset\val_set.csv', header=0)    
        elif mode == 'test':
            data = pd.read_csv(r'..\pixelwise\dataset\test_set.csv', header=0)   
        else:
            raise ValueError('Invalid mode')

        self.X = data.drop('SWP', axis=1).reset_index(drop=True) # features
        self.y = data['SWP'] # lista di float
        self.scaler = joblib.load(r'..\pixelwise\dataset\scaler_pixelwise.pkl')
        self.X = self.scaler.transform(self.X) # scaling features con scaler addestrato sul training set
        self.n_samples = len(self.y)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, index):
        return self.X[index], self.y[index]

In [8]:
class PixelNet(nn.Module):

    def __init__(self, in_count, num_hidden_layers, size_hidden_layers, dropout_rates, device):
        super(PixelNet, self).__init__()

        self.num_hidden_layers = num_hidden_layers
        self.size_hidden_layers = size_hidden_layers
        self.dropout_rates = dropout_rates
        self.device = device

        #MLP for regression
        # Input layer
        self.input_layer = nn.Linear(in_count, size_hidden_layers[0])

        # Hidden layers
        self.hidden_layers = nn.ModuleList()
        for i in range(num_hidden_layers - 1):  # Loop per ogni hidden layer
            self.hidden_layers.append(nn.Linear(size_hidden_layers[i], size_hidden_layers[i+1]))
        
        # Output layer
        self.output_layer = nn.Linear(size_hidden_layers[-1], 1)# regression task
        # Dropout layers
        self.dropouts = nn.ModuleList([nn.Dropout(p=dropout_rates[i]) for i in range(len(dropout_rates))])

    def forward(self, x: Tensor):
        x = F.relu(self.input_layer(x))
        for i in range(self.hidden_layers):
            x = F.relu(self.hidden_layers[i](x))
            x = self.dropouts[i](x)
        x = self.output_layer(x)
        return x

In [24]:
loss_lists= {}
def objective(trial, batch_size, n_epochs, device, seed):

    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    trial.set_user_attr("n_epochs", n_epochs)
    trial.set_user_attr("batch_size", batch_size)

    try:
        #suggerimento iperparametri
        num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 10)
        size_hidden_layers = [trial.suggest_int(f'neurons_per_layer_{i}', 16, 1024) for i in range(num_hidden_layers)] 
        learning_rate = trial.suggest_categorical('learning_rate', [1e-4, 5e-4, 1e-5, 5e-5, 1e-6, 5e-6])
        dropout_rates = [trial.suggest_float(f'dropout_rates_{i}', 0.1, 0.8) for i in range(num_hidden_layers)]
        num_epochs = n_epochs
        batch_size = batch_size
        
        # Crea datasets e dataloaders
        train_set = PixelDataset(mode='train')
        val_set = PixelDataset(mode='val')
        train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_set, batch_size=batch_size)

        #creazione modello
        model = PixelNet(num_hidden_layers, size_hidden_layers, dropout_rates, device)
        model = model.to(device)

        #training
        best_val_loss, avg_train_losses, avg_val_losses, early_stop_epoch = train_model(model=model, 
                                                                                        num_epochs=num_epochs, 
                                                                                        lr=learning_rate, 
                                                                                        train_loader=train_loader, 
                                                                                        val_loader=val_loader,
                                                                                        device=device)
        

        #lista per plottare train/val loss della trial migliore
        loss_lists[trial.number] = (avg_train_losses, avg_val_losses, early_stop_epoch)

        return best_val_loss
    except Exception as e:
        print(f"An exception occurred during optimization: {e}")
        print("Saving study and exiting...")
        joblib.dump(study, r'..\pixelwise\optuna_results.pkl')
        raise

In [25]:
seed = 42
'''
parser = argparse.ArgumentParser(description='Ottimizzazione iperparametri con Optuna')
parser.add_argument('--batch_size', type=int, default=30, help='Dimensione del batch')
parser.add_argument('--n_trials', type=int, default=10, help='Numero di tentativi di ottimizzazione')
parser.add_argument('--n_epochs', type=int, default=200, help='Numero di epoche di addestramento')
args = parser.parse_args()  
'''
n_trials = 5
batch_size = 5094
n_epochs = 10

#usa una GPU se disponibile
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}") 

sampler = TPESampler(seed=42)

#Se lo studio non esiste, lo crea. Se esiste, lo carica e stampa la trial migliore finora.
if not os.path.exists(r'..pixelwise\optuna_results.pkl'):
    study = optuna.create_study(direction=StudyDirection.MINIMIZE, sampler=sampler)
else:
    study = joblib.load(r'..\pixelwise\optuna_results.pkl')
    print("Best trial until now:")
    print("  Number: ", study.best_trial.number)
    print(" Value: ", study.best_trial.value)
    print(" Params: ")
    for key, value in study.best_trial.params.items():
        print(f"    {key}: {value}")
    for trial in study.trials:
        if trial.state == optuna.trial.TrialState.FAIL: 
            study.enqueue_trial(trial.params)
            print(f"enqueued previosly failed trial...{trial}")

with tqdm(total=n_trials, desc="Optimizing.. ") as trial_pbar:
    def callback(study, trial):
        trial_pbar.set_description(f"Trial {trial.number+1}/{n_trials}")
        trial_pbar.update(1)
        joblib.dump(study, r'..\pixelwise\optuna_results.pkl')
    
    study.optimize(lambda trial: objective(trial, batch_size, n_epochs, device, seed),
                    n_trials=n_trials,
                    callbacks=[callback])

print(study.trials_dataframe())
print(f'Number of finished trials: {len(study.trials)}')
#print(f'Best trial: {study.best_trial}')
print(f'Best MSE: {study.best_trial.value}')
print(f'Best RMSE: {sqrt(study.best_trial.value)}')
print(f'Best value: {study.best_value}')
print(f'Best hyperparameters: {study.best_params}')

#plot_best_losses(loss_lists[study.best_trial.number])

[I 2024-05-23 11:37:11,143] A new study created in memory with name: no-name-2518d45f-4932-4ffe-b3a1-956ce79cf030


Using device: cpu


Optimizing.. :   0%|          | 0/5 [00:00<?, ?it/s][W 2024-05-23 11:37:11,153] Trial 0 failed with parameters: {'num_hidden_layers': 4, 'neurons_per_layer_0': 975, 'neurons_per_layer_1': 754, 'neurons_per_layer_2': 620, 'neurons_per_layer_3': 173, 'learning_rate': 1e-05, 'dropout_rates_0': 0.778936896513396, 'dropout_rates_1': 0.6827098485602953, 'dropout_rates_2': 0.24863737747479334, 'dropout_rates_3': 0.22727747704497045} because of the following error: FileNotFoundError(2, 'No such file or directory').
Traceback (most recent call last):
  File "C:\Users\giova\AppData\Local\Temp\ipykernel_9744\2245859744.py", line 19, in objective
    train_set = PixelDataset(mode='train')
                ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\giova\AppData\Local\Temp\ipykernel_9744\2090658673.py", line 6, in __init__
    data = pd.read_csv(r'..\data\pixelwise\px_train_set.csv', header=0)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\giova\OneDrive\De

An exception occurred during optimization: [Errno 2] No such file or directory: '..\\data\\pixelwise\\px_train_set.csv'
Saving study and exiting...





FileNotFoundError: [Errno 2] No such file or directory: '..\\pixelwise\\optuna_results.pkl'

In [27]:
current_dir = os.getcwd()
current_dir

'c:\\Users\\giova\\OneDrive\\Desktop\\projects\\SWP-regr'

In [28]:
data_dir = os.path.join(current_dir, 'pixelwise', 'dataset')
data_dir

'c:\\Users\\giova\\OneDrive\\Desktop\\projects\\SWP-regr\\pixelwise\\data'

In [2]:
import joblib
study = joblib.load(r'C:\Users\giova\OneDrive\Desktop\projects\SWP-regr\pixelwise\optuna_results_pixelwise.pkl')
df = study.trials_dataframe()
df.sort_values('value').head(20)

  from .autonotebook import tqdm as notebook_tqdm


Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_dropout_rates_0,params_dropout_rates_1,params_dropout_rates_2,params_dropout_rates_3,params_learning_rate,params_neurons_per_layer_0,params_neurons_per_layer_1,params_neurons_per_layer_2,params_neurons_per_layer_3,params_num_hidden_layers,user_attrs_batch_size,user_attrs_n_epochs,state
0,0,270.760177,2024-05-23 12:26:30.274432,2024-05-23 12:27:48.652432,0 days 00:01:18.378000,0.778937,0.68271,0.248637,0.227277,1e-05,975,754.0,620.0,173.0,4,5094,5,COMPLETE
1,1,372.525393,2024-05-23 12:27:48.660435,2024-05-23 12:28:17.816438,0 days 00:00:29.156003,0.419249,0.649623,,,1e-05,322,545.0,,,2,5094,5,COMPLETE
2,2,379.126413,2024-05-23 12:28:37.238674,2024-05-23 12:29:11.187638,0 days 00:00:33.948964,0.665878,0.31323,,,5e-06,534,613.0,,,2,5094,5,COMPLETE
3,3,406.695065,2024-05-23 12:29:11.194637,2024-05-23 12:29:30.078638,0 days 00:00:18.884001,0.563766,,,,1e-06,706,,,,1,5094,5,COMPLETE
