In [26]:
import sys
sys.path.append(r'C:\Users\giova\OneDrive\Desktop\VS Code folders\SWP-regr\py')

from TreeDataset import TreeDataset
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from ViTRegressor import ViTRegressor
from sklearn.model_selection import train_test_split
from transformers import get_scheduler
from tqdm.auto import tqdm
import tabulate

In [2]:
#Creazione dataset
dataset = TreeDataset()

# Split indici per test train val
num_samples = dataset.__len__()
indices = list(range(num_samples))
train_indices, test_indices = train_test_split(indices, test_size=0.2) 
train_indices, val_indices = train_test_split(train_indices, test_size=0.2)

# Subsets dei dati
X_train = []
y_train = []
X_val = []
y_val = []
X_test = []
y_test = []

for idx in train_indices:
    X, y = dataset.__getitem__(idx)
    X_train.append(X)
    y_train.append(y)
X_train = torch.stack(X_train) 
y_train = torch.tensor(y_train)
train_set = TensorDataset(X_train, y_train)
#train_set.tensors[0]: tensore delle immagini shape [32, 12, 224, 224]
#train_set.tensors[1]: tensore delle labels shape [32]

for idx in val_indices:
    X, y = dataset.__getitem__(idx)
    X_val.append(X)
    y_val.append(y)
X_val = torch.stack(X_val)
y_val = torch.tensor(y_val)
val_set = TensorDataset(X_val, y_val)

for idx in test_indices:
    X, y = dataset.__getitem__(idx)
    X_test.append(X)
    y_test.append(y)
X_test = torch.stack(X_test) 
y_test = torch.tensor(y_test)
test_set = TensorDataset(X_test, y_test)

# Dataloaders
train_loader = DataLoader(train_set, batch_size=32, shuffle=True, num_workers=2) #full batch (32 immagini di training)
val_loader = DataLoader(val_set, batch_size=8, shuffle=True, num_workers=2) #full batch
test_loader = DataLoader(test_set, batch_size=10, shuffle=True, num_workers=2) #full batch

#check dataloader
#data_iter = iter(val_loader)
#images, labels = next(data_iter)
#print(images, labels)

In [28]:
# Iperparametri mlp
num_hidden_layers = 1
size_hidden_layers = [256]
dropout_rates = [0.5, 0.5]

#modello
model = ViTRegressor(
    num_hidden_layers = num_hidden_layers, 
    size_hidden_layers = size_hidden_layers, 
    dropout_rates = dropout_rates
    )

# Layer da congelare
frozen_layers = ["vit_layer"] 
for name, param in model.named_parameters():
  if name.split(".")[0] in frozen_layers:
     param.requires_grad = False
model.vit_layer.vit_layer.conv_proj.weight.requires_grad = True
model.vit_layer.vit_layer.conv_proj.bias.requires_grad = True
# stampa lista layers congelati e non
headers = ["Layer", "Shape", "Requires Grad"]
table = []
for name, param in model.named_parameters():
  table.append([name, param.shape, param.requires_grad])
print(tabulate.tabulate(table, headers=headers))

criterion = nn.MSELoss()
learning_rate = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# learning rate scheduler
num_epochs = 10
num_training_steps = num_epochs * len(train_loader)
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

#progress bar
progress_bar = tqdm(range(num_training_steps))

Layer                                                                               Shape                          Requires Grad
----------------------------------------------------------------------------------  -----------------------------  ---------------
vit_layer.vit_layer.class_token                                                     torch.Size([1, 1, 768])        False
vit_layer.vit_layer.conv_proj.weight                                                torch.Size([768, 12, 16, 16])  True
vit_layer.vit_layer.conv_proj.bias                                                  torch.Size([768])              True
vit_layer.vit_layer.encoder.pos_embedding                                           torch.Size([1, 197, 768])      False
vit_layer.vit_layer.encoder.layers.encoder_layer_0.ln_1.weight                      torch.Size([768])              False
vit_layer.vit_layer.encoder.layers.encoder_layer_0.ln_1.bias                        torch.Size([768])              False
vit_layer.vit_la

  0%|          | 0/10 [00:00<?, ?it/s]

In [9]:
#training loop
model.train()
for epoch in range(num_epochs):
    for inputs, labels in train_loader:
        #Forward pass
        outputs = model(inputs)
        labels = labels.unsqueeze(1)
        train_loss = criterion(outputs, labels)
        #Backward pass
        train_loss.backward() 
        #Aggiornamento pesi
        optimizer.step()
        optimizer.zero_grad()
        #lr scheduler step
        lr_scheduler.step()
        #progress bar step
        progress_bar.update(1)


    #validation loop
    model.eval()
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            labels = labels.unsqueeze(1)
            val_loss = criterion(outputs, labels)

    # Print training stats 
    if epoch % 1 == 0:
        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {train_loss.item():.4f}') 
        print(f'Val Loss: {val_loss.item():.4f}')

Epoch 1/10
Train Loss: 441.1355
Val Loss: 293.1299
Epoch 2/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 3/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 4/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 5/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 6/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 7/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 8/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 9/10
Train Loss: 442.1984
Val Loss: 293.1299
Epoch 10/10
Train Loss: 442.1984
Val Loss: 293.1299


In [7]:
train_loss

tensor(442.3644, grad_fn=<MseLossBackward0>)