In [1]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
import torchvision
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from libs.code import *
from pytorch_lightning.loggers import TensorBoardLogger
from tqdm import tqdm
# ------
# to export in ipynb
import torch
import torch.utils.data as data
from torchvision import transforms
import pytorch_lightning as pl
# from libs.code import *
from libs.Dataset import *
from libs.PretrainedModels import *
# from libs.VAE import *

In [28]:
random.seed(1996)
np.random.seed(1996)

In [None]:
PATH_DST = 'dataset/all_labels.csv'
PATH_GDRIVE = ''
NUM_WORKERS = 0 # evita il warning
BATCH_SIZE = 32
NUM_EPOCHS = 10
GPUS = 0

In [26]:
# baseline nearest neighbor su RGB
def extract_rgb_representations(loader):
    representations, labels = [], []
    for batch in tqdm(loader, total=len(loader)):
        representations.append(batch[0].view(batch[0].shape[0],-1).numpy())
        labels.append(batch[1])

    return np.concatenate(representations), np.concatenate(labels)


In [3]:
# valori pretrained
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225] 

dataset = TrashbinDataset(csv=PATH_DST, transform=transforms.Compose([transforms.Resize((32,32)), transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]))

dataset_train, dataset_test = split_into_train_and_test(dataset)

dst_train_loader = DataLoader(dataset_train, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=True)
dst_test_loader = DataLoader(dataset_test, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=False)


In [53]:
def extract_rgb_representations(loader):
    representations, labels = [], []
    for batch in tqdm(loader, total=len(loader)):
        representations.append(batch[0].view(batch[0].shape[0],-1).numpy())
        labels.append(batch[1])

    return np.concatenate(representations), np.concatenate(labels)

In [54]:
dst_train_rep_rgb, dst_train_labels = extract_rgb_representations(loader=dst_train_loader)
dst_test_rep_rgb, dst_test_labels = extract_rgb_representations(loader=dst_test_loader)

100%|██████████| 330/330 [05:59<00:00,  1.09s/it]
100%|██████████| 83/83 [01:29<00:00,  1.07s/it]


In [55]:
# rappresentazioni di training
dst_train_rep_rgb.shape

(10560, 150528)

In [56]:
import faiss
def predict_nn(train_rep, test_rep, train_label):
    """ Funzione che permette di predire le etichette sul test set analizzando l'algoritmo NN. !pip install faiss-gpu/cpu"""
    # inizializzo l'oggetto index utilizzato x indicizzare le rappresentazioni
    index = faiss.IndexFlat(train_rep.shape[1])
    # aggiungo le rappresentazioni di training all'indice
    index.add(train_rep.astype(np.float32))
    # effettuiamo la ricerca

    indices = np.array([index.search(x.reshape(1,-1).astype(np.float32), k=1)[1][0][0] for x in test_rep])

    #restituisco le etichette predette
    return train_label[indices].squeeze()

In [57]:
# ottengo le predizionis ul test set:
pred_test_label_rgb = predict_nn(dst_train_rep_rgb, dst_test_rep_rgb, dst_train_labels)

In [36]:
pred_test_label_rgb

array([2, 0, 1, ..., 1, 2, 2])

In [58]:
def evaluate_classification(pred_label, ground_truth):
    """ Valuto la bontà delle predizioni ottenute calcolando la distanza euclidea tra il vettore di label
        predetto e quelli di ground truth"""
    dist = np.sqrt(np.sum(np.square(pred_label-ground_truth)))
    return dist

Valuto le performance della baseline

In [59]:
classification_error = evaluate_classification(pred_test_label_rgb, dst_test_labels)

print(f"Classification error: {classification_error:0.2f}")

Classification error: 5.48


In [8]:
# # training della funzione di rappresentazione mediante triplet
# # TODO: importa la migliore che avevi fatto dagli studi precedenti

# from torchvision.models import mobilenet_v2
# base_model = mobilenet_v2()

# # voglio usare il modello per estrarre le featuer, quindi devo rimuovere il classificatore finale. nello specifico imposto il classificatore a un modulo identità
# base_model.classifier = nn.Identity()
# # verifico qual è la dimensione del vettore di feature estratto per una immagine di input di shape 3 x 244 x 224
# base_model(torch.zeros(1,3,244,244)).shape

torch.Size([1, 1280])

In [21]:
def set_parameter_requires_grad(model, feature_extracting: bool):
    """Helper function that sets the `require_grad` attribute of parameter in the model to False when is used feature extracting"""

    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False


In [25]:
PRETRAINED_MODEL_PATH =  'models/squeezeNet_pretrained.pth'
CLASSES = 3

squeezeNet_1_0 = torch.hub.load('pytorch/vision:v0.10.0', 'squeezenet1_0', pretrained=True)
squeezeNet_1_0.classifier[1] = nn.Conv2d(512, CLASSES, kernel_size=(1,1), stride=(1,1))
squeezeNet_1_0.num_classes = CLASSES
squeezeNet_1_0.load_state_dict(torch.load(PRETRAINED_MODEL_PATH))
#model.eval() ?
# set_parameters qua ?? Non penso.

squeezeNet_1_0.classifier = nn.Identity()

squeezeNet_1_0(torch.zeros(1, 3, 224,224)).shape

Using cache found in /Users/danilo/.cache/torch/hub/pytorch_vision_v0.10.0


torch.Size([1, 86528])

In [26]:
# il modello estrae vettori di rappresentazione di 1280 unità. definisco una funzione per estrarre le rappresentazioni di dataloader di training e test

def extract_rep_(model, loader):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)
    representations, labels = [], []
    for batch in tqdm(loader, total=len(loader)):
        x = batch[0].to(device)
        # print(x.shape)
        # print(x.size)
        # break
        rep = model(x)
        rep = rep.detach().to('cpu').numpy()
        labels.append(batch[1])
        representations.append(rep)
    
    return np.concatenate(representations), np.concatenate(labels)


In [44]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

dataset = TrashbinDataset(csv=PATH_DST, transform=transforms.Compose([
    transforms.Resize((224,224)), 
    transforms.ToTensor(), 
    transforms.Normalize(mean=mean, std=std)
    ]))

dataset_train, dataset_test = split_into_train_and_test(dataset)

dst_train_loader = DataLoader(dataset_train, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=True)
dst_test_loader = DataLoader(dataset_test, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=False)

In [45]:
# uso il modello non ancora allenato per estrarre le rappresentazioni dal training e dal test set
dst_train_rep_base, dst_train_labels = extract_rep_(squeezeNet_1_0, dst_train_loader)
dst_test_rep_base, dst_test_labels = extract_rep_(squeezeNet_1_0, dst_test_loader)

100%|██████████| 330/330 [13:38<00:00,  2.48s/it]
100%|██████████| 83/83 [03:17<00:00,  2.38s/it]


In [60]:
# valuto le performance del sistema con rappresentazioni non ancora ottimizzate

# forse gli import sono sbagliati TODO: sistemali
pred_test_label_base = predict_nn(dst_train_rep_rgb, dst_test_rep_rgb, dst_train_labels)
classification_error = evaluate_classification(pred_test_label_base, dst_test_labels)

In [61]:
classification_error

5.477225575051661

In [62]:
triplet_dst = TripletTrashbin(root=PATH_DST, transform=transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
    ]))

triplet_dst_train, triplet_dst_test = split_into_train_and_test(triplet_dst)

triplet_dataset_train_loader = DataLoader(triplet_dst_train, num_workers=0, batch_size=BATCH_SIZE, shuffle=True)
triplet_dataset_test_loader = DataLoader(triplet_dst_test, num_workers=0, batch_size=BATCH_SIZE)

In [63]:
from libs.SiameseNetwork import TripletNetworkTask

triplet_trashbin_task =  TripletNetworkTask(squeezeNet_1_0, lr=0.002)
logger = TensorBoardLogger("metric_logs", name="test_trashbin_v1")

trainer = pl.Trainer(gpus=GPUS, logger = logger, max_epochs = 10, check_val_every_n_epoch=5)

trainer.fit(triplet_trashbin_task, triplet_dataset_train_loader, triplet_dataset_test_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name          | Type              | Params
----------------------------------------------------
0 | embedding_net | SqueezeNet        | 735 K 
1 | criterion     | TripletMarginLoss | 0     
----------------------------------------------------
735 K     Trainable params
0         Non-trainable params
735 K     Total params
2.942     Total estimated model params size (MB)


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


                                                                      

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Epoch 3:  58%|█████▊    | 192/330 [37:32<26:58, 11.73s/it, loss=nan, v_num=0]     

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
