In [1]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import torch.optim as optim
import torchvision
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from libs.code import *
from pytorch_lightning.loggers import TensorBoardLogger
from tqdm import tqdm
# ------
# to export in ipynb
import torch
import torch.utils.data as data
from torchvision import transforms
import pytorch_lightning as pl
# from libs.code import *
from libs.Dataset import *
# from libs.VAE import *

In [2]:
PATH_DST = 'dataset/all_labels.csv'
PATH_GDRIVE = ''
NUM_WORKERS = 8 # ricordati se significava tutto o niente
BATCH_SIZE = 32
NUM_EPOCHS = 10
GPUS = 0

In [26]:
# baseline nearest neighbor su RGB
def extract_rgb_representations(loader):
    representations, labels = [], []
    for batch in tqdm(loader, total=len(loader)):
        representations.append(batch[0].view(batch[0].shape[0],-1).numpy())
        labels.append(batch[1])

    return np.concatenate(representations), np.concatenate(labels)


In [3]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

dataset = TrashbinDataset(csv=PATH_DST, transform=transforms.Compose([transforms.Resize((32,32)), transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]))

dataset_train, dataset_test = split_into_train_and_test(dataset)

dst_train_loader = DataLoader(dataset_train, shuffle=True, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE)
dst_test_loader = DataLoader(dataset_test, shuffle=True, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE)


In [30]:
dst_train_rep_rgb, dst_train_labels = extract_rgb_representations(loader=dst_train_loader)
dst_test_rep_rgb, dst_test_labels = extract_rgb_representations(loader=dst_test_loader)

100%|██████████| 330/330 [00:54<00:00,  6.01it/s]
100%|██████████| 83/83 [00:14<00:00,  5.77it/s]


In [31]:
# rappresentazioni di training
dst_train_rep_rgb.shape

(10560, 3072)

In [5]:
import faiss
def predict_nn(train_rep, test_rep, train_label):
    """ Funzione che permette di predire le etichette sul test set analizzando l'algoritmo NN. !pip install faiss-gpu/cpu"""
    # inizializzo l'oggetto index utilizzato x indicizzare le rappresentazioni
    index = faiss.IndexFlat(train_rep.shape[1])
    # aggiungo le rappresentazioni di training all'indice
    index.add(train_rep.astype(np.float32))
    # effettuiamo la ricerca

    indices = np.array([index.search(x.reshape(1,-1).astype(np.float32), k=1)[1][0][0] for x in test_rep])

    #restituisco le etichette predette
    return train_label[indices].squeeze()

In [35]:
# ottengo le predizionis ul test set:
pred_test_label_rgb = predict_nn(dst_train_rep_rgb, dst_test_rep_rgb, dst_train_labels)

In [36]:
pred_test_label_rgb

array([2, 0, 1, ..., 1, 2, 2])

In [6]:
def evaluate_classification(pred_label, ground_truth):
    """ Valuto la bontà delle predizioni ottenute calcolando la distanza euclidea tra il vettore di label
        predetto e quelli di ground truth"""
    dist = np.sqrt(np.sum(np.square(pred_label-ground_truth)))
    return dist

Valuto le performance della baseline

In [50]:
classification_error = evaluate_classification(pred_test_label_rgb, dst_test_labels)

print(f"Classification error: {classification_error:0.2f}")

Classification error: 1.00


In [8]:
# training della funzione di rappresentazione mediante triplet
# TODO: importa la migliore che avevi fatto dagli studi precedenti

from torchvision.models import mobilenet_v2
base_model = mobilenet_v2()

# voglio usare il modello per estrarre le featuer, quindi devo rimuovere il classificatore finale. nello specifico imposto il classificatore a un modulo identità
base_model.classifier = nn.Identity()
# verifico qual è la dimensione del vettore di feature estratto per una immagine di input di shape 3 x 244 x 224
base_model(torch.zeros(1,3,244,244)).shape

torch.Size([1, 1280])

In [9]:
# il modello estrae vettori di rappresentazione di 1280 unità. definisco una funzione per estrarre le rappresentazioni di dataloader di training e test

def extract_rep_(model, loader):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.eval()
    model.to(device)
    representations, labels = [], []
    for batch in tqdm(loader, total=len(loader)):
        x = batch[0].to(device)
        # print(x.shape)
        # print(x.size)
        # break
        rep = model(x)
        rep = rep.detach().to('cpu').numpy()
        labels.append(batch[1])
        representations.append(rep)
    
    return np.concatenate(representations), np.concatenate(labels)


In [10]:
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

dataset = TrashbinDataset(csv=PATH_DST, transform=transforms.Compose([transforms.Resize((244,244)), transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]))

dataset_train, dataset_test = split_into_train_and_test(dataset)

dst_train_loader = DataLoader(dataset_train, shuffle=True, num_workers=0, batch_size=32)
dst_test_loader = DataLoader(dataset_test, shuffle=True, num_workers=0, batch_size=32)

In [78]:
# uso il modello non ancora allenato per estrarre le rappresentazioni dal training e dal test set
dst_train_rep_base, dst_train_labels = extract_rep_(base_model, dst_train_loader)
dst_test_rep_base, dst_test_labels = extract_rep_(base_model, dst_test_loader)

100%|██████████| 330/330 [16:48<00:00,  3.06s/it]
100%|██████████| 83/83 [04:05<00:00,  2.96s/it]


In [80]:
# valuto le performance del sistema con rappresentazioni non ancora ottimizzate

pred_test_label_base = predict_nn(dst_train_rep_rgb, dst_test_rep_rgb, dst_train_labels)
classification_error = evaluate_classification(pred_test_label_base, dst_test_labels)

In [81]:
classification_error

58.61740355901138

In [86]:
class TripletTrashbin(data.Dataset):
    def __init__(self, root = 'dataset/all_labels.csv', transform = None, path_gdrive='') -> None:
#        super().__init__()
        self.dataset = TrashbinDataset(root, transform=transform, path_gdrive=path_gdrive)
        # self.dataset = self.dataset.data    # dipende dalla classe sopra, evito di chiamare un oggetto lungo
        self.class_to_indices = [np.where(self.dataset.data.label == label)[0] for label in range(3)]  # N delle classi

        self.generate_triplets()
    
    def generate_triplets(self):
        """ Genera le triplete associando ongi elemento del dataset due nuovi elementi. Uno simile e uno dissimile"""

        self.similar_idx = []
        self.dissimilar_idx = []

        # cu.printer_helper("Start making triplets...")

        for i in range(len(self.dataset)):
            # classe del primo elemento della tripletta
            c1 = self.dataset[i][1] # la classe la trovo sempre alla posizione 1 dato il dataset di sopra
            # indice dell'elemento simile
            j = np.random.choice(self.class_to_indices[c1])
            # scelgo una classe diversa a caso
            diff_class = np.random.choice(list(set(range(3))-{c1}))
            # campiono dalla classe di ^ per ottenere l'indice dell'elemento dissimile
            k = np.random.choice(self.class_to_indices[diff_class])

            self.similar_idx.append(j)
            self.dissimilar_idx.append(k)

        # cu.printer_helper("Dataset loaded successfully!")

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index):
        
        im1, l1 = self.dataset[index]
        im2, l2 = self.dataset[self.similar_idx[index]]
        im3, l3 = self.dataset[self.dissimilar_idx[index]]

        return im1, im2, im3, l1, l2, l3


In [94]:
triplet_trashbin = TripletTrashbin(root=PATH_DST, transform=transforms.Compose([transforms.Resize((244,244)), transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)]))

triplet_dataset_train, triplet_dataset_test = split_into_train_and_test(triplet_trashbin)

triplet_dataset_train_loader = DataLoader(triplet_dataset_train, shuffle=True, num_workers=0, batch_size=32)
triplet_dataset_test_loader = DataLoader(triplet_dataset_test, num_workers=0, batch_size=32)

In [90]:
class TripletNetworkTask(pl.LightningModule):
    def __init__(self, embedding_net, lr=0.01, momentum=0.99, margin=2, num_class=3):
        super(TripletNetworkTask, self).__init__()
        self.save_hyperparameters()
        self.embedding_net = embedding_net
        self.criterion = nn.TripletMarginLoss(margin=margin)
        self.num_class = num_class

    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        return SGD(self.embedding_net.parameters(), self.hparams.lr, momentum=self.hparams.momentum)

    def training_step(self, batch, batch_idx):
        I_i, I_j, I_k, *_ = batch
        phi_i = self.embedding_net(I_i)
        phi_j = self.embedding_net(I_j)
        phi_k = self.embedding_net(I_k)

        # calcoliamo la loss
        loss_triplet = self.criterion(phi_i, phi_j, phi_k)
        
        loss_embedd = phi_i.norm(2) + phi_i.norm(2) + phi_i.norm(2)
        loss = loss_triplet + 0.001 *loss_embedd

        self.log('train/loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        I_i, I_j, I_k, *_ = batch
        phi_i = self.embedding_net(I_i)
        phi_j = self.embedding_net(I_j)
        phi_k = self.embedding_net(I_k)

        #calcolo la loss
        loss_triplet = self.criterion(phi_i, phi_j, phi_k)

        loss_embedd = phi_i.norm(2) + phi_i.norm(2) + phi_i.norm(2)
        loss = loss_triplet + 0.001 * loss_embedd

        self.log('valid/loss', loss)
        return loss

In [95]:
triplet_trashbin_task =  TripletNetworkTask(base_model, lr=0.001)
logger = TensorBoardLogger("metric_logs", name="test_trashbin_v1")

trainer = pl.Trainer(gpus=GPUS, logger = logger, max_epochs = 10, check_val_every_n_epoch=5)

trainer.fit(triplet_trashbin_task, triplet_dataset_train_loader, triplet_dataset_test_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name          | Type              | Params
----------------------------------------------------
0 | embedding_net | MobileNetV2       | 2.2 M 
1 | criterion     | TripletMarginLoss | 0     
----------------------------------------------------
2.2 M     Trainable params
0         Non-trainable params
2.2 M     Total params
8.895     Total estimated model params size (MB)
