In [2]:
# Mount Google Drive at /content/gdrive; the dataset CSV and the saved weights
# referenced by the configuration cell live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [1]:
# Shell escape: print the GPU attached to this runtime.
!nvidia-smi

Sun Jan 30 12:15:19 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P8    26W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# Install the third-party dependencies into the kernel's own environment
# (%pip targets the running kernel, !pip may target a different interpreter).
# pytorch-lightning is kept below 2.0 because this notebook uses the 1.x
# `Trainer(gpus=...)` argument; faiss-gpu is pinned to the version recorded in
# this cell's saved output.
%pip install --quiet "pytorch-lightning<2.0"
%pip install --quiet faiss-gpu==1.7.2

[K     |████████████████████████████████| 527 kB 4.0 MB/s 
[K     |████████████████████████████████| 133 kB 52.3 MB/s 
[K     |████████████████████████████████| 596 kB 44.9 MB/s 
[K     |████████████████████████████████| 952 kB 43.9 MB/s 
[K     |████████████████████████████████| 829 kB 39.7 MB/s 
[K     |████████████████████████████████| 396 kB 48.3 MB/s 
[K     |████████████████████████████████| 1.1 MB 41.3 MB/s 
[K     |████████████████████████████████| 94 kB 2.8 MB/s 
[K     |████████████████████████████████| 144 kB 45.9 MB/s 
[K     |████████████████████████████████| 271 kB 50.3 MB/s 
[?25h  Building wheel for future (setup.py) ... [?25l[?25hdone
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.[0m
Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-

In [5]:
import pytorch_lightning as pl
import faiss
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import transforms
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
import torch
import pytorch_lightning as pl
from torch.utils import data # necessary to create a map-style dataset https://pytorch.org/docs/stable/data.html
from os.path import splitext, join
from PIL import Image
import numpy as np
import pandas as pd
from torchvision import transforms
from torch.optim import SGD
from torch import nn
from tqdm import tqdm
import random


### Dataset classes

In [6]:
class TrashbinDataset(data.Dataset): # data.Dataset https://pytorch.org/docs/stable/_modules/torch/utils/data/dataset.html#Dataset
    """ Map-style dataset of trashbin images and their fill-level label.

        Each row of the csv file provides an ``image`` column (path of the picture)
        and a ``label`` column that describes the available capacity of the trashbin:
            0 : empty trashbin
            1 : half trashbin
            2 : full trashbin

        Attributes
        ----------
        data : pandas.DataFrame
            rows of the csv file, randomly permuted once at construction time
        transform : callable or None
            transform applied to every PIL image before it is returned
        path_gdrive : str
            prefix joined in front of every image path read from the csv

        Methods
        -------
        __len__()
            Return the length of the dataset

        __getitem__(i)
            Return image, label of i element of dataset
    """

    def __init__(self, csv: str=None, transform: transforms=None, path_gdrive: str=''):
        """ Constructor of the dataset

            Parameters
            ----------
            csv : str
            path of the csv file listing the images and their labels

            transform : torchvision.transforms
            transform applied to every image (None returns the PIL image as is)

            path_gdrive: str
            prefix prepended to the image paths, so that the csv does not have to
            be edited when the dataset lives on Google Drive

            Raises
            ------
            NotImplementedError
                If no csv path is given (there is no default dataset) or if the
                path does not end with the .csv extension
        """

        if csv is None:
            raise NotImplementedError("No default dataset is provided")
        if splitext(csv)[1] != '.csv':
            raise NotImplementedError("Only .csv files are supported")

        self.data = pd.read_csv(csv)        # import from csv using pandas
        # Shuffle the rows once; positional (iloc) indices refer to this shuffled order.
        self.data = self.data.iloc[np.random.permutation(len(self.data))]
        self.transform = transform
        self.path_gdrive = path_gdrive

    def __len__(self):
        """ Return length of dataset """
        return len(self.data)

    def __getitem__(self, i=None):
        """ Return the i-th item of dataset as an (image, label) pair

            Parameters
            ----------
            i : int
            position of the item in the (shuffled) dataset

            Returns
            -------
            tuple
            the image (transformed when a transform was given) and its label

            Raises
            ------
            NotImplementedError
            If i is None
        """
        if i is None:
            raise NotImplementedError("Only int type is supported for get the item. None is not allowed")

        row = self.data.iloc[i]     # single positional lookup for both fields
        im_path, im_label = row['image'], row['label']

        # Image.open is lazy: convert() forces the decode and returns an independent
        # copy, so the file handle can be released right away. Forcing RGB guarantees
        # three channels, otherwise a grayscale/palette/RGBA picture breaks the
        # 3-channel Normalize transform or the stacking of the batch.
        with Image.open(join(self.path_gdrive, im_path)) as raw_image:
            im = raw_image.convert('RGB')

        if self.transform is not None:
            im = self.transform(im)
        return im, im_label

class TripletTrashbin(data.Dataset):
    """Dataset of (anchor, positive, negative) triplets built on top of TrashbinDataset.

    Every element of the wrapped dataset is the anchor of one triplet: the positive
    is another element with the same label, the negative an element with a
    different label. Triplets are sampled once, when the object is built.

    Attributes
    ----------
    dataset : TrashbinDataset
        wrapped single-image dataset
    class_to_indices : list of numpy.ndarray
        for every class, the positional indices of its elements in ``dataset``
    similar_idx, dissimilar_idx : list
        for every anchor position, the index of its positive / negative element
    """

    def __init__(self, root = 'dataset/all_labels.csv', transform = None, path_gdrive='', num_classes=3) -> None:
        """Load the csv and sample one triplet per element.

        Parameters
        ----------
        root : str
            path of the csv file, forwarded to TrashbinDataset
        transform : callable or None
            transform forwarded to TrashbinDataset
        path_gdrive : str
            path prefix forwarded to TrashbinDataset
        num_classes : int
            number of classes; labels are expected in ``range(num_classes)``
        """
        super().__init__()
        self.dataset = TrashbinDataset(root, transform=transform, path_gdrive=path_gdrive)
        labels = self.dataset.data['label'].to_numpy()
        # np.where works on the raw values, so these are positional indices,
        # consistent with the iloc-based lookup of the wrapped dataset.
        self.class_to_indices = [np.where(labels == label)[0] for label in range(num_classes)]

        self.generate_triplets()

    def generate_triplets(self):
        """Associate to every element a similar and a dissimilar element.

        Labels are read directly from the underlying table: no image is opened
        or transformed while sampling.

        Raises
        ------
        ValueError
            If an anchor has no element of a different class to be paired with.
        """
        labels = self.dataset.data['label'].to_numpy()
        # Only classes that actually contain samples can provide a negative.
        populated_classes = [c for c, members in enumerate(self.class_to_indices) if len(members) > 0]

        self.similar_idx = []
        self.dissimilar_idx = []

        for i, label in enumerate(labels):
            c1 = int(label)

            # Positive: same class, avoiding the anchor itself whenever the class
            # has at least one other element.
            same_class = self.class_to_indices[c1]
            candidates = same_class[same_class != i]
            if len(candidates) == 0:
                candidates = same_class
            j = np.random.choice(candidates)

            # Negative: pick a different (non-empty) class at random, then one of its elements.
            other_classes = [c for c in populated_classes if c != c1]
            if not other_classes:
                raise ValueError(f"No sample of a class different from {c1} is available to build a triplet")
            diff_class = np.random.choice(other_classes)
            k = np.random.choice(self.class_to_indices[diff_class])

            self.similar_idx.append(j)
            self.dissimilar_idx.append(k)

    def __len__(self):
        """Return the number of triplets (one per element of the wrapped dataset)."""
        return len(self.dataset)

    def __getitem__(self, index):
        """Return anchor, positive and negative images followed by their three labels."""
        im1, l1 = self.dataset[index]
        im2, l2 = self.dataset[self.similar_idx[index]]
        im3, l3 = self.dataset[self.dissimilar_idx[index]]

        return im1, im2, im3, l1, l2, l3

### Siamese Network classes

In [7]:
class TripletNetworkTask(pl.LightningModule):
    """Lightning task that trains an embedding network with a triplet margin loss.

    The loss is ``TripletMarginLoss`` plus a small penalty on the L2 norm of the
    three embeddings of each batch.

    Parameters
    ----------
    embedding_net : torch.nn.Module
        network that maps an image batch to its embeddings
    lr : float
        learning rate of the SGD optimizer (same value as the previous project)
    momentum : float
        momentum of the SGD optimizer
    margin : float
        margin of the triplet loss
    num_class : int
        number of classes; stored on the task, not used by the loss
    embedding_reg : float
        weight of the embedding-norm penalty added to the triplet loss
    """

    def __init__(self, embedding_net, lr=0.002, momentum=0.99, margin=2, num_class=3, embedding_reg=0.001):
        super(TripletNetworkTask, self).__init__()
        # NOTE(review): this also stores `embedding_net` among the hyperparameters;
        # consider `save_hyperparameters(ignore=['embedding_net'])` if that is unwanted.
        self.save_hyperparameters()
        self.embedding_net = embedding_net
        self.criterion = nn.TripletMarginLoss(margin=margin)
        self.num_class = num_class
        self.embedding_reg = embedding_reg

    def forward(self, x):
        """Return the embeddings of the batch ``x``."""
        return self.embedding_net(x)

    def configure_optimizers(self):
        """Optimize the embedding network with SGD using the stored lr and momentum."""
        return SGD(self.embedding_net.parameters(), self.hparams.lr, momentum=self.hparams.momentum)

    def _triplet_step(self, batch):
        """Compute the regularized triplet loss of one (anchor, positive, negative, ...) batch."""
        I_i, I_j, I_k, *_ = batch   # the trailing labels are not needed by the loss
        phi_i = self.embedding_net(I_i)
        phi_j = self.embedding_net(I_j)
        phi_k = self.embedding_net(I_k)

        loss_triplet = self.criterion(phi_i, phi_j, phi_k)

        # Penalize the norm of all three embeddings (anchor, positive and negative).
        loss_embedd = phi_i.norm(2) + phi_j.norm(2) + phi_k.norm(2)
        return loss_triplet + self.embedding_reg * loss_embedd

    # Lightning automatically sets the model to training for training_step and to eval for validation.
    def training_step(self, batch, batch_idx):
        """Log and return the training loss of the batch."""
        loss = self._triplet_step(batch)
        self.log('train/loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log and return the validation loss of the batch."""
        loss = self._triplet_step(batch)
        self.log('valid/loss', loss)
        return loss


### utils

In [8]:
def extract_rgb_representations(loader):
    """Baseline features for RGB nearest neighbour: every image flattened to a vector.

    Iterates the loader once, reshapes the first element of each batch to
    (batch_size, -1) and collects the second element as labels.

    Returns
    -------
    tuple of numpy.ndarray
        the concatenated flattened images and the concatenated labels
    """
    flat_batches = []
    label_batches = []
    for batch in tqdm(loader, total=len(loader)):
        images = batch[0]
        flat_batches.append(images.view(images.shape[0], -1).numpy())
        label_batches.append(batch[1])

    all_representations = np.concatenate(flat_batches)
    all_labels = np.concatenate(label_batches)
    return all_representations, all_labels

def predict_nn(train_rep, test_rep, train_label):
    """Predict the test labels with a 1-nearest-neighbour search (requires faiss-gpu or faiss-cpu).

    Parameters
    ----------
    train_rep : numpy.ndarray
        training representations, one row per sample
    test_rep : numpy.ndarray
        test representations, one sample per first-axis entry
    train_label : numpy.ndarray
        labels of the training samples, aligned with ``train_rep``

    Returns
    -------
    numpy.ndarray
        for every test sample, the label of its closest training sample
    """
    # faiss works on contiguous float32 matrices
    train_vectors = np.ascontiguousarray(train_rep, dtype=np.float32)
    # every test sample is flattened to one row, as the per-sample reshape did before
    query_vectors = np.ascontiguousarray(np.asarray(test_rep).reshape(len(test_rep), -1), dtype=np.float32)

    # index object used to store the training representations
    index = faiss.IndexFlat(train_vectors.shape[1])
    index.add(train_vectors)

    # a single batched search replaces one faiss call per test sample
    _, neighbours = index.search(query_vectors, 1)
    indices = neighbours[:, 0]

    # labels of the nearest training samples
    return train_label[indices].squeeze()

def evaluate_classification(pred_label, ground_truth):
    """Score the predictions as the Euclidean distance between the predicted
    label vector and the ground-truth label vector (0 means identical vectors)."""
    residual = pred_label - ground_truth
    squared_error = np.square(residual)
    return np.sqrt(np.sum(squared_error))

def set_parameter_requires_grad(model, feature_extracting: bool):
    """Freeze every parameter of `model` when it is used as a feature extractor.

    With `feature_extracting` false the model is left untouched; otherwise
    `requires_grad` is switched off on all of its parameters.
    """
    if not feature_extracting:
        return

    for weight in model.parameters():
        weight.requires_grad = False


def extract_rep_squeezeNet(model, loader, device="cpu"):
    """Run `model` over every batch of `loader` and collect its outputs as representations.

    Parameters
    ----------
    model : torch.nn.Module
        network used as feature extractor
    loader : iterable of batches
        each batch carries the inputs at position 0 and the labels at position 1
    device : str or torch.device
        device on which the model and the inputs are placed

    Returns
    -------
    tuple of numpy.ndarray
        the concatenated representations and the concatenated labels
    """
    # Evaluation mode: dropout is disabled and batch-norm uses its running statistics.
    # The model is left in eval mode; call model.train() before training it again.
    model.eval()
    model.to(device)
    representations, labels = [], []
    # Pure inference: without no_grad every forward pass would also build the
    # autograd graph, wasting memory and time.
    with torch.no_grad():
        for batch in tqdm(loader, total=len(loader)):
            x = batch[0].to(device)
            rep = model(x)
            rep = rep.detach().to('cpu').numpy()
            labels.append(batch[1])
            representations.append(rep)

    return np.concatenate(representations), np.concatenate(labels)

def split_into_train_and_test(dataset, train_size_perc=0.8):
    """Randomly split `dataset` into a train and a test subset.

    The train subset receives int(train_size_perc * len(dataset)) elements,
    the test subset all the remaining ones.
    """
    n_total = len(dataset)
    n_train = int(train_size_perc * n_total)
    subset_lengths = [n_train, n_total - n_train]

    train_part, test_part = torch.utils.data.random_split(dataset, subset_lengths)
    return train_part, test_part

# Project

Imposto i seed e le variabili globali:

In [10]:
# One seed for every random number generator used by the notebook.
SEED = 1996
random.seed(SEED)
np.random.seed(SEED)
pl.seed_everything(SEED)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility.
# The attribute is spelled `deterministic`: a misspelled name is silently accepted
# as a new attribute and leaves cuDNN non-deterministic.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Use the GPU when the runtime provides one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

Global seed set to 1996


Device: cuda


In [18]:
# Project folder on the mounted Google Drive; the other paths are derived from it.
PATH_GDRIVE = '/content/gdrive/MyDrive/trashbin-classifier/'
PATH_DST = join(PATH_GDRIVE, 'dataset/all_labels.csv')
# NOTE(review): the file name mentions SqueezeNet1_1 while the notebook loads these
# weights into squeezenet1_0 - confirm which architecture the checkpoint belongs to.
PRETRAINED_MODEL_PATH = join(PATH_GDRIVE, 'SqueezeNet1_1__lr=0.00282-50.pth')

# TODO: with a value > 0 I get
# [W ParallelNative.cpp:214] Warning: Cannot set number of intraop threads after parallel work has started or after set_num_threads call when using native parallel backend (function set_num_threads)
# and the training cannot be run; however it is still too slow. How should I proceed?
NUM_WORKERS = 2

# Training / data-loading settings
BATCH_SIZE = 32
NUM_EPOCHS = 20
GPUS = 1
num_class = 3

# Normalization statistics that go with the pretrained weights
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

Carico il dataset singolo

In [19]:
from torch.utils.data import DataLoader

# Preprocessing applied to every image: fixed 224x224 size, tensor conversion,
# normalization with the statistics defined in the configuration cell.
transf = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

# Whole dataset, then a random train/test split (default proportion of the helper).
dst = TrashbinDataset(csv=PATH_DST, transform=transf, path_gdrive=PATH_GDRIVE)
dst_train, dst_test = split_into_train_and_test(dst)

# Only the training loader is shuffled.
dst_train_loader = DataLoader(
    dst_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
)
dst_test_loader = DataLoader(
    dst_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
)

Estraggo le rappresentazioni rgb dai loader:

In [20]:
# RGB baseline: flatten every (already transformed) image into one vector per sample
# and collect the matching labels, for both the train and the test loader.
# NOTE(review): the saved output of this cell stops with a RuntimeError at batch 316/330.
dst_train_rep_rgb, dst_train_labels = extract_rgb_representations(loader=dst_train_loader)
dst_test_rep_rgb, dst_test_labels = extract_rgb_representations(loader=dst_test_loader)

 96%|█████████▌| 316/330 [1:05:04<02:52, 12.36s/it]


RuntimeError: ignored

Rappresentazioni di training:

In [None]:
# One row per training sample, one column per flattened pixel value.
dst_train_rep_rgb.shape

Ottengo le predizioni sul test-set usando `predict_nn`:

In [None]:
# Label every test sample with the label of its nearest training sample in RGB space.
pred_test_label_rgb = predict_nn(dst_train_rep_rgb, dst_test_rep_rgb, dst_train_labels)
print(f"Sample di label: {pred_test_label_rgb}")

Valuto le performance della baseline

In [None]:
# The reported value is the Euclidean distance between the predicted and the
# ground-truth label vectors (see evaluate_classification), not an error rate.
classification_error = evaluate_classification(pred_test_label_rgb, dst_test_labels)
print(f"Classification error: {classification_error:0.2f}")

Per effettuare il training della triplet network importo il miglior modello trovato nella precedente relazione: `SqueezeNet v1.0`. Carico i pesi già ottenuti dopo il training di 100 epoche e applico le opportune modifiche al classificatore, così da usare la rete come estrattore di rappresentazioni. <b>TODO: migliorare la descrizione</b>

In [None]:
# download the model from the PyTorch hub
squeezeNet_1_0 = torch.hub.load('pytorch/vision:v0.10.0', 'squeezenet1_0', pretrained=True)
# replace the final convolution so that the classifier outputs `num_class` channels,
# matching the layout of the saved weights
squeezeNet_1_0.classifier[1] = nn.Conv2d(512, num_class, kernel_size=(1,1), stride=(1,1))
squeezeNet_1_0.num_classes = num_class
# load the saved weights; map_location='cpu' lets a checkpoint written from a GPU
# also load on the CPU fallback runtime
# NOTE(review): the checkpoint file name mentions SqueezeNet1_1 - confirm it matches squeezenet1_0.
# NOTE(review): torch.load unpickles the file, only load checkpoints you trust.
squeezeNet_1_0.load_state_dict(torch.load(PRETRAINED_MODEL_PATH, map_location='cpu'))

# drop the classifier: the network now returns the flattened feature maps
squeezeNet_1_0.classifier = nn.Identity()
# shape of the representation produced for a single 224x224 image
squeezeNet_1_0(torch.zeros(1, 3, 224,224)).shape

In [None]:
# use the model trained in the previous project to extract the representations of the
# training and test sets; run it on the device selected at the top of the notebook
# (the helper defaults to the CPU when no device is passed)
dst_train_rep, dst_train_labels = extract_rep_squeezeNet(squeezeNet_1_0, dst_train_loader, device=device)
dst_test_rep, dst_test_labels = extract_rep_squeezeNet(squeezeNet_1_0, dst_test_loader, device=device)

Valuto le performance del sistema:

In [None]:
# evaluate the system with the representations that are not optimized yet
pred_test_label = predict_nn(dst_train_rep, dst_test_rep, dst_train_labels)
classification_error = evaluate_classification(pred_test_label, dst_test_labels)
print(f"Classification error: {classification_error}")

Carico il dataset in triplette:

In [None]:
# The csv stores image paths relative to the project folder, so the Drive prefix
# must be passed here exactly as for the single-image dataset.
dst_triplet = TripletTrashbin(root=PATH_DST, transform=transf, path_gdrive=PATH_GDRIVE)

# NOTE(review): triplets are sampled over the whole dataset before the split, so the
# positive/negative of a test anchor may be an image of the training subset.
dst_train_triplet, dst_test_triplet = split_into_train_and_test(dst_triplet)

triplet_dataset_train_loader = DataLoader(dst_train_triplet, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE, shuffle=True)
triplet_dataset_test_loader = DataLoader(dst_test_triplet, num_workers=NUM_WORKERS, batch_size=BATCH_SIZE)

In [None]:
# TODO: show the images of the triplets

Alleno la rete con lr=0.002 che è il migliore trovato per SqueezeNet nel precedente progetto.

In [None]:
triplet_trashbin_task =  TripletNetworkTask(squeezeNet_1_0, lr=0.002)
logger = TensorBoardLogger("metric_logs", name="test_trashbin_v1",)

# Keep the weights with the lowest validation loss, so that the result of the
# training is not lost when the fit ends. 'valid/loss' is the key logged by
# validation_step; it is only available on the epochs in which validation runs.
checkpoint_callback = ModelCheckpoint(monitor='valid/loss', mode='min', save_top_k=1)

# NOTE(review): NUM_EPOCHS is defined in the configuration cell but the epochs are
# hard-coded here - confirm which value is intended.
trainer = pl.Trainer(
    gpus=GPUS,
    logger=logger,
    max_epochs=10,
    check_val_every_n_epoch=5,
    callbacks=[checkpoint_callback],
)
trainer.fit(triplet_trashbin_task, triplet_dataset_train_loader, triplet_dataset_test_loader)

In [None]:
# TODO: should I extract a t-SNE projection of the embeddings??
# How do I continue ???