In [1]:
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision import transforms
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from libs.code import *
from pytorch_lightning.loggers import TensorBoardLogger
import torch
import pytorch_lightning as pl
import faiss

from libs.Dataset import *
from libs.util import *
from libs.SiameseNetwork import TripletNetworkTask
# non necessari !
# from libs.code import *
# from libs.VAE import *

Imposto i seed e le variabili globali:

In [2]:
# Seed every random number generator used by the notebook so runs are repeatable.
# NOTE(review): `random` and `np` are not imported explicitly in this notebook;
# presumably they arrive through the star imports from `libs` - verify.
SEED = 1996
random.seed(SEED)
np.random.seed(SEED)
pl.seed_everything(SEED)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility.
# The flag is spelled `deterministic`; the previous spelling (`determinstic`)
# did not set the cuDNN flag at all.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Prefer the first CUDA device when one is available, otherwise fall back to CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print("Device:", device)

Global seed set to 1996


Device: cpu


In [4]:
# --- Paths ---------------------------------------------------------------
PATH_DST = 'dataset/all_labels.csv'
PATH_GDRIVE = ''
PRETRAINED_MODEL_PATH = 'models/squeezeNet_pretrained.pth'

# --- Data loading / training ---------------------------------------------
# TODO: setting NUM_WORKERS > 0 produces
# [W ParallelNative.cpp:214] Warning: Cannot set number of intraop threads after parallel work has started or after set_num_threads call when using native parallel backend (function set_num_threads)
# and training cannot be run. With 0 workers it is still too slow: how to proceed?
NUM_WORKERS = 0
BATCH_SIZE, NUM_EPOCHS = 32, 20
GPUS = 0
num_class = 3

# --- Normalization --------------------------------------------------------
# Per-channel statistics of the pretrained model (RGB order).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

#### Carico il dataset singolo

In [4]:
# Preprocessing applied to every image: resize to 224x224, convert to a tensor,
# then normalize with the configured per-channel mean/std.
transf = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Build the dataset from the labels CSV and split it into train / test parts.
dst = TrashbinDataset(csv=PATH_DST, transform=transf)
dst_train, dst_test = split_into_train_and_test(dst)

# Both loaders share worker count and batch size; only the training one shuffles.
loader_kwargs = dict(num_workers=NUM_WORKERS, batch_size=BATCH_SIZE)
dst_train_loader = DataLoader(dst_train, shuffle=True, **loader_kwargs)
dst_test_loader = DataLoader(dst_test, shuffle=False, **loader_kwargs)

Estraggo le rappresentazioni rgb dai loader:

In [5]:
# Baseline features: run both loaders through `extract_rgb_representations`,
# which returns a (representations, labels) pair for each split.
# NOTE(review): the recorded run took several minutes per split; consider caching the results.
dst_train_rep_rgb, dst_train_labels = extract_rgb_representations(loader=dst_train_loader)
dst_test_rep_rgb, dst_test_labels = extract_rgb_representations(loader=dst_test_loader)

100%|██████████| 330/330 [05:47<00:00,  1.05s/it]
100%|██████████| 83/83 [01:25<00:00,  1.04s/it]


Rappresentazioni di training:

In [6]:
# Display the shape of the training representations.
# The recorded output was (10560, 150528); 150528 == 3 * 224 * 224, which is
# consistent with flattened 224x224 RGB images - TODO confirm in the helper.
dst_train_rep_rgb.shape

(10560, 150528)

Ottengo le predizioni sul test-set usando `predict_nn`:

In [7]:
# Predict a label for every test representation with `predict_nn`, using the
# training representations and their labels as reference.
# presumably a nearest-neighbour lookup (going by the helper's name) - verify in libs.
pred_test_label_rgb = predict_nn(dst_train_rep_rgb, dst_test_rep_rgb, dst_train_labels)
print(f"Sample di label: {pred_test_label_rgb}")

Sample di label: [2 0 0 ... 0 2 2]


Valuto le performance della baseline

In [8]:
# Score the baseline predictions against the ground-truth test labels.
# NOTE(review): the recorded value was 5.29; whether this is a percentage or
# another unit depends on `evaluate_classification` - confirm.
classification_error = evaluate_classification(pred_test_label_rgb, dst_test_labels)
print(f"Classification error: {classification_error:0.2f}")

Classification error: 5.29


#### Modello

<s>Importo per effettuare il training della triplenet il miglior modello trovato nella precedente relazione: `SqueezeNet v1.0`. Importo dunque i pesi già trovati dopo il training di 100 epoche .... <b>TODO migliora la descrizione</b> ... importo i pesi.. faccio le opportune modifiche ...</s>

In [8]:
# Rebuild the SqueezeNet v1.0 trained in the previous project and turn it into
# an embedding network for the triplet task.

# Download the architecture from the PyTorch hub.
# NOTE(review): `pretrained=True` also fetches the hub weights, which are then
# replaced by the saved state dict below - presumably redundant, verify.
squeezeNet_1_0 = torch.hub.load('pytorch/vision:v0.10.0', 'squeezenet1_0', pretrained=True)

# Replace the final conv so the classifier outputs `num_class` channels
# (presumably the head the saved weights were trained with).
squeezeNet_1_0.classifier[1] = nn.Conv2d(512, num_class, kernel_size=(1, 1), stride=(1, 1))

# Load the saved weights. `map_location="cpu"` makes the load work on machines
# without CUDA even if the checkpoint was written from a GPU run.
squeezeNet_1_0.load_state_dict(torch.load(PRETRAINED_MODEL_PATH, map_location="cpu"))

# Drop the classification conv and keep only activation + global pooling, so
# the network returns a feature vector instead of class scores.
squeezeNet_1_0.classifier = nn.Sequential(
    nn.ReLU(inplace=True),
    nn.AdaptiveAvgPool2d(output_size=(1, 1)),
    nn.Identity(),
)

# Sanity check on a dummy 224x224 RGB batch (recorded output: torch.Size([1, 512])).
# No gradients are needed for this probe.
with torch.no_grad():
    embedding_probe = squeezeNet_1_0(torch.zeros(1, 3, 224, 224))
embedding_probe.shape

Using cache found in /Users/danilo/.cache/torch/hub/pytorch_vision_v0.10.0


torch.Size([1, 512])

In [10]:
# Use the model trained in the previous project to extract representations
# from the training and the test set.
# Note: `dst_train_labels` / `dst_test_labels` are reassigned here with the
# labels returned by this extraction.
dst_train_rep, dst_train_labels = extract_rep_squeezeNet(squeezeNet_1_0, dst_train_loader)
dst_test_rep, dst_test_labels = extract_rep_squeezeNet(squeezeNet_1_0, dst_test_loader)

100%|██████████| 330/330 [12:34<00:00,  2.29s/it]
100%|██████████| 83/83 [03:09<00:00,  2.29s/it]


Valuto le performance del sistema:

In [11]:
# Evaluate the system using the representations before any triplet fine-tuning.
# NOTE(review): the recorded error was exactly 0.0; check that the pretrained
# model was not trained on images that ended up in this test split - verify.
pred_test_label = predict_nn(dst_train_rep, dst_test_rep, dst_train_labels)
classification_error = evaluate_classification(pred_test_label, dst_test_labels)
print(f"Classification error: {classification_error}")

Classification error: 0.0


#### Carico il dataset in triplette:

In [6]:
# Same preprocessing as for the single-image dataset: resize, to-tensor, normalize.
transf = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Triplet view of the dataset, split into train / test parts.
dst_triplet = TripletTrashbin(root=PATH_DST, transform=transf)
dst_train_triplet, dst_test_triplet = split_into_train_and_test(dst_triplet)

# Only the training loader shuffles; the test loader keeps the DataLoader default.
triplet_loader_kwargs = dict(num_workers=NUM_WORKERS, batch_size=BATCH_SIZE)
triplet_dataset_train_loader = DataLoader(dst_train_triplet, shuffle=True, **triplet_loader_kwargs)
triplet_dataset_test_loader = DataLoader(dst_test_triplet, **triplet_loader_kwargs)

In [13]:
# TODO: show the images of the triplets

#### Primo training:
Alleno la rete con `lr=0.002`, il miglior learning rate trovato per SqueezeNet nel precedente progetto.

In [16]:
# First run: wrap the embedding network in the triplet task and train for
# 10 epochs, validating every 5 epochs. Metrics go to TensorBoard under
# metric_logs/test_trashbin_v1.
# NOTE(review): in the recorded output the loss stays at ~2.0 on every step
# while the printed "loss embedd" value is ~0.02 - check whether the
# embeddings are collapsing before trusting this run.
triplet_trashbin_task = TripletNetworkTask(squeezeNet_1_0, lr=0.002)
logger = TensorBoardLogger("metric_logs", name="test_trashbin_v1")

trainer = pl.Trainer(
    gpus=GPUS,
    logger=logger,
    max_epochs=10,
    check_val_every_n_epoch=5,
)
trainer.fit(
    triplet_trashbin_task,
    triplet_dataset_train_loader,
    triplet_dataset_test_loader,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


Epoch 3:  74%|███████▍  | 244/330 [49:31<17:27, 12.18s/it, loss=2, v_num=0]



  | Name          | Type              | Params
----------------------------------------------------
0 | embedding_net | SqueezeNet        | 735 K 
1 | criterion     | TripletMarginLoss | 0     
----------------------------------------------------
735 K     Trainable params
0         Non-trainable params
735 K     Total params
2.942     Total estimated model params size (MB)


                                                                      

Global seed set to 1996


Epoch 0:   0%|          | 0/330 [00:00<?, ?it/s] loss embedd 0.02123003453016281
loss 2.00002121925354
Epoch 0:   0%|          | 1/330 [00:15<1:26:34, 15.79s/it, loss=2, v_num=0]loss embedd 0.02122921124100685
loss 2.00002121925354
Epoch 0:   1%|          | 2/330 [00:28<1:17:14, 14.13s/it, loss=2, v_num=0]loss embedd 0.021227756515145302
loss 2.00002121925354
Epoch 0:   1%|          | 3/330 [00:40<1:13:05, 13.41s/it, loss=2, v_num=0]loss embedd 0.0212254635989666
loss 2.00002121925354
Epoch 0:   1%|          | 4/330 [00:52<1:10:44, 13.02s/it, loss=2, v_num=0]loss embedd 0.02122240513563156
loss 2.00002121925354
Epoch 0:   2%|▏         | 5/330 [01:04<1:09:36, 12.85s/it, loss=2, v_num=0]loss embedd 0.021218664944171906
loss 2.00002121925354
Epoch 0:   2%|▏         | 6/330 [01:16<1:08:51, 12.75s/it, loss=2, v_num=0]loss embedd 0.021214192733168602
loss 2.00002121925354
Epoch 0:   2%|▏         | 7/330 [01:28<1:07:57, 12.62s/it, loss=2, v_num=0]loss embedd 0.02120891399681568
loss 2.0000212

#### Secondo training

In [23]:
# Second run: resume the first run from its saved checkpoint and continue up to
# epoch 15, validating every epoch and saving checkpoints by validation loss.

# Paths are relative to the notebook's working directory (the resume path was
# already relative), so the cell no longer depends on one user's home folder.
run_dir = os.path.join("metric_logs", "test_trashbin_v1", "version_0")
resume_ckpt_path = os.path.join(run_dir, "checkpoints", "epoch=9-step=3299.ckpt")

triplet_trashbin_task_v2 = TripletNetworkTask(squeezeNet_1_0, lr=0.002)

# `monitor` must match a metric name logged by the LightningModule; the
# metrics listed in the recorded error are 'train/loss' and 'valid/loss'.
# The filename is a template, so each saved checkpoint is labelled with its own
# epoch/step instead of always being called "epoch=9-step=3299".
checkpoint_callback = [
    ModelCheckpoint(
        monitor="valid/loss",
        mode="min",  # lower validation loss is better (also the default)
        dirpath=run_dir,
        filename="{epoch}-{step}",
    )
]

logger = TensorBoardLogger("metric_logs", name="test_trashbin_v1")
trainer = pl.Trainer(
    gpus=GPUS,
    logger=logger,
    max_epochs=15,
    check_val_every_n_epoch=1,
    callbacks=checkpoint_callback,
)
trainer.fit(
    triplet_trashbin_task_v2,
    triplet_dataset_train_loader,
    triplet_dataset_test_loader,
    ckpt_path=resume_ckpt_path,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
Restoring states from the checkpoint path at metric_logs/test_trashbin_v1/version_0/checkpoints/epoch=9-step=3299.ckpt
  "You're resuming from a checkpoint that ended mid-epoch."
Restored all states from the checkpoint file at metric_logs/test_trashbin_v1/version_0/checkpoints/epoch=9-step=3299.ckpt

  | Name          | Type              | Params
----------------------------------------------------
0 | embedding_net | SqueezeNet        | 735 K 
1 | criterion     | TripletMarginLoss | 0     
----------------------------------------------------
735 K     Trainable params
0         Non-trainable params
735 K     Total params
2.942     Total estimated model params size (MB)
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")


Validation sanity check:   0%|          | 0/2 [00:00<?, ?it/s]

  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


                                                                      

Global seed set to 1996
  f"The dataloader, {name}, does not have many workers which may be a bottleneck."


Epoch 10:   0%|          | 0/413 [00:00<?, ?it/s]loss embedd 0.0
loss 2.0
Epoch 10:   0%|          | 1/413 [00:15<1:48:54, 15.86s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   0%|          | 2/413 [00:28<1:36:11, 14.04s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   1%|          | 3/413 [00:40<1:31:59, 13.46s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   1%|          | 4/413 [00:53<1:30:21, 13.26s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   1%|          | 5/413 [01:05<1:29:29, 13.16s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   1%|▏         | 6/413 [01:18<1:28:26, 13.04s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   2%|▏         | 7/413 [01:30<1:27:21, 12.91s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   2%|▏         | 8/413 [01:42<1:26:34, 12.83s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   2%|▏         | 9/413 [01:54<1:25:59, 12.77s/it, loss=2, v_num=1]loss embedd 0.0
loss 2.0
Epoch 10:   2%|▏ 

MisconfigurationException: ModelCheckpoint(monitor='val_loss') not found in the returned metrics: ['train/loss', 'valid/loss']. HINT: Did you call self.log('val_loss', value) in the LightningModule?

In [None]:
# TODO: check the commits

In [None]:
# TODO: should I extract a t-SNE projection?
# How do I proceed from here?