## 1. Libraries

In [123]:
# Importing libraries
import time
import copy

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from torch import nn
from torch.utils.data import DataLoader
from torchvision.utils import make_grid

import tqdm
from tqdm.auto import tqdm;

In [124]:
# Define the manual seed
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [125]:
# Make a device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"The device is: {device}")
n_gpus = torch.cuda.device_count()
print(f"Number of gpus: {n_gpus}")
!nvidia-smi --query-gpu=name --format=csv,noheader

The device is: cuda
Number of gpus: 1
NVIDIA GeForce RTX 2060


## 2. Some Blocks


In [126]:
class ConvBlock (nn.Module):
    def __init__(self,
                 in_channels: int,
                 out_channels: int,
                 **kwargs): # this means that the key arguments are arbitrary
        super().__init__()

        self.conv = nn.Conv2d(in_channels=in_channels,
                              out_channels=out_channels,
                              **kwargs,
                              device=device)
        self.batchnorm = nn.BatchNorm2d(num_features=out_channels) # to improve performance
        self.relu = nn.ReLU()

    def forward(self,x):
        x = self.conv(x)
        x = self.batchnorm(x)
        x = self.relu(x)
        return x

In [127]:
class L2NormLayer (nn.Module):
    def __init__(self,dim=1):
        super().__init__()
        self.dim=dim

    def forward (self,x):
        return nn.functional.normalize(x,p=2,dim=self.dim)

In [128]:
class  InceptionBlock_2 (nn.Module):
    def __init__(self,
                 
                 in_channels: int,
                 red_3x3: int,
                 out_3x3: int):
        
        super().__init__()

        self.branch2 = nn.Sequential(
            ConvBlock(in_channels=in_channels,
                      out_channels=red_3x3,
                      kernel_size=1),
            ConvBlock(in_channels=red_3x3,
                      out_channels=out_3x3,
                      kernel_size=3,
                      padding=1) # ojo (btw no ponemos el S bc por defecto es 1)
        )

    def forward(self,x):
        # N x filters x 28 x 28 → 0th x 1st x 2nd x 3rd dimension (we use 1)
        return self.branch2(x)

In [129]:
from typing import Literal

class InceptionBlock_m3x3 (nn.Module):
    def __init__(self,
                 
                 in_channels: int,
                 
                 red_3x3: int,
                 out_3x3: int,
                 
                 red_5x5: int,
                 out_5x5: int): ## change later
        
        super().__init__()
        
        ## Branch 2
        self.branch2 = nn.Sequential(
            ConvBlock(in_channels=in_channels,
                      out_channels=red_3x3,
                      kernel_size=1),
            ConvBlock(in_channels=red_3x3,
                      out_channels=out_3x3,
                      kernel_size=3,
                      padding=1,
                      stride=2) # ojo (btw no ponemos el S bc por defecto es 1)
        )
        
        ## Branch 3
        self.branch3 = nn.Sequential(
            ConvBlock(in_channels=in_channels,
                      out_channels=red_5x5,
                      kernel_size=1),
            ConvBlock(in_channels=red_5x5,
                      out_channels=out_5x5,
                      kernel_size=5,
                      padding=2,
                      stride=2) # ojo (btw no ponemos el S bc por defecto es 1)
        )

        ## Branch 4
        self.branch4 = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)

    def forward(self,x):
        # N x filters x 28 x 28 → 0th x 1st x 2nd x 3rd dimension (we use 1)
        return torch.cat([self.branch2(x),self.branch3(x),self.branch4(x)],1)

In [130]:
from typing import Literal

class InceptionBlock (nn.Module):
    def __init__(self,
                 
                 in_channels: int,
                 out_1x1: int,

                 red_3x3: int,
                 out_3x3: int,

                 red_5x5: int,
                 out_5x5: int,

                 out_1x1pool: int,
                 
                 pool_type: Literal['l2', 'max']):
        
        super().__init__()

        ## Branch 1
        self.branch1 = ConvBlock(in_channels=in_channels,
                                 out_channels=out_1x1,
                                 kernel_size=1)
        
        ## Branch 2
        self.branch2 = nn.Sequential(
            ConvBlock(in_channels=in_channels,
                      out_channels=red_3x3,
                      kernel_size=1),
            ConvBlock(in_channels=red_3x3,
                      out_channels=out_3x3,
                      kernel_size=3,
                      padding=1) # ojo (btw no ponemos el S bc por defecto es 1)
        )
        
        ## Branch 3
        self.branch3 = nn.Sequential(
            ConvBlock(in_channels=in_channels,
                      out_channels=red_5x5,
                      kernel_size=1),
            ConvBlock(in_channels=red_5x5,
                      out_channels=out_5x5,
                      kernel_size=5,
                      padding=2) # ojo (btw no ponemos el S bc por defecto es 1)
        )

        ## Branch 4
        if pool_type == 'max':
            self.branch4 = nn.Sequential(
                nn.MaxPool2d(kernel_size=3,stride=1,padding=1),
                ConvBlock(in_channels=in_channels,
                          out_channels=out_1x1pool,
                          kernel_size=1)
            )
        elif pool_type == 'l2':
            self.branch4 = nn.Sequential(
                L2NormLayer(),
                ConvBlock(in_channels=in_channels,
                          out_channels=out_1x1pool,
                          kernel_size=1)
            )

    def forward(self,x):
        # N x filters x 28 x 28 → 0th x 1st x 2nd x 3rd dimension (we use 1)
        output = []

        if self.branch1 is not None:
            output.append(self.branch1(x))
        output.append(self.branch2(x))
        output.append(self.branch3(x))
        output.append(self.branch4(x))

        return torch.cat(output,1)

## 3. The model

In [131]:
class NN2 (nn.Module):
    def __init__(self,
                 in_channels = 3):
        super().__init__()

        self.conv1 = ConvBlock(in_channels=in_channels,
                               out_channels=64,
                               kernel_size=7,
                               stride=2,
                               padding=3)
        
        # In this order: 'in_channels',red_3x3,out_3x3
        self.inception2 = InceptionBlock_2(64,64,192)

        # In this order: in_channels,red_3x3,out_3x3,red_5x5,out_5x5
        self.inception3c = InceptionBlock_m3x3(320,128,256,32, 64)
        self.inception4e = InceptionBlock_m3x3(640,160,256,64,128)

        # In this order: in_channels,out_1x1,red_3x3,out_3x3,red_5x5,out_5x5,out_1x1pool
        self.inception3a = InceptionBlock( 192, 64, 96,128,16, 32, 32, pool_type='max')
        self.inception3b = InceptionBlock( 256, 64, 96,128,32, 64, 64, pool_type='l2')

        self.inception4a = InceptionBlock( 640,256, 96,192,32, 64,128, pool_type='l2')
        self.inception4b = InceptionBlock( 640,224,112,224,32, 64,128, pool_type='l2')
        self.inception4c = InceptionBlock( 640,192,128,256,32, 64,128, pool_type='l2')
        self.inception4d = InceptionBlock( 640,160,144,288,32, 64,128, pool_type='l2')
        
        self.inception5a = InceptionBlock(1024,384,192,384,48,128,128, pool_type='l2')
        self.inception5b = InceptionBlock(1024,384,192,384,48,128,128, pool_type='max')
        
        self.maxpool = nn.MaxPool2d(kernel_size=3,stride=2,padding=1)
        self.avgpool = nn.AvgPool2d(kernel_size=7) # stride? padding?
        
        self.FC = nn.Linear(1024,128)
        self.norm = nn.BatchNorm2d(num_features=64)

    def forward(self,x):
        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.norm(x)
        
        x = self.inception2(x)

        x = self.maxpool(x)

        x = self.inception3a(x)
        x = self.inception3b(x)
        x = self.inception3c(x)

        x = self.inception4a(x)
        x = self.inception4b(x)
        x = self.inception4c(x)
        x = self.inception4d(x)
        x = self.inception4e(x)

        x = self.inception5a(x)
        x = self.inception5b(x)

        x = self.avgpool(x)
        x = x.view(x.shape[0],-1)
        x = self.FC(x)
        x = nn.functional.normalize(x,p=2,dim=1)

        return x

In [132]:
model_1 = NN2().to(device)
print(f"Are the models in the 'cuda' device? {next(model_1.parameters()).is_cuda}")

Are the models in the 'cuda' device? True


## 4. Verify the dimensions of the model

In [133]:
another_random_tensor = torch.randn(12,3,224,224).to(device)
model_1(another_random_tensor)

tensor([[-0.1502,  0.0332, -0.0110,  ...,  0.0455,  0.1069, -0.1147],
        [-0.1504,  0.0750, -0.0012,  ..., -0.0346,  0.1119, -0.0966],
        [-0.1584,  0.0091,  0.0219,  ...,  0.0311,  0.0820, -0.1038],
        ...,
        [-0.1212,  0.0738,  0.0434,  ..., -0.0308,  0.0923, -0.0892],
        [-0.1215,  0.0827,  0.0106,  ..., -0.0486,  0.1086, -0.0663],
        [-0.1538,  0.0641, -0.0034,  ..., -0.0161,  0.1292, -0.1398]],
       device='cuda:0', grad_fn=<DivBackward0>)

In [134]:
random_tensor = torch.randn(51,3,224,224).to(device)

from torchinfo import summary

summary(model_1,random_tensor.shape)

Layer (type:depth-idx)                   Output Shape              Param #
NN2                                      [51, 128]                 --
├─ConvBlock: 1-1                         [51, 64, 112, 112]        --
│    └─Conv2d: 2-1                       [51, 64, 112, 112]        9,472
│    └─BatchNorm2d: 2-2                  [51, 64, 112, 112]        128
│    └─ReLU: 2-3                         [51, 64, 112, 112]        --
├─MaxPool2d: 1-2                         [51, 64, 56, 56]          --
├─BatchNorm2d: 1-3                       [51, 64, 56, 56]          128
├─InceptionBlock_2: 1-4                  [51, 192, 56, 56]         --
│    └─Sequential: 2-4                   [51, 192, 56, 56]         --
│    │    └─ConvBlock: 3-1               [51, 64, 56, 56]          4,288
│    │    └─ConvBlock: 3-2               [51, 192, 56, 56]         111,168
├─MaxPool2d: 1-5                         [51, 192, 28, 28]         --
├─InceptionBlock: 1-6                    [51, 256, 28, 28]         --
│ 

## 5. Working with the data

In [135]:
# Define the transformations we will apply to images
# they need to match the size of images the architecture was developed for

transform = transforms.Compose([
    transforms.Resize((224,224)),   # 224x224 px
    transforms.ToTensor()           # Convert to tensor
])

In [136]:
# The path of the dataset
from pathlib import Path

root_data_path=Path('model_data')

train_path=root_data_path / 'train'
test_path=root_data_path / 'test'

In [137]:
# Define the dataset

train_data = torchvision.datasets.ImageFolder(train_path,
                                              transform=transform)

test_data = torchvision.datasets.ImageFolder(test_path,
                                             transform=transform)

In [138]:
# Split the data

BATCH_SIZE = 10

train_dataloader = DataLoader(dataset=train_data,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4*n_gpus)

test_dataloader = DataLoader(dataset=test_data,
                             batch_size=BATCH_SIZE,
                             shuffle=True,
                             num_workers=4*n_gpus)

# Let's check out what what we've created
print(f"DataLoaders: {train_dataloader, test_dataloader}")
print(f"Length of train_dataloader: {len(train_dataloader)} batches of {BATCH_SIZE}...")
print(f"Length of test_dataloader: {len(test_dataloader)} batches of {BATCH_SIZE}...")

DataLoaders: (<torch.utils.data.dataloader.DataLoader object at 0x000002C4B78F2140>, <torch.utils.data.dataloader.DataLoader object at 0x000002C4B78F2F50>)
Length of train_dataloader: 400 batches of 10...
Length of test_dataloader: 100 batches of 10...


In [140]:
# Check out what's inside the training dataloader
train_features_batch, train_labels_batch = next(iter(train_dataloader))
train_features_batch.shape, train_labels_batch.shape

(torch.Size([10, 3, 224, 224]), torch.Size([10]))

## 6. Setup loss function, optimizer and timer

In [142]:
# Setup loss function and optimizer
loss_fn = nn.TripletMarginLoss(margin=0.2,p=2,eps=1e-7)
optimizer = optim.Adam(params=model_1.parameters(),
                       lr = 0.001,
                       weight_decay=1e-3)

# Smth new!
lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer,
                                                    patience=5)
# current_lrs = lr_scheduler.get_last_lr() ← use it like this

In [143]:
# Function to time our experiment

from timeit import default_timer as timer

def print_train_time(start: float,
                     end: float,
                     device: torch.device = None):
    """Print diff between start and end time:"""
    total_time = end - start
    print(f"Train time on {device}: {total_time:.3f} seconds")
    return total_time

## 7. For the triplet loss

In [144]:
# Importing libraries

from pytorch_metric_learning.distances import CosineSimilarity
from pytorch_metric_learning.reducers import ThresholdReducer
from pytorch_metric_learning.regularizers import LpRegularizer
from pytorch_metric_learning import losses
from pytorch_metric_learning.miners import TripletMarginMiner

# Creating the variables

distance=CosineSimilarity()
reducer = ThresholdReducer(high=0.3)
embedding_regularizer = LpRegularizer()

In [145]:
loss_func = losses.TripletMarginLoss(distance = CosineSimilarity(), 
                                    reducer = ThresholdReducer(high=0.3), 
                                    embedding_regularizer = LpRegularizer())

In [146]:
mining_func=TripletMarginMiner(margin=0.2,distance=distance,type_of_triplets='hard')

## 8. `train_step` and `test_step`

In [147]:
from sklearn.neighbors import KNeighborsClassifier

knn_classifier = KNeighborsClassifier(n_neighbors=1)  

In [151]:
def train_step(model:torch.nn.Module,
               dataloader:torch.utils.data.dataloader,
               loss_fn:torch.nn.Module,
               optimizer:torch.optim.Optimizer,
               mining_func,
               device = device,
               knn_classifier = None):
    
    """Preforms a training with model trying to learn on data_loader."""

    # For the training
    train_loss=0,0

    # For the testing
    all_embeddings = []
    all_labels = []

    # Put model into training mode
    model.train()

    # Add a loop to loop through the training batcher
    for batch, (X,y) in enumerate(dataloader):

        # Put data on target device
        X,y = X.to(device),y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        indices_tuple=mining_func(y_pred,y) # it gives you the index

        # 2. Calculate the loss (and accuracy)
        anchors=y_pred[indices_tuple[0]]
        positives=y_pred[indices_tuple[1]]
        negatives=y_pred[indices_tuple[2]]

        loss=loss_fn(anchors,positives,negatives)

        loss=torch.nan_to_num(loss,nan=0.0)

        train_loss+=loss.item()

        # 3.Optimizer zero grad

        optimizer.zero_grad()

        # 4. Loss backward
        loss.backward()

        # 5. Optimizer step
        optimizer.step()
    
    # Adjust metrics to get average loss per batch
    train_loss= train_loss/len(dataloader)
    

    return train_loss , accuracy

In [152]:
def test_step(
        model:torch.nn.Module,
        dataloader:torch.utils.data.dataloader,
        loss_fn:torch.nn.Module,
        mining_func,
        device=device):
    
    # Put model in eval mode
    model.eval()

    test_loss,test_acc=0,0

    with torch.inference_mode():
        # Loop through DataLoader batches

        for batch, (X,y) in enumerate(dataloader):
            # Send data to the target device
            X,y=X.to(device),y.to(device)

            # 1. Forward pass
            embeddings=model(X)

            indices_tuple=mining_func(embeddings,y)

            anchors=embeddings[indices_tuple[0]]
            positives=embeddings[indices_tuple[1]]
            negatives=embeddings[indices_tuple[2]]

            # 2. Calculate the loss
            loss=loss_fn(anchors,positives,negatives)

            loss=torch.nan_to_num(loss,nan=0.0)

            test_loss+=loss.item()

            # 3. Calculate the accuracy
    
    # Adjust the metrics to get averague loss and accuracy per batch
    test_loss=test_loss / len(dataloader)
    
    # test_acc=test_acc / len(dataloader)

    return test_loss #,test_acc

In [153]:
train_loss_model = train_step(model=model_1,
                              dataloader=train_dataloader,
                              loss_fn=loss_fn,
                              optimizer=optimizer,
                              mining_func=mining_func,
                              device=device)

test_loss_model = test_step(model=model_1,
                            dataloader=test_dataloader,
                            loss_fn=loss_fn,
                            mining_func=mining_func,
                            device=device)

TypeError: can only concatenate tuple (not "float") to tuple

In [None]:
print(f"The train loss is: {train_loss_model}\nThe test loss is:  {test_loss_model}")

The train loss is: 0.1980000028014183
The test loss is:  0.20000000268220902


## 9. `train_model`

In [None]:
def train_model(model:torch.nn.Module,
        train_dataloader:torch.utils.data.dataloader,
        test_dataloader:torch.utils.data.dataloader,
        optimizer:torch.optim.Optimizer,
        loss_fn:torch.nn.Module,
        minig_func_train,
        minig_func_test,
        epochs=5,
        device=device):

        # Creating empty results dictionary
        results={"train_loss":[],
                "test_loss":[]}
        
        # Loop through training and testing step for a number of epochs
        for epoch in tqdm(range(epochs)):
                train_loss=train_step(model=model,
                                    dataloader=train_dataloader,
                                    loss_fn=loss_fn,
                                    optimizer=optimizer,
                                    mining_func=mining_func,
                                    device=device)
                test_loss=test_step(model=model,
                                    dataloader=test_dataloader,
                                    loss_fn=loss_fn,
                                    mining_func=mining_func,
                                    device=device)
        
                # Print
                print(f"Epoch: {epoch} | Train loss:{train_loss} | Test loss : {test_loss}")
                
                # Update dictonaries
                results["train_loss"].append(train_loss)
                results["test_loss"].append(test_loss)

        return results

In [None]:
results_model_1=train_model(
    model=model_1,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    loss_fn=loss_fn,
    minig_func=minig_func,
    epochs=10,
    device=device
)

TypeError: train_model() got an unexpected keyword argument 'minig_func'

In [None]:
results_model_1

{'train_loss': [0.05000000447034836,
  0.05000000074505806,
  0.05000000447034836,
  0.05000000074505806,
  0.05000000447034836,
  0.05000000074505806,
  0.0,
  0.05000000447034836,
  0.05000000074505806,
  0.05000000447034836],
 'test_loss': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]}