In [26]:
import torch 
import numpy as np 
import matplotlib.pyplot as plt 
from torch import nn 
import config

In [27]:
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [28]:
# setup training device
device = "cuda" if torch.cuda.is_available() else "cpu"


# get efficientnet-b0 as a backbone
backbone = torch.hub.load('NVIDIA/DeepLearningExamples:torchhub', 'nvidia_efficientnet_b0', pretrained=True)

# delete the last classifier layer
backbone.classifier.fc=nn.Identity() 
backbone.classifier.dropout=nn.Identity()

# freeze all parameters for efficientnet-b0
for param in backbone.parameters():
    param.requires_grad = False

# putting model on device 
efficientnet= backbone.to(device)

Using cache found in C:\Users\moaaz/.cache\torch\hub\NVIDIA_DeepLearningExamples_torchhub


In [38]:
# siamese network
class Model(nn.Module): 
    def __init__(self  ): 
        """ 
        model input shape is (batch size , CH, W , H) 
        """
        super().__init__()
        self.pool = nn.Sequential( 
            nn.AvgPool1d(kernel_size = 3, stride=2 , padding=1)
        )
        self.f1 = nn.Sequential( 
            nn.Linear(in_features=640, out_features=1024 ),
            nn.ReLU(),
            nn.Dropout(p=0.2 ),
            nn.BatchNorm1d(num_features=1024)
        ) 
        self.f2 = nn.Sequential( 
            nn.Linear(in_features=1024, out_features=512 ),
            nn.ReLU(),
            nn.Dropout(p=0.2 ),
            nn.BatchNorm1d(num_features=512)
        )
        self.f3= nn.Sequential( 
            nn.Linear(in_features=512, out_features=256 ),
            nn.ReLU(),
            nn.Dropout(p=0.2 ),
            nn.BatchNorm1d(num_features=256))
        self.embed= nn.Sequential( 
            nn.Linear(in_features=256, out_features=128 ),
        ) 
    def forward (self , x ):
        x= backbone(x)
        x=self.pool(x)
        x= self.f1(x)
        x= self.f2(x)
        x= self.f3(x)
        x= self.embed(x)
        return x
        
model = Model().to(device)

In [39]:
# our loss calculation class 
class loss_fn(): 
    def __init__(self, margin , model):
        
        self.margin = torch.tensor(margin).to(device)
        self.model = model
        
    def _embedding(self,inputs : list[torch.tensor]):
        """
        inputs : list[torch.tensor] with the shape (positive, anchor, negative)
        output : list[torch.tensor] with the shape (positive, anchor, negative)
        """
        # [3,3,256,256]
        positive = self.model(inputs[0])
        anchor = self.model(inputs[1])
        negative = self.model(inputs[2])

        return [positive,anchor,negative]
        
    def compute_distance(self, inputs):

        embeddings = self._embedding(inputs)
        
        anchorEmbedding = embeddings[1]
        positiveEmbedding = embeddings[0]
        negativeEmbedding = embeddings[2]
    
        # calculate the anchor to positive and negative distance
        apDistance = torch.sum( torch.square(anchorEmbedding - positiveEmbedding), axis=-1)
        anDistance = torch.sum( torch.square(anchorEmbedding - negativeEmbedding), axis=-1 )
        return apDistance , anDistance
    
    def compute_loss(self , apDistance , anDistance):
        # return the distances
        loss = apDistance - anDistance
        loss = torch.max(loss + self.margin, torch.tensor(0.0).to(device))
        return loss

In [40]:
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

In [41]:
loss_function = loss_fn(0.5, model)

# Data loader 

In [42]:
class custom_data_loader(datasets.ImageFolder):
    def __init__(self, *arg, **kw) :
        super(custom_data_loader, self).__init__(*arg, **kw)
        self.n_triplets =len(self.samples)
        
        self.train_triplets = self.gen_example()
    def __len__(self):
        return (len(self.targets))

    def gen_example(self ): 
       
       labels = torch.Tensor(self.targets)
    
       triplets = []
       for x in np.arange(self.n_triplets): 
            
            idx = np.random.randint(0, labels.size(0))
            idx_matches = np.where(labels.numpy() == labels[idx].numpy())[0] 
            idx_no_matches = np.where(labels.numpy() != labels[idx].numpy())[0]
            idx_a, idx_p = np.random.choice(idx_matches, 2, replace=True)
            idx_n = np.random.choice(idx_no_matches, 1)[0]
            triplets.append([  idx_a,idx_p, idx_n])
       return np.array(triplets)
    
    def set_triplets(self, triplets):
        self.train_triplets = triplets

    def __getitem__(self, idx):
        t = self.train_triplets[idx]

        path_a, _ = self.samples[t[0]]
        path_p, _ = self.samples[t[1]]
        path_n, _ = self.samples[t[2]]

        img_a = self.loader(path_a)
        img_p = self.loader(path_p)
        img_n = self.loader(path_n)
        if self.transform is not None:
            img_a = self.transform(img_a)
            img_p = self.transform(img_p)
            img_n = self.transform(img_n)
        
        return img_p , img_a , img_n

In [43]:
data_transform= data_transform = transforms.Compose([
            transforms.Resize(size=config.IMAGE_SIZE ),
            transforms.ToTensor() 
        ])
triplet_data_train  =custom_data_loader( root=config.TRAIN_DATASET, transform=data_transform  )
triplet_dataloader_train  = DataLoader(triplet_data_train, batch_size=4, shuffle=False)

In [44]:
triplet_data_test  =custom_data_loader( root=config.TEST_DATASET, transform=data_transform  )
triplet_dataloader_test  = DataLoader(triplet_data_test, batch_size=4, shuffle=False)

In [45]:

def train_step(model: torch.nn.Module,
               train_data ,
               test_data,
               loss_fn,
               optimizer,
               device: torch.device = device ,
               epochs : int = 100):
    train_loss_acc = []
    test_loss_acc= []
    for i in range(epochs):
       
        train_loss, train_acc = 0, 0
        test_loss, test_acc =0,0
        for batch, (p , a,n ) in enumerate(train_data):
        # Send data to GPU
        # postive , anchor , negative = postive.to(device), anchor.to(device), negative.to(device)
            p ,a ,n = p.to(device),a.to(device),n.to(device)
            inputs = torch.stack([p,a,n] , dim =0)
        # 2. Calculate loss
            
            apDistance , anDistance = loss_fn.compute_distance(inputs)
            loss = loss_fn.compute_loss(apDistance , anDistance  )

            train_loss += loss.sum()
        # 3. Optimizer zero grad
            optimizer.zero_grad()
        # 4. Loss backward
            loss.sum().backward()
        # 5. Optimizer step
            optimizer.step() 
        train_loss_acc.append(train_loss.detach().to("cpu").numpy())
        model.eval()
        with torch.no_grad() : 
            for batch, (p , a,n ) in enumerate(test_data):
        
        # Send data to GPU
        # postive , anchor , negative = postive.to(device), anchor.to(device), negative.to(device)
                p ,a ,n = p.to(device),a.to(device),n.to(device)
                inputs = torch.stack([p,a,n] , dim =0)
        # 2. Calculate loss
            
                apDistance , anDistance = loss_fn.compute_distance(inputs)
                loss = loss_fn.compute_loss(apDistance , anDistance  )
 
                test_loss += loss.sum()

            test_loss_acc.append(test_loss.to("cpu").detach().numpy())
        print(f"train loss: {train_loss} test loss: {test_loss} @ epoch {i}")
    return train_loss_acc , test_loss_acc


In [46]:
train_loss , test_loss = train_step(model, triplet_dataloader_train , triplet_dataloader_test, loss_function, optimizer,  device)

train loss: 45.97726058959961 test loss: 4.542803764343262 @ epoch 0
train loss: 6.645621299743652 test loss: 4.683127403259277 @ epoch 1
train loss: 5.3045735359191895 test loss: 4.951138496398926 @ epoch 2
train loss: 3.935481548309326 test loss: 5.320071220397949 @ epoch 3
train loss: 2.4137771129608154 test loss: 5.638318061828613 @ epoch 4
train loss: 1.5338997840881348 test loss: 5.8872809410095215 @ epoch 5
train loss: 1.041686773300171 test loss: 6.031423091888428 @ epoch 6
train loss: 0.6912530660629272 test loss: 6.104761123657227 @ epoch 7
train loss: 0.16672658920288086 test loss: 6.15138053894043 @ epoch 8
train loss: 0.0 test loss: 6.180362701416016 @ epoch 9
train loss: 0.0 test loss: 6.200336933135986 @ epoch 10
train loss: 0.0 test loss: 6.2141432762146 @ epoch 11
train loss: 0.0 test loss: 6.225149154663086 @ epoch 12
train loss: 0.0 test loss: 6.232798099517822 @ epoch 13
train loss: 0.0 test loss: 6.2378973960876465 @ epoch 14
train loss: 0.0 test loss: 6.2413692474

TODO: 
- get a better model
- evaluation metrics
- setup testing on a single input  

In [68]:
model(data[0][0].to(device).unsqueeze(0))
# .to ( device ) => send data to the device we are using 
# unsqueeze(0) adds another dimension in the 0 axis to be able to pass it to the model

tensor([[ 0.2234,  0.1222, -0.1239, -0.1642, -0.2062,  0.0816,  0.1660, -0.1628,
          0.1041,  0.0876, -0.1518, -0.2030,  0.0851,  0.2966,  0.1166,  0.0952,
         -0.1162,  0.0172, -0.1691,  0.2308,  0.0208,  0.2810, -0.2433,  0.0848,
         -0.0793,  0.0678, -0.0411, -0.1001, -0.0844, -0.1642, -0.0314,  0.1733,
          0.0057,  0.2140,  0.2378, -0.0044, -0.0849,  0.1237,  0.0316,  0.0678,
          0.1436, -0.0590,  0.0117,  0.2900, -0.0457,  0.1262, -0.1463,  0.1422,
          0.0050,  0.0482, -0.1706,  0.1340, -0.0666,  0.1389, -0.1058, -0.0157,
          0.0182, -0.2733, -0.0520,  0.1310, -0.2148, -0.0417, -0.1726, -0.0980,
          0.1629,  0.0353,  0.0504,  0.1300, -0.0464,  0.1694, -0.0888,  0.0077,
          0.0493, -0.1013, -0.0331,  0.0960,  0.2619,  0.0717,  0.0127, -0.0680,
          0.0646, -0.1401, -0.0491, -0.3085, -0.1358, -0.1194, -0.0946,  0.1434,
          0.1243, -0.2062,  0.1443,  0.1248, -0.1398, -0.1277, -0.1244,  0.1654,
         -0.1781,  0.0957,  

give input of an image to the model => model -> embeddings ( vectors )
to find the person we want we measure distance between this embedding and the embeddings of out dataset
input - dataset 

open each folder 
make dictionary of the name of the person (folder name) as keys and values are the images
