# Recitation - 6
___

* Custom Dataset & DataLoader
* Torchvision ImageFolder Dataset
* Residual Block
* CNN model with Residual Block
* Loss Functions (Center Loss and Triplet Loss)

## Imports

In [None]:
import os
import numpy as np
from PIL import Image

from torchvision import  transforms
import torch
import torchvision   
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from zipfile import ZipFile

## Custom DataSet with DataLoader
___
We have used a subset of the data given for the Face Classification and Verification problem in Part 2 of the homework

#### Parse the given directory to accumulate all the images

In [None]:
# Mount Google Drive at /content/drive; the cells below read the dataset
# archives from there. Mounting prompts for an authorization code.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [None]:
# Read the validation/test archives from Google Drive and unpack each one into
# the current working directory.
DATA_ARCHIVE_DIR = "/content/drive/My Drive/11-785hw2p2-s20"

for archive_name in (
    "test_classification.zip",
    "test_verification.zip",
    "validation_verification.zip",
    "validation_classification.zip",
):
    # The context manager closes the archive even if extraction fails.
    with ZipFile(os.path.join(DATA_ARCHIVE_DIR, archive_name)) as archive:
        archive.extractall()



In [None]:
# Unpack the training archive into the current working directory.
train_archive_path = "/content/drive/My Drive/11-785hw2p2-s20/train_data.zip"
with ZipFile(train_archive_path) as train_archive:
    train_archive.extractall()

## Torchvision DataSet and DataLoader

In [None]:
# loading data
# Per-channel mean/std used by Normalize. Every split shares the same values so
# that evaluation inputs are scaled exactly like the training inputs.
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then tensor conversion + normalization.
train_dataTransform = transforms.Compose([
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(),
    #transforms.RandomRotation(10),
    #transforms.RandomAffine(5),
    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)])

# Evaluation pipeline: no augmentation, same normalization as training.
val_dataTransform = torchvision.transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(NORMALIZE_MEAN, NORMALIZE_STD)])

train_dataset = torchvision.datasets.ImageFolder(root='/content/train_data/medium/',
                                                 transform=train_dataTransform)

# The test split goes through the same evaluation transform as validation;
# feeding un-normalized images to a model trained on normalized ones would
# shift its input distribution.
test_dataset = torchvision.datasets.ImageFolder(root='/content/test_classification/',
                                                transform=val_dataTransform)

val_dataset = torchvision.datasets.ImageFolder(root='/content/validation_classification/medium/',
                                               transform=val_dataTransform)

In [None]:
# Shared loader settings.
BATCH_SIZE = 32
NUM_WORKERS = 8

# Only the training data is shuffled. Evaluation loaders keep a fixed order so
# that per-batch quantities reported during evaluation are reproducible.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
validation_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

In [None]:
# Number of batches per epoch and number of classes: training split is printed,
# validation split is shown as the cell's output value.
print(len(train_loader), len(train_dataset.classes))
len(validation_loader), len(val_dataset.classes)

25693 2300


(144, 2300)

## Residual Block

Resnet: https://arxiv.org/pdf/1512.03385.pdf

Here is a basic example of using a shortcut (skip) connection in ResNet.

In [None]:
class BottleNeck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Main path: conv1x1 -> BN -> ReLU -> conv3x3 -> BN -> ReLU -> conv1x1 -> BN.
    Shortcut path: a 1x1 convolution (stride ``stride_list[0]``) that keeps
    ``in_channel`` channels, zero-padding of the channel dimension up to
    ``last_channel``, then batch norm. The two paths are summed and passed
    through a final ReLU.

    Args:
        in_channel (int): number of channels of the input feature map.
        first_two_channel (int): output channels of the first two convolutions.
        last_channel (int): output channels of the block. Must be at least
            ``in_channel`` because the shortcut is widened by zero-padding.
        stride_list (sequence of 3 ints): strides of the three convolutions.
            The shortcut only applies ``stride_list[0]``, so the other two
            strides must be 1 for both paths to have matching spatial sizes.

    Raises:
        ValueError: if ``last_channel`` is smaller than ``in_channel``.
    """

    def __init__(self, in_channel, first_two_channel, last_channel, stride_list=(1, 1, 1)):
        super(BottleNeck, self).__init__()
        if last_channel < in_channel:
            raise ValueError(
                "last_channel ({}) must be >= in_channel ({}): the shortcut is "
                "zero-padded up to last_channel".format(last_channel, in_channel))
        self.first_two_channel = first_two_channel
        self.last_channel = last_channel

        self.conv1 = nn.Conv2d(in_channel, first_two_channel, kernel_size=1, stride=stride_list[0], padding=0, bias=False)
        self.bn1 = nn.BatchNorm2d(first_two_channel)
        # nn.ReLU's only argument is the ``inplace`` flag, so state it explicitly.
        self.activation = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(first_two_channel, first_two_channel, kernel_size=3, stride=stride_list[1], padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(first_two_channel)

        self.conv3 = nn.Conv2d(first_two_channel, last_channel, kernel_size=1, stride=stride_list[2], padding=0, bias=False)
        self.bn3 = nn.BatchNorm2d(last_channel)

        self.shortcut = nn.Conv2d(in_channel, in_channel, kernel_size=1, stride=stride_list[0], bias=False)
        self.bn = nn.BatchNorm2d(last_channel)

        # The layers stay registered both as attributes and inside ``layers``,
        # which keeps the state_dict key layout unchanged for saved checkpoints.
        self.layers = nn.Sequential(
            self.conv1, self.bn1, self.activation,
            self.conv2, self.bn2, self.activation,
            self.conv3, self.bn3,
        )

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut_path(x))``."""
        out = self.layers(x)

        shortcut = self.shortcut(x)
        pad_channels = self.last_channel - shortcut.size(1)
        if pad_channels > 0:
            # F.pad lists pads from the last dimension backwards:
            # (W_left, W_right, H_top, H_bottom, C_front, C_back).
            # The zeros inherit the dtype and device of ``shortcut``, so no
            # global ``device`` variable is needed.
            shortcut = F.pad(shortcut, (0, 0, 0, 0, 0, pad_channels))
        shortcut = self.bn(shortcut)

        out = out + shortcut
        return F.relu(out)

## CNN Model with Residual Block 

In [None]:
class Network(nn.Module):
    """CNN built from BottleNeck residual blocks with two linear heads.

    Layout: a 3x3 stem convolution to 256 channels, three 256-channel
    BottleNeck blocks, then one stage per entry of ``reps``. The first block of
    every stage uses stride 2 and changes the channel width; the remaining
    blocks of the stage keep that width. The final feature map is
    average-pooled to one vector per image, passed through dropout, and fed to
    two bias-free linear heads.

    Args:
        num_feats (int): number of input image channels.
        hidden_sizes1 (list[int]): inner (bottleneck) width of each stage.
        hidden_sizes2 (list[int]): output width of each stage.
        reps (list[int]): number of BottleNeck blocks in each stage.
        num_classes (int): number of output classes.
        feat_dim (int): size of the embedding passed to the center-loss criterion.
    """

    def __init__(self, num_feats, hidden_sizes1, hidden_sizes2, reps, num_classes, feat_dim=1000):
        super(Network, self).__init__()

        stem_channels = 64 * 4
        self.conv1 = nn.Conv2d(num_feats, stem_channels, kernel_size=3, stride=1, padding=1, bias=False)

        blocks = [self.conv1]
        # Three identical blocks at the stem width.
        for _ in range(3):
            blocks.append(BottleNeck(stem_channels, 64, stem_channels))

        in_channels = stem_channels
        for stage, num_blocks in enumerate(reps):
            for block_idx in range(num_blocks):
                if block_idx == 0:
                    # First block of a stage: downsample and switch to the stage width.
                    blocks.append(BottleNeck(in_channels, hidden_sizes1[stage], hidden_sizes2[stage],
                                             stride_list=[2, 1, 1]))
                else:
                    blocks.append(BottleNeck(in_channels, hidden_sizes1[stage], hidden_sizes2[stage]))
                in_channels = hidden_sizes2[stage]

        self.layers = nn.Sequential(*blocks)
        self.linear_label = nn.Linear(hidden_sizes2[-1], num_classes, bias=False)
        # For creating the embedding to be passed into the Center Loss criterion
        self.linear_closs = nn.Linear(hidden_sizes2[-1], feat_dim, bias=False)
        self.dropout = nn.Dropout()
        self.relu_closs = nn.ReLU(inplace=True)

    def forward(self, x, evalMode=False):
        """Return ``(closs_output, label_output)`` for a batch of images.

        Args:
            x: input batch; passed to a Conv2d, so laid out as (N, C, H, W).
            evalMode: accepted for interface compatibility; not used.

        Returns:
            tuple: ``closs_output`` is the ReLU-activated ``feat_dim`` embedding
            for the center loss; ``label_output`` holds the class scores, each
            divided by the L2 norm of its row of the classifier weight.
        """
        output = self.layers(x)

        # Global average pooling: one value per channel whatever the spatial
        # size of the final feature map.
        output = F.adaptive_avg_pool2d(output, 1).flatten(1)
        output = self.dropout(output)

        label_output = self.linear_label(output)
        label_output = label_output / torch.norm(self.linear_label.weight, dim=1)

        # Create the feature embedding for the Center Loss
        closs_output = self.relu_closs(self.linear_closs(output))

        return closs_output, label_output

def init_weights(m):
    """Apply Xavier-normal initialisation to the weight of a Conv2d or Linear module.

    Only modules whose type is exactly ``nn.Conv2d`` or ``nn.Linear`` are
    touched; every other module is left unchanged.
    """
    if type(m) not in (nn.Conv2d, nn.Linear):
        return
    nn.init.xavier_normal_(m.weight.data)

### Training & Testing Model

#### Dataset, DataLoader and Constant Declarations

In [None]:
# Training hyper-parameters shared by the cells below.
numEpochs = 50
print("Num of epochs is: ", numEpochs)

num_feats = 3  # number of input image channels

learningRate = 1e-2
weightDecay = 5e-5

# One output class per class folder discovered by ImageFolder.
num_classes = len(train_dataset.classes)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

Num of epochs is:  50


In [None]:
# Show whether PyTorch can see a CUDA device in this runtime.
torch.cuda.is_available()

True

## Center Loss
___
The following piece of code for Center Loss has been pulled and modified based on the code from the GitHub Repo: https://github.com/KaiyangZhou/pytorch-center-loss
    
<b>Reference:</b>
<i>Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.</i>

In [None]:
class CenterLoss(nn.Module):
    """Center loss: mean squared distance between each feature and its class center.

    Args:
        num_classes (int): number of classes.
        feat_dim (int): feature dimension.
        device (torch.device): device on which the class centers are created.
    """
    def __init__(self, num_classes, feat_dim, device=torch.device('cpu')):
        super(CenterLoss, self).__init__()
        self.num_classes = num_classes
        self.feat_dim = feat_dim
        self.device = device

        # One learnable center per class, drawn from a standard normal.
        self.centers = nn.Parameter(torch.randn(self.num_classes, self.feat_dim).to(self.device))

    def forward(self, x, labels):
        """
        Args:
            x: feature matrix with shape (batch_size, feat_dim).
            labels: ground truth labels with shape (batch_size).

        Returns:
            Scalar tensor: mean over the batch of the squared distance between
            each feature and the center of its labelled class, with every
            distance clamped to [1e-12, 1e+12].
        """
        batch_size = x.size(0)
        # Pairwise squared distances via ||x||^2 + ||c||^2 - 2 * x . c
        distmat = torch.pow(x, 2).sum(dim=1, keepdim=True).expand(batch_size, self.num_classes) + \
                  torch.pow(self.centers, 2).sum(dim=1, keepdim=True).expand(self.num_classes, batch_size).t()
        # Keyword form of addmm_: distmat = 1 * distmat + (-2) * (x @ centers^T).
        distmat.addmm_(x, self.centers.t(), beta=1, alpha=-2)

        # Boolean mask with True at (sample i, class labels[i]). The class index
        # is built on the labels' device so it still matches after the module
        # has been moved.
        classes = torch.arange(self.num_classes, device=labels.device).long()
        labels = labels.unsqueeze(1).expand(batch_size, self.num_classes)
        mask = labels.eq(classes.expand(batch_size, self.num_classes))

        # Boolean indexing selects the true-class distance of every sample in
        # row order, replacing a per-sample Python loop.
        dist = distmat[mask].clamp(min=1e-12, max=1e+12)  # for numerical stability
        loss = dist.mean()

        return loss

In [None]:
import time
def train_closs(optimizer_closs, optimizer_label, model, data_loader, test_loader, task='Classification'):
    """Train ``model`` with cross-entropy plus weighted center loss.

    Runs ``numEpochs`` epochs. Relies on the notebook-level names ``numEpochs``,
    ``device``, ``criterion_label``, ``criterion_closs`` and ``closs_weight``.

    Args:
        optimizer_closs: optimizer over the center-loss parameters.
        optimizer_label: optimizer over the model parameters.
        model: network whose forward returns ``(embedding, class_scores)``.
        data_loader: iterable of ``(images, labels)`` training batches.
        test_loader: loader evaluated after every epoch.
        task (str): ``'Classification'`` evaluates with ``test_classify_closs``
            and steps both LR schedulers; any other value calls ``test_verify``.

    Side effects:
        Prints progress and writes one checkpoint per epoch to Google Drive.
    """
    model.train()
    # Both learning rates are multiplied by 0.9 after every scheduler step.
    scheduler_label = torch.optim.lr_scheduler.StepLR(optimizer_label, 1, gamma=0.9)
    scheduler_closs = torch.optim.lr_scheduler.StepLR(optimizer_closs, 1, gamma=0.9)

    for epoch in range(numEpochs):
        print("Current running lr is: ", optimizer_label.param_groups[0]['lr'])

        avg_loss = 0.0
        epoch_start = time.time()
        window_start = epoch_start  # start of the current 50-batch window

        for batch_num, (feats, labels) in enumerate(data_loader):
            feats, labels = feats.to(device), labels.to(device)

            optimizer_label.zero_grad()
            optimizer_closs.zero_grad()

            feature, outputs = model(feats)
            l_loss = criterion_label(outputs, labels.long())
            c_loss = criterion_closs(feature, labels.long())
            loss = l_loss + closs_weight * c_loss

            loss.backward()

            optimizer_label.step()
            # Undo the closs_weight factor on the center gradients so the
            # weight does not change how fast the centers themselves move.
            for param in criterion_closs.parameters():
                param.grad.data *= (1. / closs_weight)
            optimizer_closs.step()

            avg_loss += loss.item()

            if batch_num % 50 == 49:
                now = time.time()
                print('Epoch: {}\tBatch: {}\tAvg-Loss: {:.4f}'.format(epoch+1, batch_num+1, avg_loss/50))
                avg_loss = 0.0
                # Report the duration of this window only, not the time since
                # the epoch started.
                print("Time for 50 batch ", now - window_start)
                window_start = now

            # clear the cuda cache
            torch.cuda.empty_cache()
            del feats
            del labels
            del loss

        if task == 'Classification':
            val_loss, val_acc, _, _, _ = test_classify_closs(model, test_loader)
            print('#########################    Val Loss: {:.4f}\tVal Accuracy: {:.4f}  ########################'.
                  format(val_loss, val_acc))
            scheduler_label.step()
            scheduler_closs.step()
        else:
            test_verify(model, test_loader)

        # The underscore keeps the epoch number separate from the trial number
        # in the file name (e.g. "classifier_trial2_15.pt").
        model_save_name = 'classifier_trial2_' + str(epoch + 1) + '.pt'
        path = F"/content/drive/My Drive/{model_save_name}"
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': (optimizer_closs.state_dict(), optimizer_label.state_dict()),
            # The class centers are learned too; save them so they can be restored.
            'closs_state_dict': criterion_closs.state_dict(),
            }, path)
        print("Running time for epoch " + str(epoch + 1) + " is " + str((time.time() - epoch_start)/60))




def test_classify_closs(model, test_loader):
    """Evaluate ``model`` on ``test_loader`` with the combined label + center loss.

    Relies on the notebook-level names ``device``, ``criterion_label``,
    ``criterion_closs`` and ``closs_weight``.

    Args:
        model: network whose forward returns ``(embedding, class_scores)``.
        test_loader: iterable of ``(images, labels)`` batches.

    Returns:
        tuple: ``(mean per-sample loss, accuracy, center loss of the last batch,
        label loss of the last batch, criterion_label)``. For an empty loader
        the mean loss is NaN, the accuracy is 0.0 and both batch losses are None.
    """
    was_training = model.training
    model.eval()
    test_loss = []
    accuracy = 0
    total = 0
    c_loss = l_loss = None

    # No gradients are needed for evaluation; skipping them saves memory.
    with torch.no_grad():
        for feats, labels in test_loader:
            feats, labels = feats.to(device), labels.to(device)
            feature, outputs = model(feats)

            # Softmax is monotonic, so the arg-max of the raw scores is the
            # predicted class.
            _, pred_labels = torch.max(outputs, 1)
            pred_labels = pred_labels.view(-1)
            l_loss = criterion_label(outputs, labels.long())
            c_loss = criterion_closs(feature, labels.long())
            loss = l_loss + closs_weight * c_loss

            accuracy += torch.sum(torch.eq(pred_labels, labels)).item()
            total += len(labels)
            # Weight the batch loss by the batch size so the final mean is per sample.
            test_loss.extend([loss.item()] * feats.size()[0])
            del feats
            del labels

    # Put the model back in whichever mode the caller had it in.
    model.train(was_training)

    mean_loss = np.mean(test_loss) if test_loss else float('nan')
    mean_accuracy = accuracy / total if total else 0.0
    return mean_loss, mean_accuracy, c_loss, l_loss, criterion_label

In [None]:
# Center-loss settings
closs_weight = 1   # weight of the center loss in the total loss
lr_cent = 0.5      # learning rate for the class centers
feat_dim = 100     # size of the embedding passed to the center loss

# Network architecture: per-stage inner widths, output widths and block counts
num_feats = 3
hidden_sizes1 = [128, 256, 512]
hidden_sizes2 = [512, 1024, 2048]
reps = [4,6,3]

network = Network(num_feats, hidden_sizes1, hidden_sizes2, reps, num_classes, feat_dim)

criterion_label = nn.CrossEntropyLoss()
criterion_closs = CenterLoss(num_classes, feat_dim, device)

# Separate optimizers: one for the network weights, one for the class centers.
# The learning rates come from the named constants rather than repeated literals.
optimizer_label = torch.optim.SGD(network.parameters(), lr=learningRate)
optimizer_closs = torch.optim.SGD(criterion_closs.parameters(), lr=lr_cent)


i == 0 and j == 0
i != 0 and j != 0
i != 0 and j != 0
i != 0 and j != 0
i != 0 and j == 0
i != 0 and j != 0
i != 0 and j != 0
i != 0 and j != 0
i != 0 and j != 0
i != 0 and j != 0
i != 0 and j == 0
i != 0 and j != 0
i != 0 and j != 0


In [None]:
# Print the module tree to inspect the layer layout.
print(network)

Network(
  (conv1): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (layers): Sequential(
    (0): Conv2d(3, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (1): BottleNeck(
      (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (layers): Sequential(

## Start Training .... 

In [None]:
# Move the network to the training device, run the full training loop, then
# store the final weights on Google Drive.
network.to(device)
network.train()
train_closs(optimizer_closs, optimizer_label, network, train_loader, validation_loader)

model_save_name = 'classifier_trail2_final.pt'
path = "/content/drive/My Drive/" + model_save_name
torch.save(network.state_dict(), path)

Current running lr is:  0.01
Epoch: 1	Batch: 50	Avg-Loss: 106.8876
Time for 50 batch  11.152533292770386
Epoch: 1	Batch: 100	Avg-Loss: 101.6963
Time for 50 batch  20.62079429626465
Epoch: 1	Batch: 150	Avg-Loss: 97.1377
Time for 50 batch  30.113090753555298
Epoch: 1	Batch: 200	Avg-Loss: 93.3351
Time for 50 batch  39.53418254852295
Epoch: 1	Batch: 250	Avg-Loss: 88.7976
Time for 50 batch  48.95110034942627
Epoch: 1	Batch: 300	Avg-Loss: 85.4999
Time for 50 batch  58.34293293952942
Epoch: 1	Batch: 350	Avg-Loss: 82.8654
Time for 50 batch  67.7856125831604
Epoch: 1	Batch: 400	Avg-Loss: 78.1947
Time for 50 batch  77.20900654792786
Epoch: 1	Batch: 450	Avg-Loss: 75.0018
Time for 50 batch  86.62083911895752
Epoch: 1	Batch: 500	Avg-Loss: 73.0690
Time for 50 batch  96.0624508857727
Epoch: 1	Batch: 550	Avg-Loss: 69.6154
Time for 50 batch  105.48491597175598
Epoch: 1	Batch: 600	Avg-Loss: 67.3595
Time for 50 batch  114.88822841644287
Epoch: 1	Batch: 650	Avg-Loss: 64.6115
Time for 50 batch  124.2935736

## Resume Training .... 

In [None]:
# resume learning
closs_weight = 1
lr_cent = 0.5
feat_dim = 100

num_feats = 3
hidden_sizes1 = [128, 256, 512]
hidden_sizes2 = [512, 1024, 2048]
reps = [4,6,3]

my_model = Network(num_feats, hidden_sizes1, hidden_sizes2, reps, num_classes, feat_dim)

# load model; map_location lets a checkpoint saved on a GPU load on whichever
# device this runtime provides
checkpoint = torch.load('./models/classifier_trial2_15.pt', map_location=device)
my_model.load_state_dict(checkpoint['model_state_dict'])
my_model.to(device)

# loss criteria; the class centers are restored when the checkpoint contains
# them under 'closs_state_dict', otherwise they start from a fresh random init
criterion_label = nn.CrossEntropyLoss()
criterion_closs = CenterLoss(num_classes, feat_dim, device)
if 'closs_state_dict' in checkpoint:
    criterion_closs.load_state_dict(checkpoint['closs_state_dict'])

# new Adam optimizers are created here; the optimizer states stored in the
# checkpoint are not restored
optimizer_label = torch.optim.Adam(my_model.parameters(), amsgrad=True, lr=0.008)
optimizer_closs = torch.optim.Adam(criterion_closs.parameters(), amsgrad=True, lr=0.4)

# retrain
my_model.train()

In [None]:
# Continue training the restored model with the optimizers created above.
train_closs(optimizer_closs, optimizer_label, my_model, train_loader, validation_loader)

In [None]:
# Evaluate on the validation set; the returned tuple is shown as the cell output.
test_classify_closs(my_model, validation_loader)

## Triplet Loss
___
You can make a DataLoader that returns a tuple of three images: two from the same class and one from a different class. You can then use triplet loss to increase the distance between the different-class pair and decrease the distance between the same-class pair.

More on this link: https://github.com/adambielski/siamese-triplet/blob/master/losses.py

In [None]:
triplet_loss = nn.TripletMarginLoss(margin=1.0, p=2)
# Take samples from the ImageFolder training set defined earlier in the notebook.
face_img1, label_img1 = train_dataset[0]
face_img2, label_img2 = train_dataset[1]
face_img3, label_img3 = train_dataset[-1]

print(label_img1, label_img2, label_img3)
## face_img1 and face_img2 are expected to be from the same class and face_img3
## from a different class -- check the labels printed above.
# Flatten every image into a single (1, D) vector so the loss compares whole
# images instead of treating each image row as a separate sample.
anchor = face_img1.flatten().unsqueeze(0)
positive = face_img2.flatten().unsqueeze(0)
negative = face_img3.flatten().unsqueeze(0)
loss = triplet_loss(anchor, positive, negative)
print ("Loss={:0.2f}".format(loss.item()))