## **Pre-trained networks, Transfer learning and Ensembles**

# Using ResNet for Fashion MNIST in PyTorch

In [24]:
import torch
print(torch.__version__)
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.models as models
from torchvision import transforms
import time
from tqdm.autonotebook import tqdm
from torch.utils.data import DataLoader
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import inspect

import matplotlib.pyplot as plt
import numpy as np



1.13.1


### Downloading a pre-trained network, and changing the first and last layers

The input and output layers of the pre-trained network need to be changed, since ResNet was originally designed for ImageNet competition, which was a color (3-channel) image classification task with 1000 classes. 

MNIST dataset howerver only contains 10 classes and it’s images are in the grayscale (1-channel)

In [25]:
class MnistResNet(nn.Module):
  def __init__(self, in_channels=1):
    super(MnistResNet, self).__init__()

    # Load a pretrained resnet model from torchvision.models in Pytorch
    self.model = models.resnet50(weights='IMAGENET1K_V2')

    # Change the input layer to take Grayscale image, instead of RGB images. 
    # Hence in_channels is set as 1 or 3 respectively
    # original definition of the first layer on the ResNet class
    # self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.model.conv1 = nn.Conv2d(in_channels, 64, kernel_size=7, stride=2, padding=3, bias=False)
    
    # Change the output layer to output 10 classes instead of 1000 classes
    num_ftrs = self.model.fc.in_features
    self.model.fc = nn.Linear(num_ftrs, 10)

  def forward(self, x):
    return self.model(x)


my_resnet = MnistResNet()



### Test defined network, and verify layers


In [26]:
input = torch.randn((16,1,244,244))
output = my_resnet(input)
print(output.shape)

print(my_resnet)

torch.Size([16, 10])
MnistResNet(
  (model): ResNet(
    (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample):

### Define device

In [27]:
print(torch.backends.mps.is_available())
if torch.cuda.is_available():
    device = torch.device("cuda")
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device("cpu")

True


In [28]:
#Dataset
import os
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torchvision.io import read_image
import pandas as pd

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Grayscale(num_output_channels=1)
    # transforms.ToTensor()
])

path = "/Users/jeonboyun/Desktop/ssd/2023_1/ai_capstone/Test_Dataset"

class TestDataset(Dataset):
    
    def __init__(self, annotation_file, img_dir, transform=None):
        self.img_labels = pd.read_csv(annotation_file, encoding='utf-8')
        self.img_dir = img_dir
        self.transform = transform
    
    def __len__(self):
        return len(self.img_labels)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 1])
        image = read_image(img_path).float()
        label = self.img_labels.iloc[idx, 2]
        if self.transform: 
            image = self.transform(image)
        return image, label
    
test_data = TestDataset('test.csv', path, transform)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)

print(test_data[0][0].size())

torch.Size([1, 224, 224])


### Dataloaders


In [29]:
from torch.utils.data import Dataset
import os
import pandas as pd
import cv2
# class CTL(Dataset):
#     folder_label = {'Shoes' : 0, 'Belong' : 1, 'Shirts' : 2, 'Jewels' : 3}
#     label_folder = {0 : 'Shoes', 1 : 'Belong', 2 : 'Shirts', 3 : 'Jewels'}
#     def __init__(path:str):
#         super().__init__()
#         csv = pd.read_csv(path)
#         img = csv['Image']
#         label = csv['Label']

        
#     def __getitem__(self, index):
#         pass
#     def __len__():
#         pass

def get_data_loaders(train_batch_size, val_batch_size):
    fashion_mnist = torchvision.datasets.FashionMNIST(download=True, train=True, root=".").train_data.float()
    
    data_transform = transforms.Compose([ transforms.Resize((224, 224)),
                                         transforms.ToTensor(), 
                                         transforms.Normalize((fashion_mnist.mean()/255,), (fashion_mnist.std()/255,))])

    train_loader = DataLoader(torchvision.datasets.FashionMNIST(download=True, root=".", transform=data_transform, train=True),
                              batch_size=train_batch_size, shuffle=True)

    val_loader = DataLoader(torchvision.datasets.FashionMNIST(download=True, root=".", transform=data_transform, train=False),
                            batch_size=val_batch_size, shuffle=False)
    return train_loader, val_loader

### Supporting functions for metric calculation

In [30]:
def calculate_metric(metric_fn, true_y, pred_y):
    if "average" in inspect.getfullargspec(metric_fn).args:
        return metric_fn(true_y, pred_y, average="macro")
    else:
        return metric_fn(true_y, pred_y)
    
def print_scores(p, r, f1, a, batch_size):
    for name, scores in zip(("precision", "recall", "F1", "accuracy"), (p, r, f1, a)):
        print(f"\t{name.rjust(14, ' ')}: {sum(scores)/batch_size:.4f}")

### Pytorch Deep Learning Boilerplate

Boilerplate are the sections of code that have to be included in many places with little or no alteration

In [7]:
# model:
model = MnistResNet().to(device)

# params you need to specify:
epochs = 5
batch_size = 64

# Dataloaders
train_loader, val_loader = get_data_loaders(batch_size, batch_size)

# loss function and optimiyer
loss_function = nn.CrossEntropyLoss() # your loss function, cross entropy works well for multi-class problems

# optimizer, I've used Adadelta, as it wokrs well without any magic numbers
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4) # Using Karpathy's learning rate constant

start_ts = time.time()

losses = []
batches = len(train_loader)
val_batches = len(val_loader)

# loop for every epoch (training + evaluation)
for epoch in range(epochs):
    total_loss = 0

    # progress bar (works in Jupyter notebook too!)
    progress = tqdm(enumerate(train_loader), desc="Loss: ", total=batches)

    # ----------------- TRAINING  -------------------- 
    # set model to training
    model.train()
    
    for i, data in progress:
        X, y = data[0].to(device), data[1].to(device)
        
        # training step for single batch
        model.zero_grad()
        outputs = model(X)
        loss = loss_function(outputs, y)
        loss.backward()
        optimizer.step()

        # getting training quality data
        current_loss = loss.item()
        total_loss += current_loss

        # updating progress bar
        progress.set_description("Loss: {:.4f}".format(total_loss/(i+1)))
        
    # releasing unceseccary memory in GPU
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    
    # ----------------- VALIDATION  ----------------- 
    val_losses = 0
    precision, recall, f1, accuracy = [], [], [], []
    
    # set model to evaluating (testing)
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(val_loader):
            X, y = data[0].to(device), data[1].to(device)

            outputs = model(X) # this get's the prediction from the network

            val_losses += loss_function(outputs, y)

            predicted_classes = torch.max(outputs, 1)[1] # get class from network's prediction
            
            # calculate P/R/F1/A metrics for batch
            # for acc, metric in zip((precision, recall, f1, accuracy), 
            #                        (precision_score, recall_score, f1_score, accuracy_score)):
            #     acc.append(
            #         calculate_metric(metric, y.cpu(), predicted_classes.cpu())
            #     )
          
    print(f"Epoch {epoch+1}/{epochs}, training loss: {total_loss/batches}, validation loss: {val_losses/val_batches}")
    # print_scores(precision, recall, f1, accuracy, val_batches)
    losses.append(total_loss/batches) # for plotting learning curve
print(f"Training time: {time.time()-start_ts}s")

            




Loss:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 1/5, training loss: 0.3164469185692351, validation loss: 0.2243638038635254


Loss:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 2/5, training loss: 0.19055580337251873, validation loss: 0.20968376100063324


Loss:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 3/5, training loss: 0.15132231373431237, validation loss: 0.20165635645389557


Loss:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 4/5, training loss: 0.12125796068614639, validation loss: 0.20154008269309998


Loss:   0%|          | 0/938 [00:00<?, ?it/s]

Epoch 5/5, training loss: 0.09380623715113538, validation loss: 0.19532957673072815
Training time: 5482.075833082199s


### Save Model

In [31]:
#torch.save(model.state_dict(), "/tmp/MnistResNet.pt")    
#model = MnistResNet()
model_state_dict = torch.load("/tmp/MnistResNet.pt")
#model.load_state_dict(model_state_dict)
my_resnet.load_state_dict(model_state_dict)

<All keys matched successfully>

# Ensembles



Given a list of models, we can produce predictions for each model and then make an average to make a final prediction.

### Join a bunch of ResNet models


In [9]:
models_ensemble = [models.resnet50().to(device), models.resnet50().to(device)]
predictions = [F.softmax(m(torch.rand(1,3,224,244).to(device))) for m in models_ensemble] 
avg_prediction = torch.stack(predictions).mean(0).argmax()

NameError: name 'F' is not defined

### Test the defined network, and verify layers

In [None]:
print(models_ensemble)

In [None]:
avg_prediction

In [None]:
torch.stack(predictions)

In [32]:
#test
folder_label = {'Shoes' : 0, 'Belong' : 1, 'Shirts' : 2, 'Jewels' : 3}
label_folder = {0 : 'Shoes', 1 : 'Belong', 2 : 'Shirts', 3 : 'Jewels'}
import os
import timm
from tqdm.autonotebook import tqdm

#device = "cuda" if torch.cuda.is_available() else "cpu"
#model = torch.load("/Users/jeonboyun/Desktop/ssd/2023_1/ai_capstone/classifier/MnistResNet.pt", map_location=device)
criterion = torch.nn.CrossEntropyLoss()
val_losses = 0

my_resnet.eval()
with torch.no_grad():
    for i, data in tqdm(enumerate(test_loader)):
      X, y = data[0], data[1]
      outputs = my_resnet(X) # this get's the prediction from the network

      val_losses += criterion(outputs, y)

      predicted_classes = torch.max(outputs, 1)[1] # get class from network's prediction

0it [00:00, ?it/s]

In [34]:
val_losses

tensor(5830653.5000)