In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

In [2]:
class Block(nn.Module):
    expansion = 4
    def __init__(self, in_channels, inner_channels, stride=1, projection=None):
        super(Block, self).__init__()
        self.block = nn.Sequential(nn.Conv2d(in_channels, inner_channels, 1, stride=stride, bias=False),
                                   nn.BatchNorm2d(inner_channels),
                                   nn.ReLU(inplace=True),
                                   nn.Conv2d(inner_channels, inner_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(inner_channels),
                                   nn.Conv2d(inner_channels, inner_channels * self.expansion, 1, bias=False),
                                   nn.BatchNorm2d(inner_channels*self.expansion)
        )

        #Projection is needed to downsample the identity mapping
        self.projection = projection
        self.relu = nn.ReLU(inplace=True)


    def forward(self, x):
        identity = x
        if self.projection is not None:
            identity = self.projection(x)
        
        #output is set to be residual
        output = self.block(x)
        output = self.relu(identity + output)
    
        return output


In [3]:
class ResNet_50(nn.Module):
    expansion = 4
    def __init__(self, in_channel, num_classes):
        super(ResNet_50, self).__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.in_channels = 64

        self.conv2 = self.make_stage(64, 3)
        self.conv3 = self.make_stage(128, 4, stride=2)
        self.conv4 = self.make_stage(256, 6, stride=2)
        self.conv5 = self.make_stage(512, 3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(2048, num_classes)

    def make_stage(self, inner_channels, num_blocks, stride=1):
        #Only need projection when dimension is not equal 
        if stride != 1 or self.in_channels != inner_channels * self.expansion:
            projection = nn.Sequential(nn.Conv2d(self.in_channels, inner_channels * self.expansion, stride=stride, kernel_size=1, bias=False),
                                       nn.BatchNorm2d(inner_channels * self.expansion))
        else:
            projection = None

        layers = []
        layers += [Block(self.in_channels, inner_channels, stride=stride, projection=projection)]
        self.in_channels = inner_channels * self.expansion

        #Only the first layer is needed to be projected
        for i in range(1, num_blocks):
            layers += [Block(self.in_channels, inner_channels)]
        
        return nn.Sequential(*layers)
    
    def forward(self, x):
        #first layer
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        #Second Block 
        x = self.conv2(x)
        #Third Block
        x = self.conv3(x)
        #Fourth Block
        x = self.conv4(x)
        #Fifth Block 
        x = self.conv5(x)

        #GAP
        x = self.avgpool(x)
        x = torch.flatten(x,1)
        #Flatten and input to the classifier which is fully connected layer
        x = self.fc(x)

        return x

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = ResNet_50(3, 101).to(device)
print(model)

cuda
ResNet_50(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU(inplace=True)
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv2): Sequential(
    (0): Block(
      (block): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (projection): Sequential(
        (0): Conv2d(64, 256,

In [5]:
transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

In [6]:
train_dataset = datasets.Food101(root="./data", split="train", transform=transforms, download=False)
test_dataset = datasets.Food101(root="./data", split="test", transform=transforms, download=False)

In [7]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, drop_last=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, drop_last=True)


In [8]:
for images, labels in train_loader:
    print(images.shape)
    print(labels.shape)
    break

torch.Size([64, 3, 224, 224])
torch.Size([64])


In [9]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 50
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    correct, total = 0, 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}")



Epoch [1/50], loss: 4.2044, Accuracy: 6.82
Epoch [2/50], loss: 3.7192, Accuracy: 13.66
Epoch [3/50], loss: 3.2546, Accuracy: 22.10
Epoch [4/50], loss: 2.8055, Accuracy: 31.36
Epoch [5/50], loss: 2.4455, Accuracy: 38.97
Epoch [6/50], loss: 2.1590, Accuracy: 45.31
Epoch [7/50], loss: 1.9200, Accuracy: 50.67
Epoch [8/50], loss: 1.7179, Accuracy: 55.15
Epoch [9/50], loss: 1.5257, Accuracy: 59.57
Epoch [10/50], loss: 1.3497, Accuracy: 63.84
Epoch [11/50], loss: 1.1576, Accuracy: 68.34
Epoch [12/50], loss: 0.9789, Accuracy: 72.79
Epoch [13/50], loss: 0.7951, Accuracy: 77.56
Epoch [14/50], loss: 0.6319, Accuracy: 81.72
Epoch [15/50], loss: 0.4830, Accuracy: 85.77
Epoch [16/50], loss: 0.3757, Accuracy: 88.93
Epoch [17/50], loss: 0.2966, Accuracy: 91.26
Epoch [18/50], loss: 0.2447, Accuracy: 92.66
Epoch [19/50], loss: 0.2098, Accuracy: 93.74
Epoch [20/50], loss: 0.1869, Accuracy: 94.40
Epoch [21/50], loss: 0.1632, Accuracy: 95.10
Epoch [22/50], loss: 0.1456, Accuracy: 95.63
Epoch [23/50], loss:

In [None]:
!pip install torchinfo



In [None]:
from torchinfo import summary
summary(model, input_size=(2,3,224,224), device="cuda")

Layer (type:depth-idx)                   Output Shape              Param #
ResNet_50                                [2, 101]                  --
├─Conv2d: 1-1                            [2, 64, 112, 112]         9,408
├─BatchNorm2d: 1-2                       [2, 64, 112, 112]         128
├─ReLU: 1-3                              [2, 64, 112, 112]         --
├─MaxPool2d: 1-4                         [2, 64, 56, 56]           --
├─Sequential: 1-5                        [2, 256, 56, 56]          --
│    └─Block: 2-1                        [2, 256, 56, 56]          --
│    │    └─Sequential: 3-1              [2, 256, 56, 56]          16,896
│    │    └─Sequential: 3-2              [2, 256, 56, 56]          58,112
│    │    └─ReLU: 3-3                    [2, 256, 56, 56]          --
│    └─Block: 2-2                        [2, 256, 56, 56]          --
│    │    └─Sequential: 3-4              [2, 256, 56, 56]          70,400
│    │    └─ReLU: 3-5                    [2, 256, 56, 56]          --

In [10]:
class_correct = torch.zeros(101, device=device)
class_total = torch.zeros(101, device=device)
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)

        output = model(images)

        _, predicted = torch.max(output, 1)
    
        for i in range(len(labels)):
            label = labels[i]
            class_correct[label]  += (predicted[i] == label).float()
            class_total[label] += 1

        
for i in range(101):
    if class_total[i] > 0:
        print(f"Class {i}: {100 * class_correct[i] / class_total[i]:.2f}%")

accuracy = 100 * class_correct.sum() / class_total.sum()
print(f"Overall Test Accuracy : {accuracy:.2f}%")

Class 0: 33.20%
Class 1: 40.40%
Class 2: 61.20%
Class 3: 56.40%
Class 4: 60.00%
Class 5: 47.20%
Class 6: 74.00%
Class 7: 72.40%
Class 8: 28.40%
Class 9: 25.60%
Class 10: 36.80%
Class 11: 71.60%
Class 12: 50.80%
Class 13: 74.80%
Class 14: 60.40%
Class 15: 42.80%
Class 16: 49.60%
Class 17: 52.40%
Class 18: 47.20%
Class 19: 52.00%
Class 20: 72.40%
Class 21: 43.20%
Class 22: 35.60%
Class 23: 53.20%
Class 24: 67.20%
Class 25: 70.80%
Class 26: 38.00%
Class 27: 63.20%
Class 28: 63.20%
Class 29: 53.60%
Class 30: 72.40%
Class 31: 43.60%
Class 32: 82.80%
Class 33: 98.00%
Class 34: 74.80%
Class 35: 49.60%
Class 36: 46.40%
Class 37: 30.00%
Class 38: 62.00%
Class 39: 29.60%
Class 40: 74.80%
Class 41: 63.60%
Class 42: 47.20%
Class 43: 22.80%
Class 44: 61.60%
Class 45: 65.60%
Class 46: 52.00%
Class 47: 34.00%
Class 48: 76.00%
Class 49: 38.80%
Class 50: 39.60%
Class 51: 77.60%
Class 52: 61.60%
Class 53: 43.60%
Class 54: 84.40%
Class 55: 49.20%
Class 56: 31.60%
Class 57: 53.20%
Class 58: 34.40%
Class 5