In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

In [2]:
class Block(nn.Module):
    expansion = 4
    def __init__(self, in_channels, inner_channels, stride=1, projection=None):
        super(Block, self).__init__()
        self.block = nn.Sequential(nn.Conv2d(in_channels, inner_channels, 1, stride=stride, bias=False),
                                   nn.BatchNorm2d(inner_channels),
                                   nn.ReLU(inplace=True),
                                   nn.Conv2d(inner_channels, inner_channels, 3, padding=1, bias=False),
                                   nn.BatchNorm2d(inner_channels),
                                   nn.Conv2d(inner_channels, inner_channels * self.expansion, 1, bias=False),
                                   nn.BatchNorm2d(inner_channels*self.expansion)
        )

        #Projection is needed to downsample the identity mapping
        self.projection = projection
        self.relu = nn.ReLU(inplace=True)


    def forward(self, x):
        identity = x
        if self.projection is not None:
            identity = self.projection(x)
        
        #output is set to be residual
        output = self.block(x)
        output = self.relu(identity + output)
    
        return output


In [None]:
class ResNet_50(nn.Module):
    expansion = 4
    def __init__(self, in_channel, num_classes):
        super(ResNet_50, self).__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channels=64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu1 = nn.ReLU(inplace=True)
        self.maxpool1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.in_channels = 64

        self.conv2 = self.make_stage(64, 3)
        self.conv3 = self.make_stage(128, 4, stride=2)
        self.conv4 = self.make_stage(256, 6, stride=2)
        self.conv5 = self.make_stage(512, 3, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc = nn.Linear(2048, num_classes)

    def make_stage(self, inner_channels, num_blocks, stride = 1):
        #Only need projection when dimension is not equal 
        if stride != 1 or self.in_channels != inner_channels * self.expansion:
            projection = nn.Sequential(nn.Conv2d(self.in_channels, inner_channels * self.expansion, kernel_size=1, bias=False),
                                       nn.BatchNorm2d(inner_channels * self.expansion))
        else:
            projection = None

        layers = []
        layers += [Block(self.in_channels, inner_channels, stride=stride, projection=projection)]
        self.in_channels = inner_channels * self.expansion

        #Only the first layer is needed to be projected
        for i in range(1, num_blocks):
            layers += [Block(self.in_channels, inner_channels)]
        
        return nn.Sequential(*layers)
    
    def forward(self, x):
        #first layer
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)

        #Second Block 
        x = self.conv2(x)
        #Third Block
        x = self.conv3(x)
        #Fourth Block
        x = self.conv4(x)
        #Fifth Block 
        x = self.conv5(x)

        #GAP
        x = self.avgpool(x)
        x = torch.flatten(x,1)
        #Flatten and input to the classifier which is fully connected layer
        x = self.fc(x)

        return x

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = ResNet_50(3, 101).to(device)
print(model)

cuda
ResNet_50(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU(inplace=True)
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv2): Sequential(
    (0): Block(
      (block): Sequential(
        (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU(inplace=True)
        (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (5): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (6): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (projection): Sequential(
        (0): Conv2d(64, 256,

In [5]:
transforms = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
])

In [6]:
train_dataset = datasets.Food101(root="./data", split="train", transform=transforms, download=False)
test_dataset = datasets.Food101(root="./data", split="test", transform=transforms, download=False)

In [7]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [8]:
for images, labels in train_loader:
    print(images.shape)
    print(labels.shape)
    break

torch.Size([32, 3, 224, 224])
torch.Size([32])


In [10]:
!nvidia-smi

Thu Mar 20 10:52:40 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.94                 Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3080      WDDM  |   00000000:01:00.0  On |                  N/A |
|  0%   32C    P8             36W /  370W |    2246MiB /  10240MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 10
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    correct, total = 0, 0
    iteration = 0

    for image, labels in train_loader:
        iteration += 1
        print(f"iteration : {iteration}")
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    
    accuracy = 100 * correct / total
    print(f"Epoch [{epoch+1}/{num_epochs}], loss: {running_loss/len(train_loader):.4f}, Accuracy: {accuracy:.2f}")



iteration : 1


KeyboardInterrupt: 

In [10]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [11]:
from torchinfo import summary
summary(model, input_size=(2,3,224,224), device="cuda")

Layer (type:depth-idx)                   Output Shape              Param #
ResNet_50                                [2, 101]                  --
├─Conv2d: 1-1                            [2, 64, 112, 112]         9,408
├─BatchNorm2d: 1-2                       [2, 64, 112, 112]         128
├─ReLU: 1-3                              [2, 64, 112, 112]         --
├─MaxPool2d: 1-4                         [2, 64, 56, 56]           --
├─Sequential: 1-5                        [2, 256, 56, 56]          --
│    └─Block: 2-1                        [2, 256, 56, 56]          --
│    │    └─Sequential: 3-1              [2, 256, 56, 56]          16,896
│    │    └─Sequential: 3-2              [2, 256, 56, 56]          58,112
│    │    └─ReLU: 3-3                    [2, 256, 56, 56]          --
│    └─Block: 2-2                        [2, 256, 56, 56]          --
│    │    └─Sequential: 3-4              [2, 256, 56, 56]          70,400
│    │    └─ReLU: 3-5                    [2, 256, 56, 56]          --

In [9]:
import time
for images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)
    torch.cuda.synchronize()
    start_time = time.time()
    outputs = model(images)
    torch.cuda.synchronize()
    end_time = time.time()
    print(f"loading time : {end_time - start_time}")


loading time : 18.62337040901184


KeyboardInterrupt: 