<a href="https://colab.research.google.com/github/singhbishtabhishek/ResNet-Implementation-/blob/main/ResNet_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn


In [2]:
class block(nn.Module):
  def __init__(self, in_channels, out_channels, identity_downsamples=None, stride=1):
    super(block, self).__init__()
    self.expansion=4
    self.conv1=nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
    self.bn1=nn.BatchNorm2d(out_channels)
    self.conv2=nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
    self.bn2=nn.BatchNorm2d(out_channels)
    self.conv3=nn.Conv2d(out_channels, out_channels*self.expansion, kernel_size=1, stride=1, padding=0)
    self.bn3=nn.BatchNorm2d(out_channels*self.expansion)
    self.relu=nn.ReLU()
    self.identity_downsamples=identity_downsamples

  def forward(self, x):
    identity=x

    x=self.conv1(x)
    x=self.bn1(x)
    x=self.relu(x)
    x=self.conv2(x)
    x=self.bn2(x)
    x=self.relu(x)
    x=self.conv3(x)
    x=self.bn3(x)

    if self.identity_downsamples is not None:
      identity=self.identity_downsamples(identity)

    x+=identity
    x=self.relu(x)
    return x

In [3]:
class ResNet(nn.Module):
  def __init__(self, block, layers, image_channels, num_classes):
    super(ResNet, self).__init__()
    self.in_channels=64
    self.conv1=nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
    self.bn1=nn.BatchNorm2d(64)
    self.relu=nn.ReLU()
    self.maxpool=nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    #ResNet layers starting from here
    self.layer1=self._make_layer(block, layers[0], out_channels=64, stride=1)
    self.layer2=self._make_layer(block, layers[1], out_channels=128, stride=2)
    self.layer3=self._make_layer(block, layers[2], out_channels=256, stride=2)
    self.layer4=self._make_layer(block, layers[3], out_channels=512, stride=2)

    self.avgpool=nn.AdaptiveAvgPool2d((1,1))
    self.fc=nn.Linear(512*4, num_classes)

  def forward(self, x):
    x=self.conv1(x)
    x=self.bn1(x)
    x=self.relu(x)
    x=self.maxpool(x)

    x=self.layer1(x)
    x=self.layer2(x)
    x=self.layer3(x)
    x=self.layer4(x)

    x=self.avgpool(x)
    x=x.reshape(x.shape[0], -1)
    x=self.fc(x)
    return x

  def _make_layer(self, block, num_residual_blocks, out_channels, stride):
    identity_downsample=None
    layers=[]

    if stride!=1 or self.in_channels!=out_channels*4:
      identity_downsample=nn.Sequential(nn.Conv2d(self.in_channels, out_channels*4, kernel_size=1, stride=stride), nn.BatchNorm2d(out_channels*4))

      layers.append(block(self.in_channels, out_channels, identity_downsample, stride))
      self.in_channels=out_channels*4

      for i in range(num_residual_blocks-1):
        layers.append(block(self.in_channels, out_channels))

      return nn.Sequential(*layers)



In [4]:
def ResNet50(img_channels=3, num_classes=1000):
  return ResNet(block, [3,4,6,3], img_channels, num_classes)

def ResNet101(img_channels=3, num_classes=1000):
  return ResNet(block, [3,8,36,3], img_channels, num_classes)

def ResNet152(img_channels=3, num_classes=1000):
  return ResNet(block, [3,4,23,3], img_channels, num_classes)


In [5]:
def test():
  net=ResNet50()
  x=torch.randn(2,3,224,224)
  y=net(x).to('cuda')
  print(y.shape)

test()

torch.Size([2, 1000])


In [6]:
def test():
  net=ResNet101()
  x=torch.randn(2,3,224,224)
  y=net(x).to('cuda')
  print(y.shape)

test()

torch.Size([2, 1000])


In [7]:
def test():
  net=ResNet152()
  x=torch.randn(2,3,224,224)
  y=net(x).to('cuda')
  print(y.shape)

test()

torch.Size([2, 1000])


In [8]:
net = ResNet50()
print("ResNet-50 params:", sum(p.numel() for p in net.parameters()))

net = ResNet101()
print("ResNet-101 params:", sum(p.numel() for p in net.parameters()))

net = ResNet152()
print("ResNet-152 params:", sum(p.numel() for p in net.parameters()))


ResNet-50 params: 25583592
ResNet-101 params: 60268520
ResNet-152 params: 44601832


In [9]:
import torchvision
import torchvision.transforms as transforms


In [10]:
transform_train = transforms.Compose([
    transforms.Resize(224),
    transforms.RandomCrop(224, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

transform_test = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)


100%|██████████| 170M/170M [00:21<00:00, 7.92MB/s]


In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = ResNet50(img_channels=3, num_classes=10).to(device)


In [12]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)


In [13]:
from tqdm import tqdm

def train(net, trainloader, optimizer, criterion, device, epoch):
    net.train()
    running_loss, correct, total = 0.0, 0, 0

    loop = tqdm(trainloader, leave=True)
    for inputs, targets in loop:
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        loop.set_description(f"Epoch [{epoch}]")
        loop.set_postfix(loss=running_loss/total, acc=100.*correct/total)

    return running_loss/len(trainloader), 100.*correct/total



In [14]:
def test(net, testloader, criterion, device):
    net.eval()
    running_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for inputs, targets in testloader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    avg_loss = running_loss / len(testloader)
    accuracy = 100. * correct / total
    return avg_loss, accuracy


In [15]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os

checkpoint_path = "/content/drive/MyDrive/checkpoints/resnet_checkpoint.pth"

start_epoch = 0
if os.path.isfile(checkpoint_path):
    checkpoint = torch.load(checkpoint_path, map_location=device)
    net.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    scheduler.load_state_dict(checkpoint['scheduler_state'])
    start_epoch = checkpoint['epoch']

checkpoint_dir = "/content/drive/MyDrive/checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

num_epochs = 100
for epoch in range(start_epoch + 1, num_epochs + 1):
    train_loss, train_acc = train(net, trainloader, optimizer, criterion, device, epoch)
    test_loss, test_acc = test(net, testloader, criterion, device)
    scheduler.step()

    print(f"Epoch {epoch}/{num_epochs} "
          f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}% "
          f"Test Loss: {test_loss:.4f} | Test Acc: {test_acc:.2f}%")

    state = {
        'epoch': epoch,
        'model_state': net.state_dict(),
        'optimizer_state': optimizer.state_dict(),
        'scheduler_state': scheduler.state_dict()
    }
    torch.save(state, checkpoint_path)


Epoch [17]: 100%|██████████| 391/391 [10:26<00:00,  1.60s/it, acc=87.2, loss=0.00285]


Epoch 17/100 Train Loss: 0.3645 | Train Acc: 87.20% Test Loss: 0.5005 | Test Acc: 83.35%


Epoch [18]: 100%|██████████| 391/391 [10:27<00:00,  1.60s/it, acc=88.1, loss=0.00267]


Epoch 18/100 Train Loss: 0.3408 | Train Acc: 88.12% Test Loss: 0.5348 | Test Acc: 82.92%


Epoch [19]: 100%|██████████| 391/391 [10:26<00:00,  1.60s/it, acc=88.5, loss=0.00256]


Epoch 19/100 Train Loss: 0.3271 | Train Acc: 88.51% Test Loss: 0.5375 | Test Acc: 82.54%


Epoch [20]: 100%|██████████| 391/391 [10:24<00:00,  1.60s/it, acc=89.2, loss=0.00239]


Epoch 20/100 Train Loss: 0.3051 | Train Acc: 89.22% Test Loss: 0.5545 | Test Acc: 81.45%


Epoch [21]: 100%|██████████| 391/391 [10:23<00:00,  1.59s/it, acc=90, loss=0.00227]


Epoch 21/100 Train Loss: 0.2907 | Train Acc: 89.97% Test Loss: 0.5363 | Test Acc: 82.47%


Epoch [22]: 100%|██████████| 391/391 [10:24<00:00,  1.60s/it, acc=90.4, loss=0.00215]


Epoch 22/100 Train Loss: 0.2748 | Train Acc: 90.43% Test Loss: 0.5107 | Test Acc: 83.48%


Epoch [23]: 100%|██████████| 391/391 [10:24<00:00,  1.60s/it, acc=90.7, loss=0.00208]


Epoch 23/100 Train Loss: 0.2664 | Train Acc: 90.67% Test Loss: 0.4438 | Test Acc: 85.11%


Epoch [24]: 100%|██████████| 391/391 [10:23<00:00,  1.60s/it, acc=91.3, loss=0.00195]


Epoch 24/100 Train Loss: 0.2499 | Train Acc: 91.27% Test Loss: 0.4918 | Test Acc: 84.55%


Epoch [25]: 100%|██████████| 391/391 [10:24<00:00,  1.60s/it, acc=91.6, loss=0.00186]


Epoch 25/100 Train Loss: 0.2381 | Train Acc: 91.63% Test Loss: 0.4909 | Test Acc: 84.35%


Epoch [26]: 100%|██████████| 391/391 [10:24<00:00,  1.60s/it, acc=91.6, loss=0.00182]


Epoch 26/100 Train Loss: 0.2321 | Train Acc: 91.64% Test Loss: 0.5462 | Test Acc: 82.70%


Epoch [27]:  11%|█         | 43/391 [01:09<09:19,  1.61s/it, acc=93.8, loss=0.00142]