<a href="https://colab.research.google.com/github/martinpius/ARCHITECTURES/blob/main/RESIDUAL_NETWORK_PYTORCH_IMPLEMENTANTION_FROM_SCRATCH_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from google.colab import drive
drive.mount("/content/drive", force_remount = True)
try:
  COLAB = True
  import torch
  print(f">>>> You are on CoLaB with torch version: {torch.__version__}")
except Exception as e:
  print(f">>>> {type(e)} {e}\n>>>> please correct {type(e)} and reload your device")
  COLAB = False
def __time__(t: float = 123.819)->float:
  h = int(t / (60 * 60))
  m = int(t % (60 * 60) / 60)
  s = int(t % 60)
  return f"hrs: {h} min: {m:>02} sec: {s:>05.2f}"
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")
print(f">>>> testing the time formating function....\n>>>> time elapsed\t{__time__()}")

Mounted at /content/drive
>>>> You are on CoLaB with torch version: 1.9.0+cu102
>>>> testing the time formating function....
>>>> time elapsed	hrs: 0 min: 02 sec: 03.00


In [5]:
#In this notbook we are going to implement the residual network (RESNET101) from 
#scratch. The main feature of this network sre the residual connections between the
#convolution blocks. This idea of skip-connection helps to improve the network by adding more layers

In [6]:
import torch.nn as nn
import time

In [14]:
#We first create a general convolution block to be used later in the network
class C_block(nn.Module):
  def __init__(self, in_channels, out_channels, id_downsample = None, stride = 1):
    super(C_block, self).__init__()
    self.id_downsample = id_downsample
    self.expansion = 4
    self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size = 1, stride = 1, padding = 0)
    self.bnorm1 = nn.BatchNorm2d(num_features = out_channels)
    self.conv2 = nn.Conv2d(out_channels,out_channels, kernel_size = 3, stride = stride, padding = 1)
    self.bnorm2 = nn.BatchNorm2d(out_channels)
    self.conv3 = nn.Conv2d(out_channels, self.expansion * out_channels, kernel_size =1, stride = 1, padding = 0)
    self.bnorm3 = nn.BatchNorm2d(self.expansion* out_channels)
    self.relu = nn.ReLU()
  
  def forward(self, x):
    identity = x
    x = self.conv1(x)
    x = self.bnorm1(x)
    x = self.relu(x)
    x = self.conv2(x)
    x = self.bnorm2(x)
    x = self.relu(x)
    x = self.conv3(x)
    x = self.bnorm3(x)
    x = self.relu(x)
    if self.id_downsample is not None:
      identity = self.id_downsample(identity)
    x+= identity
    x = self.relu(x)
    return x

class ResNet(nn.Module):
  def __init__(self, block, layers, image_channels, num_classes):
    super(ResNet, self).__init__()
    #The initial block is a 7x7 conv block-bnorm-relu-maxpoolx3
    self.in_channels = 64
    self.conv1 = nn.Conv2d(image_channels, 64, kernel_size = 7, stride = 2, padding = 3)
    self.bnorm = nn.BatchNorm2d(64)
    self.relu = nn.ReLU()
    self.maxpool = nn.MaxPool2d(kernel_size = 3, stride = 2, padding = 1)
    #Residual blocks start here:
    self.res1 = self.__layers__(block, layers[0],out_channels = 64, stride = 1)
    self.res2 = self.__layers__(block, layers[1],out_channels = 128, stride = 2)
    self.res3 = self.__layers__(block, layers[2],out_channels = 256, stride = 2)
    self.res4 = self.__layers__(block, layers[3],out_channels = 512, stride = 2)
    self.avg_pool = nn.AdaptiveAvgPool2d((1,1))
    self.fc = nn.Linear(4 * 512, num_classes)
  
  def forward(self, input_tensor):
    #intro block
    x = self.maxpool(self.relu(self.bnorm(self.conv1(input_tensor))))
    #residual blocks
    x = self.res1(x)
    x = self.res2(x)
    x = self.res3(x)
    x = self.res4(x)
    x = self.avg_pool(x)
    x = x.reshape(x.shape[0], -1)
    return self.fc(x)

  def __layers__(self, block, num_res_blocks, out_channels, stride):
    id_downsample = None
    layers = []
    if stride !=1 or self.in_channels != out_channels*4:
      id_downsample = nn.Sequential(
          nn.Conv2d(self.in_channels, out_channels*4, kernel_size = 1, stride = stride),
          nn.BatchNorm2d(4*out_channels))
    layers.append(block(self.in_channels, out_channels, id_downsample, stride))
    self.in_channels = 4 * out_channels
    for k in range(num_res_blocks - 1):
      layers.append(block(self.in_channels, out_channels))
    return nn.Sequential(*layers)

#Define the specific resnet50, resnet101 and resnet152
def resnet50(img_channels = 3, num_classes = 1000):
  return ResNet(C_block, [3, 4, 6, 3], img_channels, num_classes)

def resnet101(img_channels = 3, num_classes = 1000):
  return ResNet(C_block, [3, 4, 23, 3], img_channels, num_classes)

def resnet152(img_channels = 3, num_classes = 1000):
  return ResNet(C_block, [3, 8, 36, 3], img_channels, num_classes)

#Instantiate the model class and test to see the expected outputs
def __testing__():
  rnd_data = torch.randn(size = (1, 3, 224,224))
  net1 = resnet50()
  out1 = net1(rnd_data).to(device = device)
  net2 = resnet101()
  out2 = net2(rnd_data).to(device = device)
  net3 = resnet152()
  out3 = net3(rnd_data).to(device = device)
  print(f">>>> resnet50_out_shape: {out1.shape}\n>>>> resnet101_out_shape: {out2.shape}\n>>>> resnet152_out_shape: {out3.shape}")
  print("\n\n")
  print(f"\n>>>> resnet50 Graph\n\n{net1}")
  print("\n\n")
  print(f"\n>>>> resnet101 Graph\n\n{net2}")
  print("\n\n")
  print(f"\n>>>> resnet152 Graph\n\n{net3}")
tic = time.time()
__testing__()
toc = time.time()
print(f"\n>>>> time elapsed: {__time__(toc - tic)}")


>>>> resnet50_out_shape: torch.Size([1, 1000])
>>>> resnet101_out_shape: torch.Size([1, 1000])
>>>> resnet152_out_shape: torch.Size([1, 1000])




>>>> resnet50 Graph

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
  (bnorm): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU()
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (res1): Sequential(
    (0): C_block(
      (id_downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1))
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (bnorm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (bnorm2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True,