<a href="https://colab.research.google.com/github/martinpius/ARCHITECTURES/blob/main/EFFICIENTNET_ARCHITECTURE_WITH_PYTORCH_IMPLEMENTATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Notebook setup: mount Google Drive, then import torch and report its version.
# (google.colab is presumably only importable inside a Colab runtime.)
from google.colab import drive
# NOTE(review): the mount happens outside the try block below, so a mount
# failure is not caught by the handler that asks to "re-load your drive".
drive.mount("/content/drive", force_remount = True)
try:
  # NOTE(review): COLAB is set before the import is attempted and is never
  # reset in the except branch, so it stays True even if the import fails.
  COLAB = True
  import torch
  print(f">>>> You are on CoLaB with torch version: {torch.__version__}")
except Exception as e:
  # The error is only printed, not re-raised; if `import torch` failed, the
  # later `torch.cuda.is_available()` call will raise NameError.
  print(f">>>> {type(e)} {e}\n>>>> please correct {type(e)} and re-load your drive")
def time_fmt(t: float = 123.87) -> str:
  """Format a duration in seconds as ``hrs: H min: MM sec: SS.00``.

  Args:
    t: Elapsed time in seconds.

  Returns:
    The formatted string. Seconds are truncated to a whole number before
    formatting, so the two printed decimals are always ``00``.
  """
  seconds_per_hour = 60 * 60
  h = int(t / seconds_per_hour)
  # Minutes and seconds are taken from the remainder so they stay below 60.
  m = int(t % seconds_per_hour / 60)
  s = int(t % 60)
  return f"hrs: {h} min: {m:>02} sec: {s:>05.2f}"
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Smoke-test time_fmt using its default argument.
print(f">>>> testing the time formating function...\n>>>> time elapsed: {time_fmt()}")

Mounted at /content/drive
>>>> You are on CoLaB with torch version: 1.9.0+cu102
>>>> testing the time formating function...
>>>> time elapsed: hrs: 0 min: 02 sec: 03.00


In [1]:
import torch.nn as nn
from math import ceil
import time


In [2]:
#In this notebook we implement EfficientNet from scratch.
#EfficientNet is an efficient way to build and scale a computer-vision network:
#it combines standard techniques such as stochastic depth, squeeze-and-excitation
#and inverted residual blocks, and the base network can be scaled with a few
#specified parameters to reach better performance with fewer parameters.

In [2]:
#Stage table for the base network. Each row reads:
#[expansion ratio, output channels, number of layers, stride, kernel size]
net_info = [
  [1, 16, 1, 1, 3],
  [6, 24, 2, 2, 3],
  [6, 40, 2, 2, 5],
  [6, 80, 3, 2, 3],
  [6, 112, 3, 1, 5],
  [6, 192, 4, 2, 5],
  [6, 320, 1, 1, 3],
]

#Scaling parameters per model version: (phi value, input resolution, dropout rate)
scale_info = {
    'b0': (0, 224, 0.20),
    'b1': (0.5, 240, 0.20),
    'b2': (1, 260, 0.30),
    'b3': (2, 300, 0.30),
    'b4': (3, 380, 0.40),  # was 300, which duplicated b3; B4's resolution is 380
    'b5': (4, 456, 0.40),
    'b6': (5, 528, 0.50),
    'b7': (6, 600, 0.50),
}

In [4]:
#Convolution block (Conv2d -> BatchNorm2d -> SiLU), to be used later on in the network
class CNN(nn.Module):
  """Bias-free 2-D convolution followed by batch normalisation and SiLU.

  Args:
    in_channels: Number of channels of the input tensor.
    out_channels: Number of channels produced by the convolution.
    kernel_size: Convolution kernel size.
    stride: Convolution stride.
    padding: Padding applied to the input by the convolution.
    groups: Number of convolution groups (defaults to 1).
  """

  def __init__(self, in_channels, out_channels, kernel_size, stride, padding, groups = 1):
    super().__init__()
    # No bias on the convolution: the BatchNorm2d that follows is applied
    # directly to its output.
    self.conv = nn.Conv2d(in_channels = in_channels,
                          out_channels = out_channels,
                          kernel_size = kernel_size,
                          stride = stride,
                          padding = padding,
                          groups = groups,
                          bias = False)
    self.bnorm = nn.BatchNorm2d(num_features = out_channels)
    self.silu = nn.SiLU()

  def forward(self, input_tensor):
    """Apply convolution, batch normalisation and SiLU, in that order."""
    convolved = self.conv(input_tensor)
    normalised = self.bnorm(convolved)
    return self.silu(normalised)

In [6]:
#A class to compute attention-like scores for each channel:
#This class will be used in the inverted-residual block
class ChannelAttention(nn.Module):
  """Re-weight every channel of the input by a learned score in (0, 1).

  Args:
    in_channels: Number of channels of the input tensor.
    reduction: Number of channels in the bottleneck between the two 1x1
      convolutions (a channel count, not a ratio).
  """

  def __init__(self, in_channels, reduction):
    super().__init__()
    self.attn = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),  # one value per channel
        nn.Conv2d(in_channels, reduction, 1),
        nn.SiLU(),
        nn.Conv2d(reduction, in_channels, 1),
        nn.Sigmoid())  # weight probabilities for each channel

  def forward(self, input_tensor):
    """Multiply each input channel by its weight to express its importance.

    The pooled scores are broadcast over the spatial dimensions of
    ``input_tensor``, so the output has the same shape as the input.
    """
    return input_tensor * self.attn(input_tensor)
      


In [9]:
#The inverted residual network: We utilize the above CNN block here
class InvResNet(nn.Module):
  """Inverted residual block with channel attention and stochastic depth.

  The block optionally expands the input channels, applies a per-channel
  (grouped) convolution, re-weights channels with ChannelAttention, and
  projects to ``out_channels`` with a 1x1 convolution plus batch norm. A
  skip connection is added when the input and output shapes match.

  Args:
    in_channels: Number of channels of the input tensor.
    out_channels: Number of channels of the output tensor.
    kernel_size: Kernel size of the grouped convolution.
    stride: Stride of the grouped convolution.
    padding: Padding of the grouped convolution.
    expansion: Multiplier applied to ``in_channels`` to get the hidden width.
    reduction: Divisor applied to ``in_channels`` to get the bottleneck
      width of the channel attention.
    surv_prob: Probability of keeping the block's output for a sample
      during training (stochastic depth).
  """

  def __init__(self, in_channels,
               out_channels,
               kernel_size,
               stride,
               padding,
               expansion,
               reduction = 4, surv_prob = 0.8):
    super().__init__()
    # Honour the caller's value (it used to be hard-coded to 0.8).
    self.surv_prob = surv_prob
    # A skip connection is only valid when input and output shapes agree.
    self.use_residual = in_channels == out_channels and stride == 1
    hidden_dim = in_channels * expansion  # expanded width
    self.expand = in_channels != hidden_dim
    # Never let the attention bottleneck collapse to zero channels.
    reduced_dim = max(1, int(in_channels / reduction))

    if self.expand:
      # NOTE(review): the expansion uses a 3x3 kernel; confirm this is intended.
      self.expand_cnn = CNN(in_channels,
                            out_channels = hidden_dim,
                            kernel_size = 3,
                            stride = 1,
                            padding = 1)

    self.conv = nn.Sequential(
        # groups == channels: every channel is convolved on its own.
        CNN(in_channels = hidden_dim,
            out_channels = hidden_dim,
            kernel_size = kernel_size,
            stride = stride,
            padding = padding,
            groups = hidden_dim),
        ChannelAttention(hidden_dim, reduced_dim),
        nn.Conv2d(in_channels = hidden_dim,
                  out_channels = out_channels,
                  kernel_size = 1,
                  bias = False),
        nn.BatchNorm2d(out_channels))

  def stochastic_depth(self, input_tensor):
    """Randomly zero whole samples during training; identity in eval mode.

    Kept samples are divided by ``surv_prob``.
    """
    if not self.training:
      return input_tensor
    # One keep/drop decision per sample, created on the input's own device
    # so the block does not depend on a module-level `device` global.
    keep_mask = torch.rand(input_tensor.shape[0], 1, 1, 1,
                           device = input_tensor.device) < self.surv_prob
    return torch.div(input_tensor, self.surv_prob) * keep_mask

  def forward(self, input_tensor):
    """Run the block; add the skip connection when shapes allow it."""
    x = self.expand_cnn(input_tensor) if self.expand else input_tensor
    # Feed the (possibly expanded) tensor, not the raw input, to the conv stack.
    x = self.conv(x)
    if self.use_residual:
      return self.stochastic_depth(x) + input_tensor
    return x


In [10]:
#The EfficientNet class: We now develop our model class:

In [11]:
class EfficientNet(nn.Module):
  """EfficientNet classifier assembled from the ``net_info`` stage table.

  Args:
    version: Key into ``scale_info`` (``'b0'`` ... ``'b7'``) selecting the
      scaling coefficient and dropout rate.
    num_classes: Size of the final linear layer's output.

  Raises:
    KeyError: If ``version`` is not a key of ``scale_info``.
  """

  def __init__(self, version, num_classes):
    super().__init__()
    # compute_factors returns (depth, width, dropout) in that order.
    d_factor, w_factor, drp_rate = self.compute_factors(version)
    final_channel = ceil(1280 * w_factor)  # last channel (given in the paper)
    self.pool = nn.AdaptiveAvgPool2d(1)
    self.features = self.create_features(w_factor, d_factor, final_channel)
    self.classifier = nn.Sequential(
        nn.Dropout(drp_rate),
        nn.Linear(final_channel, num_classes))

  def compute_factors(self, version, alpha = 1.2, beta = 1.1):
    """Return ``(depth_factor, width_factor, dropout_rate)`` for ``version``.

    ``alpha`` is the depth-scaling base and ``beta`` the width-scaling base;
    each is raised to the version's phi value. The resolution stored in
    ``scale_info`` is not used here.
    """
    phi, _res, drp_rate = scale_info[version]
    d_factor = alpha ** phi  # how the number of layers grows
    w_factor = beta ** phi  # how the number of channels grows
    return d_factor, w_factor, drp_rate

  def create_features(self, w_factor, d_factor, final_channels):
    """Build the convolutional feature extractor as an ``nn.Sequential``.

    Args:
      w_factor: Multiplier applied to every stage's channel count.
      d_factor: Multiplier applied to every stage's number of layers.
      final_channels: Output channels of the closing 1x1 CNN block.
    """
    stem_channels = int(32 * w_factor)
    features = [CNN(in_channels = 3, out_channels = stem_channels,
                    kernel_size = 3, stride = 2, padding = 1)]
    in_channels = stem_channels
    for expand_ratio, stage_channels, repeats, stride, kernel_size in net_info:
      # Round the scaled width up to a multiple of 4.
      out_channels = 4 * ceil((stage_channels * w_factor) / 4)
      # Depth, not width, controls how often a stage is repeated.
      layers_rpt = ceil(repeats * d_factor)
      for layer in range(layers_rpt):
        features.append(InvResNet(in_channels,
                                  out_channels,
                                  expansion = expand_ratio,
                                  # only the first layer of a stage downsamples
                                  stride = stride if layer == 0 else 1,
                                  kernel_size = kernel_size,
                                  padding = kernel_size // 2))
        in_channels = out_channels

    features.append(CNN(in_channels, final_channels, kernel_size = 1, stride = 1, padding = 0))
    return nn.Sequential(*features)

  def forward(self, input_tensor):
    """Extract features, pool to one value per channel, then classify."""
    x = self.pool(self.features(input_tensor))
    x = x.flatten(1)  # (batch, channels, 1, 1) -> (batch, channels)
    return self.classifier(x)
    

