<a href="https://colab.research.google.com/github/mralamdari/Computer-Vision-Papers/blob/main/FFSSD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import torch
import torchvision
from IPython.display import clear_output

!pip install torch_intermediate_layer_getter
from torch_intermediate_layer_getter import IntermediateLayerGetter

# Clear everything this cell has printed so far (i.e. the pip install log).
clear_output()
# Last expression of the cell, so the notebook displays the TensorFlow version.
# NOTE(review): TensorFlow is not used by any other cell visible here.
tf.__version__

'2.12.0'

In [11]:
class Encoder(torch.nn.Module):
  """VGG16 feature extractor that also exposes selected intermediate activations.

  The ImageNet-pretrained ``vgg16().features`` stack is truncated to indices
  0-29 and then followed by a second pass through its own layers 23-29, giving
  a 37-entry ``Sequential``.

  NOTE: the appended entries (indices 30-36) are the *same module objects* as
  indices 23-29, not copies.  They therefore share weights with those layers,
  and a layer selected by index 34 or 36 is the very module that also sits at
  index 27 or 29 and so runs twice per forward pass.  Other cells in this
  notebook index the 'out_conv5_3' / 'out_conv6_2' / 'out_conv7_2' entries
  with ``[0]``, so this sharing is deliberately left unchanged here.
  """

  def __init__(self):
    super().__init__()
    vgg = torchvision.models.vgg16(weights='IMAGENET1K_V1')

    layers = [vgg.features[i] for i in range(30)]
    layers += [vgg.features[i] for i in range(23, 30)]  # re-used modules, see class docstring

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Keep the layer stack as a registered sub-module: the layer getter below is
    # not an nn.Module, so without this attribute the weights would be invisible
    # to Encoder.parameters(), Encoder.state_dict() and Encoder.to().
    self.features = torch.nn.Sequential(*layers).to(device)

    # Sequential index (as a string) -> key under which that layer's output is returned.
    return_layers = {'20': 'out_conv4_2', '22': 'out_conv4_3', '29': 'out_conv5_3','34': 'out_conv7_2', '36': 'out_conv6_2'}
    self.backbone = IntermediateLayerGetter(self.features, return_layers=return_layers)

  def forward(self, images):
    """Run ``images`` through the layer stack and return the layer getter's result.

    The input is first moved to the device that holds the weights, so a CPU
    batch can be passed even when the network was placed on CUDA.

    Returns whatever ``IntermediateLayerGetter`` returns; the cells in this
    notebook read it as ``(mapping of hooked outputs, final output)``.
    """
    device = next(self.features.parameters()).device
    return self.backbone(images.to(device))

In [31]:
def ftb(current_layer, former_layer=None):
  """Transform a feature map to 512 channels and optionally fuse it with another map.

  ``current_layer`` goes through conv3x3(->256) -> ReLU -> conv3x3(->512); both
  convolutions keep the spatial size.  The result is then:

  * returned as-is when ``former_layer`` is None;
  * added element-wise to ``former_layer`` when the two shapes are identical;
  * otherwise added to ``former_layer`` after a transposed convolution
    (kernel 4, stride 2, padding 1 -> 512 channels, doubled height/width).

  All layers are created inside the call with fresh random weights, so nothing
  is learned or kept between calls.  They are created on the device and in the
  dtype of the tensor they are applied to, so the function also works for
  inputs that are not CPU/float32.

  Args:
    current_layer: tensor whose dim 1 is the channel axis (N, C, H, W).
    former_layer: optional tensor to fuse with; dim 1 is its channel axis.

  Returns:
    Tensor with 512 channels and the spatial size of ``current_layer``.
  """
  placement = dict(device=current_layer.device, dtype=current_layer.dtype)

  squeeze = torch.nn.Conv2d(current_layer.shape[1], 256, kernel_size=3, stride=1, padding=1).to(**placement)
  x = torch.nn.ReLU(inplace=True)(squeeze(current_layer))
  expand = torch.nn.Conv2d(256, 512, kernel_size=3, padding=1).to(**placement)
  x = expand(x)

  if former_layer is None:
    return x
  if x.shape == former_layer.shape:
    return torch.add(x, former_layer)

  # Shapes differ: bring former_layer to 512 channels at twice its resolution.
  upsample = torch.nn.ConvTranspose2d(former_layer.shape[1], 512, kernel_size=4, stride=2, padding=1)
  upsample = upsample.to(device=former_layer.device, dtype=former_layer.dtype)
  return torch.add(x, upsample(former_layer))

In [32]:
def _conv_relu_bn(x, out_channels, kernel_size):
  """Apply a freshly created Conv2d('same' padding) -> ReLU -> BatchNorm2d stage to ``x``."""
  conv = torch.nn.Conv2d(in_channels=x.shape[1], out_channels=out_channels, kernel_size=kernel_size, padding='same')
  norm = torch.nn.BatchNorm2d(num_features=out_channels)
  # Create the layers where the data lives instead of always CPU/float32.
  conv = conv.to(device=x.device, dtype=x.dtype)
  norm = norm.to(device=x.device, dtype=x.dtype)
  return norm(torch.nn.ReLU()(conv(x)))


def rfem(x):
  """Four-branch block with a residual connection; output has the shape of ``x``.

  Every stage is Conv2d -> ReLU -> BatchNorm2d with 'same' padding:

  * branch 1: 1x1
  * branch 2: 1x1 -> 1x3
  * branch 3: 1x1 -> 3x1
  * branch 4: 1x1 -> 3x1 -> 1x3 -> 3x1 -> 1x3 (the two middle stages use
    ``channel // 3`` channels)

  Each branch ends with ``channel // 4`` channels; the four results are
  concatenated along dim 1 and added to ``x / 0.5``.  The channel count of
  ``x`` must therefore be divisible by 4 for the final addition to line up.

  All layers are created inside the call with fresh random weights (and batch
  norm in its default training mode), so nothing is learned or kept between
  calls.

  Args:
    x: tensor whose dim 1 is the channel axis (N, C, H, W).

  Returns:
    Tensor of the same shape as ``x``.
  """
  channel = x.shape[1]
  quarter = channel // 4
  third = channel // 3

  # branch1
  x1 = _conv_relu_bn(x, quarter, (1, 1))

  # branch2
  x2 = _conv_relu_bn(x, quarter, (1, 1))
  x2 = _conv_relu_bn(x2, quarter, (1, 3))

  # branch3 -- the ReLU of the first stage now acts on x3 itself; it used to be
  # applied to x2 by mistake, leaving this stage without an activation.
  x3 = _conv_relu_bn(x, quarter, (1, 1))
  x3 = _conv_relu_bn(x3, quarter, (3, 1))

  # branch4
  x4 = _conv_relu_bn(x, quarter, (1, 1))
  for width, kernel in ((third, (3, 1)), (third, (1, 3)), (quarter, (3, 1)), (quarter, (1, 3))):
    x4 = _conv_relu_bn(x4, width, kernel)

  combined_x = torch.concat((x1, x2, x3, x4), dim=1)

  # NOTE(review): x / 0.5 doubles the shortcut.  If a down-scaled shortcut
  # (x * 0.5) was intended, this is inverted -- confirm before changing.
  return torch.add(combined_x, x/0.5)

In [50]:
def training(img):
  """Run one forward pass of the whole pipeline on ``img`` and return the stacked result.

  Despite the name, no loss or optimiser is involved.  A new ``Encoder`` is
  built on every call, and every other layer (here and inside ``rfem`` /
  ``ftb``) is created with fresh random weights, so nothing is learned or
  kept between calls.

  Steps:
    1. Take five feature maps from the encoder.
    2. Halve the spatial size of the two conv4 maps with a stride-2 conv + ReLU.
    3. Chain four ``rfem`` blocks, each followed by adding an encoder map.
    4. Pass each chained result through ``ftb`` and a second ``rfem`` block.
    5. Concatenate each second-stage map with its first-stage map, apply a
       softmax over dim 1, and concatenate the four results along dim 1.

  Args:
    img: image batch accepted by ``Encoder``.

  Returns:
    Tensor ``D``.  For a (5, 3, 320, 320) input the recorded output shape in
    this notebook is (10, 2048, 20, 20).
  """
  backbone = Encoder()
  feature_maps = backbone(img)
  hooked = feature_maps[0]

  def _halve(feature):
    # Stride-2 conv + ReLU.  The conv is created on the device / in the dtype of
    # the feature map, so this also works when the encoder runs on CUDA.
    conv = torch.nn.Conv2d(in_channels=512, out_channels=512, kernel_size=2, stride=2)
    conv = conv.to(device=feature.device, dtype=feature.dtype)
    return torch.nn.ReLU()(conv(feature))

  conv4_2 = _halve(hooked['out_conv4_2'])
  conv4_3 = _halve(hooked['out_conv4_3'])
  # NOTE(review): the [0] presumably selects the first recorded output of a layer
  # that runs twice inside Encoder (its extra layers re-use existing modules) --
  # confirm against IntermediateLayerGetter before changing Encoder.
  conv5_3 = hooked['out_conv5_3'][0]
  conv6_2 = hooked['out_conv6_2'][0]
  conv7_2 = hooked['out_conv7_2'][0]

  # First stage: chained rfem blocks, each followed by adding an encoder map.
  rfem11 = torch.add(rfem(conv4_2), conv4_3)
  rfem12 = torch.add(rfem(rfem11), conv5_3)
  rfem13 = torch.add(rfem(rfem12), conv7_2)
  rfem14 = torch.add(rfem(rfem13), conv6_2)

  # NOTE(review): ftb is always called without former_layer, so its fusion
  # branches are never exercised here -- confirm this is intended.
  ftb_4 = ftb(rfem14)
  ftb_3 = ftb(rfem13)
  ftb_2 = ftb(rfem12)
  ftb_1 = ftb(rfem11)

  # Second stage: one rfem block per ftb output, plus the matching encoder map.
  rfem21 = torch.add(rfem(ftb_1), conv4_3)
  rfem22 = torch.add(rfem(ftb_2), conv5_3)
  rfem23 = torch.add(rfem(ftb_3), conv7_2)
  rfem24 = torch.add(rfem(ftb_4), conv6_2)

  # NOTE(review): dim=0 stacks the two stages along the *batch* axis, so the
  # result has twice as many samples as img.  It is written out explicitly
  # here but left unchanged; if a channel-wise fusion was intended it should
  # be dim=1, which would change the returned shape.
  softmax = torch.nn.Softmax(dim=1)
  det_1 = softmax(torch.cat((rfem21, rfem11), dim=0))
  det_2 = softmax(torch.cat((rfem22, rfem12), dim=0))
  det_3 = softmax(torch.cat((rfem23, rfem13), dim=0))
  det_4 = softmax(torch.cat((rfem24, rfem14), dim=0))

  D = torch.cat((det_1, det_2, det_3, det_4), dim=1)
  return D

In [56]:
# Random batch of five 3-channel 320x320 images, used as a smoke-test input.
x = torch.rand(5,3,320,320)

encoder = Encoder()
y = encoder(x)
# y[0] maps the hooked layer names to their activations.
print(y[0].keys())
# y[1] is presumably the output of the last layer of the stack -- TODO confirm.
print(y[1].shape)

odict_keys(['out_conv4_2', 'out_conv4_3', 'out_conv7_2', 'out_conv5_3', 'out_conv6_2'])
torch.Size([5, 512, 10, 10])


In [57]:
# Run the whole pipeline on the random batch x and inspect the shape of the result.
D = training(x)
print(D.shape)

torch.Size([10, 2048, 20, 20])
