<a href="https://colab.research.google.com/github/elhamsh93/object-tracking/blob/main/SiamRPNPP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!nvidia-smi

/bin/bash: nvidia-smi: command not found


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torchvision
from torchvision.models import resnet50
from torchvision.models._utils import IntermediateLayerGetter

In [None]:
# ResNet-50 whose last three stages use dilation instead of stride
# (replace_stride_with_dilation). The leading positional `True` is the legacy
# torchvision `pretrained` flag, i.e. pretrained weights are requested.
model = resnet50(True, replace_stride_with_dilation=[True, True, True])
# model

In [None]:
# model.avgpool = nn.Identity()
# model.fc = nn.Identity()
# model

In [None]:
# model = nn.Sequential(*list(model.children())[:-2])
# Sanity check: the unmodified network still ends in its classifier, so one
# random 224x224 RGB image yields a (1, 1000) output.
model(torch.rand(1, 3, 224, 224)).shape

torch.Size([1, 1000])

In [None]:
# Wrap the ResNet so a forward pass returns the outputs of layer2, layer3 and
# layer4, keyed as 'out1', 'out2' and 'out3' respectively.
new_model = IntermediateLayerGetter(model, 
                                {'layer2': 'out1',
                                 'layer3': 'out2',
                                 'layer4': 'out3'})

In [None]:
# Shapes of the three intermediate feature maps for one 224x224 input.
[feat.shape for feat in new_model(torch.rand(1, 3, 224, 224)).values()]

[torch.Size([1, 512, 56, 56]),
 torch.Size([1, 1024, 56, 56]),
 torch.Size([1, 2048, 56, 56])]

In [None]:
class CorrHead(nn.Module):
  """Depth-wise cross-correlation head.

  Both inputs are first passed through their own 3x3 conv + BN + ReLU
  adjustment layer. The adjusted target features are then used as a
  per-channel (depth-wise) convolution kernel that is slid over the adjusted
  search features, and the response map is projected to `out_channels` by a
  small 1x1 conv head.

  Args:
    out_channels: number of channels produced by the final 1x1 convolution.
    channels: number of channels of the incoming feature maps (and of the
      correlation response). Defaults to 256.
    padding: zero padding applied to the search features during the
      correlation. The default of 13 turns a 56x56 search map and a 28x28
      target map into a 55x55 response.
  """

  def __init__(self, out_channels=2, channels=256, padding=13):
    super(CorrHead, self).__init__()
    self.padding = padding
    self.adj_search = nn.Sequential(
        nn.Conv2d(channels, channels, 3, padding=1),
        nn.BatchNorm2d(channels),
        nn.ReLU(inplace=True),
    )
    self.adj_target = nn.Sequential(
        nn.Conv2d(channels, channels, 3, padding=1),
        nn.BatchNorm2d(channels),
        nn.ReLU(inplace=True),
    )
    self.head = nn.Sequential(
        nn.Conv2d(channels, channels, 1),
        nn.BatchNorm2d(channels),
        nn.ReLU(inplace=True),
        nn.Conv2d(channels, out_channels, 1)
    )

  def forward(self, fx, fz):
    """Correlate search features `fx` with target features `fz`.

    Args:
      fx: search features of shape (B, channels, H, W).
      fz: target features of shape (B, channels, h, w), or (1, channels, h, w)
        to apply a single target to every search sample.

    Returns:
      Tensor of shape (B, out_channels, H + 2*padding - h + 1,
      W + 2*padding - w + 1).

    Raises:
      ValueError: if the batch sizes differ and the target batch is not 1.
    """
    fx = self.adj_search(fx)
    fz = self.adj_target(fz)

    batch = fx.size(0)
    if fz.size(0) == 1 and batch > 1:
      # One shared target for the whole search batch: broadcast it.
      fz = fz.expand(batch, -1, -1, -1)
    elif fz.size(0) != batch:
      raise ValueError(
          "search and target batch sizes must match (or target batch must "
          "be 1), got %d and %d" % (batch, fz.size(0)))

    # Fold the batch into the channel axis so that every (sample, channel)
    # pair becomes its own convolution group. A plain permute of the target
    # only yields a valid (channels, 1, h, w) kernel when the batch is 1.
    channels = fx.size(1)
    groups = batch * channels
    search = fx.reshape(1, groups, fx.size(2), fx.size(3))
    kernel = fz.reshape(groups, 1, fz.size(2), fz.size(3))
    feature = F.conv2d(search, kernel, padding=self.padding, groups=groups)
    feature = feature.reshape(batch, channels,
                              feature.size(2), feature.size(3))

    out = self.head(feature)
    return out


In [None]:
class DWRPNHead(nn.Module):
  """Pair of correlation heads sharing the same inputs.

  `cls` is a CorrHead with 2 * num_anchors output channels and `box` is a
  CorrHead with 4 * num_anchors output channels (presumably per-anchor
  class scores and box offsets -- confirm against the loss/decoder).
  """

  def __init__(self, num_anchors=2):
    super().__init__()
    cls_channels, box_channels = 2 * num_anchors, 4 * num_anchors
    self.cls = CorrHead(cls_channels)
    self.box = CorrHead(box_channels)

  def forward(self, fx, fz):
    """Run both branches on (fx, fz) and return `(cls_output, box_output)`."""
    # Tuple elements are evaluated left to right: cls branch first, then box.
    return self.cls(fx, fz), self.box(fx, fz)


In [None]:
class SiamRPNPP(nn.Module):
  """Siamese tracker: dilated ResNet-50 body, 1x1 necks, one shared RPN head.

  The same backbone and necks embed both the search image and the target
  image. Features are taken from layer2/layer3/layer4 (512/1024/2048
  channels), each reduced to 256 channels, and the single DWRPNHead is
  applied to every level. The per-level outputs are blended with the
  learnable weights `alpha` (classification) and `beta` (localization),
  both initialised to ones.
  """

  def __init__(self):
    super().__init__()

    # Body: leading positional True is the legacy torchvision `pretrained`
    # flag; stride is replaced with dilation in the last three stages.
    resnet = resnet50(True, replace_stride_with_dilation=[True, True, True])
    self.backbone = IntermediateLayerGetter(
        resnet,
        {'layer2': 'out1', 'layer3': 'out2', 'layer4': 'out3'},
    )

    # Neck: one 1x1 reduction per backbone level.
    self.neck1 = self._make_neck(512)
    self.neck2 = self._make_neck(1024)
    self.neck3 = self._make_neck(2048)

    # Head: shared by all three levels.
    self.head = DWRPNHead(num_anchors=2)

    # Learnable per-level fusion weights.
    self.alpha = nn.Parameter(torch.ones(3))
    self.beta = nn.Parameter(torch.ones(3))

  @staticmethod
  def _make_neck(in_channels):
    """Build a 1x1 conv + BN + ReLU block mapping `in_channels` to 256."""
    return nn.Sequential(
        nn.Conv2d(in_channels, 256, 1),
        nn.BatchNorm2d(256),
        nn.ReLU(inplace=True),
    )

  def _embed(self, image):
    """Return the three neck outputs (level 1, 2, 3) for `image`."""
    stages = self.backbone(image)
    level1 = self.neck1(stages['out1'])
    level2 = self.neck2(stages['out2'])
    level3 = self.neck3(stages['out3'])
    return level1, level2, level3

  def forward(self, search, target):
    """Return the fused `(cls, loc)` maps for a search/target image pair."""
    # Search branch runs first, then the target branch.
    search_levels = self._embed(search)
    target_levels = self._embed(target)

    per_level = [
        self.head(fx, fz) for fx, fz in zip(search_levels, target_levels)
    ]
    (cls1, loc1), (cls2, loc2), (cls3, loc3) = per_level

    alpha, beta = self.alpha, self.beta
    cls = alpha[0]*cls1 + alpha[1]*cls2 + alpha[2]*cls3
    loc = beta[0]*loc1 + beta[1]*loc2 + beta[2]*loc3
    return cls, loc

In [None]:
# Instantiate the full tracker (rebinds `model`, replacing the bare ResNet).
model = SiamRPNPP()



In [None]:
# Smoke test: forward(search, target) with a random 224x224 search image and
# a random 112x112 target image, batch size 1.
cls, loc = model(torch.rand(1, 3, 224, 224), torch.rand(1, 3, 112, 112))

In [None]:
# With num_anchors=2 the head emits 4 cls channels and 8 loc channels.
cls.shape, loc.shape

(torch.Size([1, 4, 55, 55]), torch.Size([1, 8, 55, 55]))

In [None]:
# Shape conventions of F.conv2d:
#   input tensor: (minibatch, in_channels, iH, iW)
#   filters:      (out_channels, in_channels/groups, kH, kW)
# With groups == in_channels == 256 each filter sees exactly one channel
# (depth-wise correlation). Output size: 56 + 2*13 - 28 + 1 = 55.

inputs = torch.randn(3, 256, 56, 56)
filters = torch.randn(256, 1, 28, 28)
F.conv2d(inputs, filters, padding=13, groups=256).shape

torch.Size([3, 256, 55, 55])



*   https://github.com/STVIR/pysot
*   https://github.com/visionml/pytracking
*   https://github.com/got-10k/toolkit

