In [1]:
import torch
import torchvision

from torch import nn
#from torchsummary import summary

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
# Quick sanity check: push a constant row vector through a sigmoid layer.
sig = nn.Sigmoid()
x = sig(torch.tensor([[0.6, 0.6, 0.6, 0.6]]))
x

tensor([[0.6457, 0.6457, 0.6457, 0.6457]])

: 

In [20]:
class EncoderBlock(nn.Module):
    """One contracting-path stage: two 3x3 conv + ReLU layers followed by 2x2 max pooling.

    Args:
        d_in: Number of channels of the incoming feature map.
        d_out: Number of channels produced by both convolutions.
    """

    def __init__(self, d_in, d_out):

        super().__init__()

        self.conv_1 = nn.Sequential(
            nn.Conv2d(d_in, d_out, kernel_size=3, stride=1, padding="same"),
            nn.ReLU()
        )

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv_2 = nn.Sequential(
            nn.Conv2d(d_out, d_out, kernel_size=3, stride=1, padding="same"),
            nn.ReLU()
        )

    def forward(self, inputs):
        """Run the stage.

        Args:
            inputs: Feature map with `d_in` channels.

        Returns:
            Tuple `(x, a)` where `a` is the full-resolution activation kept for
            the skip connection and `x` is `a` after 2x2 max pooling.
        """

        a = self.conv_2(self.conv_1(inputs))

        # Only pool here. The previous version ran `conv_2` a second time on the
        # pooled tensor, re-using the same weights at two different resolutions;
        # the convolutions at the lower resolution belong to the next stage.
        x = self.pool(a)

        return x, a

In [21]:
class LastEncoder(nn.Module):
    """Final encoder stage: two 3x3 conv + ReLU layers with no pooling.

    Args:
        d_in: Number of channels of the incoming feature map.
        d_out: Number of channels produced by both convolutions.
    """

    def __init__(self, d_in, d_out):
        super().__init__()

        # Stride 1 with "same" padding keeps the spatial size unchanged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(d_in, d_out, kernel_size=3, stride=1, padding="same"),
            nn.ReLU(),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(d_out, d_out, kernel_size=3, stride=1, padding="same"),
            nn.ReLU(),
        )

    def forward(self, inputs):
        """Apply both conv + ReLU layers in sequence and return the result."""
        hidden = self.conv1(inputs)
        return self.conv2(hidden)

In [24]:
class FullEncoder(nn.Module):
    """Stack of `EncoderBlock`s followed by a `LastEncoder`.

    Args:
        d_in: Number of channels of the input image.
        filters: Output channel count per stage. Every entry except the last
            creates an `EncoderBlock`; the last entry sizes the `LastEncoder`.
    """

    def __init__(self, d_in, filters):

        super().__init__()

        # nn.ModuleList (rather than a plain list) registers the blocks as
        # sub-modules, so their parameters show up in `.parameters()`,
        # `state_dict()` and follow `.to(device)`.
        self.encoder_blocks = nn.ModuleList()
        for f in filters[:-1]:
            self.encoder_blocks.append(EncoderBlock(d_in, f))
            d_in = f

        # `d_in` now holds the channel count feeding the last stage. Unlike the
        # loop variable, it is also defined when `filters` has a single entry.
        self.last_encoder = LastEncoder(d_in, filters[-1])

    def forward(self, inputs):
        """Encode `inputs`.

        Returns:
            Tuple `(x, activations)`: the output of the last stage and the list
            of skip activations, one per `EncoderBlock`, in encoder order.
        """

        activations = []
        x = inputs
        for eb in self.encoder_blocks:
            x, a = eb(x)
            activations.append(a)

        x = self.last_encoder(x)

        return x, activations


In [25]:
# Encoder for 3-channel input with five stages of increasing width.
unet_encoder = FullEncoder(d_in=3, filters=[64, 128, 256, 512, 1024])

## Decoder for the U-Net

In [27]:
class DecoderBlock(nn.Module):
    """One expanding-path stage: 2x transposed-conv upsampling, optional skip
    concatenation, then 3x3 conv + ReLU layers.

    Args:
        d_in: Number of channels of the incoming (low-resolution) feature map.
        d_out: Number of channels produced by the block.
        d_skip: Number of channels of the skip activation. Defaults to `d_out`.
    """

    def __init__(self, d_in, d_out, d_skip=None):

        super().__init__()
        if d_skip is None:
            d_skip = d_out

        # Kernel 2 / stride 2 with the default zero padding exactly doubles the
        # height and width. ConvTranspose2d does not accept padding="same".
        self.upconv = nn.Sequential(
            nn.ConvTranspose2d(d_in, d_out, kernel_size=2, stride=2),
            nn.ReLU()
        )

        # Fuses the channel-wise concatenation of skip and upsampled features
        # back down to `d_out` channels; only used when a skip is supplied.
        self.skip_conv = nn.Sequential(
            nn.Conv2d(d_out + d_skip, d_out, 3, 1, padding="same"),
            nn.ReLU()
        )

        self.conv = nn.Sequential(
            nn.Conv2d(d_out, d_out, 3, 1, padding="same"),
            nn.ReLU()
        )

    def forward(self, inp, a):
        """Upsample `inp`, merge the skip activation `a` (if not None) and refine.

        Args:
            inp: Feature map with `d_in` channels.
            a: Skip activation with `d_skip` channels and twice the spatial
                size of `inp`, or None to run without a skip connection.

        Returns:
            Feature map with `d_out` channels at twice the resolution of `inp`.
        """

        x = self.upconv(inp)
        if a is not None:
            # Concatenate along the channel axis (dim 1), not the width axis.
            x = torch.cat([a, x], dim=1)
            x = self.skip_conv(x)

        return self.conv(x)

In [31]:
class Decoder(nn.Module):
    """Chain of `DecoderBlock`s followed by a 1x1 classification convolution.

    Args:
        d_in: Number of channels of the feature map fed to the first block.
        filters: Output channel count of each `DecoderBlock`, in order.
        num_classes: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, d_in, filters, num_classes):

        super().__init__()

        # nn.ModuleList registers every block. The earlier plain list combined
        # with a repeatedly overwritten `self.db` attribute registered only the
        # last block's parameters.
        self.decoder_blocks = nn.ModuleList()

        for f in filters:
            self.decoder_blocks.append(DecoderBlock(d_in, f))
            d_in = f

        # `d_in` is the channel count leaving the last block (or the original
        # input width when `filters` is empty).
        self.output = nn.Conv2d(d_in, num_classes, kernel_size=1, stride=1)

    def forward(self, inputs, activations):
        """Decode `inputs` using `activations` as skip connections.

        Args:
            inputs: Feature map fed to the first decoder block.
            activations: Skip activations (or None entries), ordered to match
                `decoder_blocks`. Iteration stops at the shorter of the two.

        Returns:
            Output of the 1x1 convolution, with `num_classes` channels.
        """

        x = inputs
        for db, a in zip(self.decoder_blocks, activations):
            x = db(x, a)

        return self.output(x)
        


### Decoder with Bilinear Interpolation

In [None]:
class SimpleDecoderBlock(nn.Module):
    """Decoder stage that upsamples with bilinear interpolation instead of a
    transposed convolution.

    Args:
        d_in: Number of channels of the incoming feature map.
        d_out: Number of channels produced by the block; the skip activation
            is expected to carry `d_out` channels as well.
    """

    def __init__(self, d_in, d_out):
        super().__init__()

        self.upconv = nn.UpsamplingBilinear2d(scale_factor=2)
        # 1x1 conv brings the upsampled map to `d_out` channels.
        self.conv_1 = nn.Conv2d(d_in, d_out, kernel_size=1, stride=1)
        self.relu = nn.ReLU()
        # Consumes the skip + upsampled concatenation (2 * d_out channels).
        self.conv_2 = nn.Conv2d(2 * d_out, d_out, kernel_size=3, stride=1, padding="same")
        self.conv_3 = nn.Conv2d(d_out, d_out, kernel_size=3, stride=1, padding="same")

    def forward(self, inp, a):
        """Upsample `inp`, merge skip activation `a` when it is not None, refine."""
        upsampled = self.upconv(inp)
        features = self.relu(self.conv_1(upsampled))

        if a is not None:
            merged = torch.cat((a, features), dim=1)
            features = self.relu(self.conv_2(merged))

        return self.relu(self.conv_3(features))


        

## Full U-Net

In [32]:
class UNet(nn.Module):
    """U-Net assembled from `FullEncoder` and `Decoder`.

    Args:
        d_in: Number of channels of the input image.
        num_classes: Number of output channels of the segmentation head.
        filters: Channel widths of the encoder stages, shallowest first; the
            last entry is the width of the deepest (bottleneck) stage.
    """

    def __init__(self, d_in, num_classes, filters):

        super().__init__()
        # The encoder consumes the whole list: every entry but the last yields
        # a skip activation, the last one sizes the bottleneck.
        self.encoder = FullEncoder(d_in, filters)

        # The decoder starts from the bottleneck width (not the image channel
        # count) and mirrors the skip-producing stages in reverse order, so
        # each decoder block lines up with one skip activation.
        self.decoder = Decoder(filters[-1], filters[:-1][::-1], num_classes)

    def forward(self, inputs):
        """Return the decoder output for `inputs`."""

        x, activations = self.encoder(inputs)

        # Deepest skip first, matching the decoder's block order. The result is
        # now returned; the method previously fell through and returned None.
        return self.decoder(x, activations[::-1])

In [33]:
# U-Net for 3-channel input and 5 output classes.
unet = UNet(d_in=3, num_classes=5, filters=[64, 128, 256, 512, 1024])

## Training Loop 

## VGG U-Net

In [2]:
from torchvision.models import vgg19, VGG19_Weights

# Instantiate VGG-19 with torchvision's default pretrained weights.
# Note: this rebinds the name `vgg19` from the imported factory function to
# the model instance; later cells use `vgg19` as the model.
vgg19 = vgg19(weights=VGG19_Weights.DEFAULT)

In [3]:
# Backbone = the first child module of the VGG model with its final layer dropped.
vgg19_backbone = nn.Sequential(*next(vgg19.children())[:-1])

In [154]:
vgg19_backbone

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (17): ReLU(inplace=True)
  (18): MaxPoo

In [5]:
# Outputs captured by the forward hooks, in the order the hooked modules run.
activations = []


def getActivations():
    """Create a forward hook that records the hooked module's output in `activations`."""

    def record_output(module, inputs, output):
        activations.append(output)

    return record_output

In [5]:
# Attach a recording hook to backbone layers 3, 8, 17 and 26; keep the handles
# so the hooks can be removed later.
h1, h2, h3, h4 = [
    vgg19_backbone[layer_idx].register_forward_hook(getActivations())
    for layer_idx in (3, 8, 17, 26)
]


In [6]:
test_img = torch.zeros((1, 3, 512, 512))

In [3]:
activations

[]

In [8]:
out = vgg19_backbone(test_img)

In [9]:
out.shape

torch.Size([1, 512, 32, 32])

In [4]:
# Show the shape of each of the four captured activations.
for i in range(4):
    print(activations[i].shape)

IndexError: list index out of range

In [None]:
class VGGUNet(nn.Module):
    """U-Net style model that uses a VGG feature extractor as its encoder.

    Skip activations are captured with forward hooks on backbone layers
    3, 8, 17 and 26 while the backbone runs.

    Args:
        vgg_back: Indexable backbone module (e.g. an `nn.Sequential`) whose
            layers 3, 8, 17 and 26 provide the skip activations.
        num_classes: Number of output channels of the decoder head.
    """

    def __init__(self, vgg_back, num_classes):

        super().__init__()
        self.activations = []
        self.vgg = vgg_back
        filters = [512, 256, 128, 64]
        # The decoder consumes the backbone output, which has 512 channels for
        # the VGG-19 backbone probed earlier in this notebook (= filters[0]).
        # It was previously sized with filters[-1] (64).
        # NOTE(review): assumes `vgg_back` emits 512 channels - confirm for
        # other backbones.
        self.decoder = Decoder(filters[0], filters, num_classes)

    def getActivations(self):
        """Return a forward hook that appends the hooked module's output to `self.activations`."""
        def hook(model, input, output):
            self.activations.append(output)
        return hook

    def forward(self, input):
        """Run the backbone, collect the skip activations, and decode them."""

        self.activations = []

        handles = [
            self.vgg[idx].register_forward_hook(self.getActivations())
            for idx in (3, 8, 17, 26)
        ]
        try:
            vgg_output = self.vgg(input)
        finally:
            # Always detach the hooks, even if the backbone raises; otherwise
            # they would pile up on the shared backbone across calls.
            for handle in handles:
                handle.remove()

        # Deepest activation first, matching the decoder's block order.
        return self.decoder(vgg_output, self.activations[::-1])


## ResUNet

In [81]:
from torchvision.models import resnet50, ResNet50_Weights

resnet_model = resnet50(weights=ResNet50_Weights.DEFAULT)

In [82]:
# Keep the first seven child modules of the ResNet as the backbone.
resnet_backbone = nn.Sequential(*list(resnet_model.children())[:7])

In [83]:
test_img = torch.zeros((1, 3, 512, 512))

In [84]:
resnet_backbone

Sequential(
  (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace=True)
  (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (4): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)


In [85]:
resnet_backbone[4][2]

Bottleneck(
  (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
)

In [86]:
# Fresh capture list for the ResNet experiments (same helper as in the VGG section).
activations = []


def getActivations():
    """Return a forward hook that appends the hooked module's output to the global `activations` list."""

    def _store(_module, _inputs, output):
        activations.append(output)

    return _store

In [92]:
# Record the outputs of backbone layer 2, the third block of stage 4, and the
# last block of stage 5.
hr1, hr2, hr3 = [
    module.register_forward_hook(getActivations())
    for module in (resnet_backbone[2], resnet_backbone[4][2], resnet_backbone[5][-1])
]

In [93]:
activations = []

In [94]:
out = resnet_backbone(test_img)

In [95]:
len(activations)

3

In [96]:
# One line per captured activation, in capture order.
for captured in activations:
    print(captured.shape)

torch.Size([1, 64, 256, 256])
torch.Size([1, 256, 128, 128])
torch.Size([1, 512, 64, 64])


In [91]:
out.shape

torch.Size([1, 1024, 32, 32])

In [77]:
# Detach the first two hooks; hr3 is intentionally left attached here.
for handle in (hr1, hr2):
    handle.remove()
#hr3.remove()

In [None]:
class ResNetUNet(nn.Module):
    """U-Net style model with a frozen, pretrained ResNet-50 encoder.

    Skip activations are captured with forward hooks while the backbone runs.

    Args:
        num_classes: Number of output channels of the decoder head.
    """

    def __init__(self, num_classes):

        super().__init__()

        # Leading None becomes the trailing entry after reversal, i.e. the last
        # decoder block runs without a skip connection.
        self.activations = [None]
        resnet_model = resnet50(weights=ResNet50_Weights.DEFAULT)

        self.resnet_backbone = nn.Sequential(*(list(resnet_model.children())[0:7]))
        for param in self.resnet_backbone.parameters():
            param.requires_grad = False

        filters = [512, 256, 128, 64]
        # NOTE(review): the shallowest tapped activation has 64 channels (see
        # the shapes printed earlier in this notebook) while the matching
        # decoder block outputs 128 - verify the decoder handles that skip.
        self.decoder = Decoder(1024, filters, num_classes)

    def getActivations(self):
        """Return a forward hook that appends the hooked module's output to `self.activations`."""
        def hook(model, input, output):
            self.activations.append(output)
        return hook

    def forward(self, input):
        """Run the backbone, collect the skip activations, and decode them."""

        self.activations = [None]

        # Hook the blocks themselves, as in the exploratory cell above. A
        # Bottleneck holds one shared `relu` module that its forward pass calls
        # after each conv stage, so hooking `.relu` would record several
        # tensors per block instead of the block output.
        tapped = (
            self.resnet_backbone[2],
            self.resnet_backbone[4][2],
            self.resnet_backbone[5][-1],
        )
        handles = [m.register_forward_hook(self.getActivations()) for m in tapped]
        try:
            resnet_output = self.resnet_backbone(input)
        finally:
            # Always detach the hooks, even if the backbone raises.
            for handle in handles:
                handle.remove()

        return self.decoder(resnet_output, self.activations[::-1])
        

## EfficientNet

In [17]:
from torchsummary import summary

In [2]:
from torchvision.models import efficientnet_b4, EfficientNet_B4_Weights

# Pass a member of the weights enum (here the default pretrained weights),
# not the enum class itself, as the VGG and ResNet cells do.
effnet_b4 = efficientnet_b4(weights=EfficientNet_B4_Weights.DEFAULT)



In [9]:
effnet_b4

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
            (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActiv

In [18]:
# Unpack the stages of the model's first child (the feature extractor) so they
# can be indexed individually (backbone[0], backbone[2][-1], ...), as the hook
# cells below require. Slicing the children with [:2] instead produced a
# two-element Sequential for which those indices do not exist.
effnet_b4_backbone = nn.Sequential(*list(effnet_b4.children())[0])

In [19]:
summary(effnet_b4_backbone, (3,512,512))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 48, 256, 256]           1,296
       BatchNorm2d-2         [-1, 48, 256, 256]              96
              SiLU-3         [-1, 48, 256, 256]               0
            Conv2d-4         [-1, 48, 256, 256]             432
       BatchNorm2d-5         [-1, 48, 256, 256]              96
              SiLU-6         [-1, 48, 256, 256]               0
 AdaptiveAvgPool2d-7             [-1, 48, 1, 1]               0
            Conv2d-8             [-1, 12, 1, 1]             588
              SiLU-9             [-1, 12, 1, 1]               0
           Conv2d-10             [-1, 48, 1, 1]             624
          Sigmoid-11             [-1, 48, 1, 1]               0
SqueezeExcitation-12         [-1, 48, 256, 256]               0
           Conv2d-13         [-1, 24, 256, 256]           1,152
      BatchNorm2d-14         [-1, 24, 2

In [18]:
# Record the output of stage 0 and of the last block in stages 2, 3, 5 and 7.
e1, e2, e3, e4, e5 = [
    module.register_forward_hook(getActivations())
    for module in (
        effnet_b4_backbone[0],
        effnet_b4_backbone[2][-1],
        effnet_b4_backbone[3][-1],
        effnet_b4_backbone[5][-1],
        effnet_b4_backbone[7][-1],
    )
]

# To detach the hooks again, call .remove() on e1 ... e5.

In [19]:
test_img = torch.zeros((1, 3, 512, 512))

In [20]:
activations = []

In [21]:
o = effnet_b4_backbone(test_img)

In [22]:
o.shape

torch.Size([1, 1792, 16, 16])

In [23]:
len(activations)

5

In [16]:
# Show the shape of each of the five captured activations.
for i in range(5):
    print(activations[i].shape)

torch.Size([1, 48, 256, 256])
torch.Size([1, 32, 128, 128])
torch.Size([1, 56, 64, 64])
torch.Size([1, 160, 32, 32])
torch.Size([1, 448, 16, 16])


In [None]:
class EfficientNetUNet(nn.Module):
    """U-Net style model with a frozen, pretrained EfficientNet-B4 encoder.

    Five backbone activations are captured with forward hooks; the deepest one
    is fed to the decoder and the remaining four act as skip connections.

    Args:
        num_classes: Number of output channels of the decoder head.
    """

    def __init__(self, num_classes):

        super().__init__()
        # Leading None becomes the trailing entry after reversal, i.e. the last
        # decoder block runs without a skip connection.
        self.activations = [None]

        # Pass a member of the weights enum, not the enum class itself.
        effnet_b4 = efficientnet_b4(weights=EfficientNet_B4_Weights.DEFAULT)
        self.effnet_b4_backbone = nn.Sequential(*(list(effnet_b4.children())[0]))
        for param in self.effnet_b4_backbone.parameters():
            param.requires_grad = False

        filters = [160, 56, 32, 48, 64]
        # The decoder input is the deepest tapped activation (stage 7), which
        # has 448 channels per the shapes printed earlier in this notebook.
        # It was previously sized with filters[0] (160).
        self.decoder = Decoder(448, filters, num_classes)

    def getActivations(self):
        """Return a forward hook that appends the hooked module's output to `self.activations`."""
        def hook(model, input, output):
            self.activations.append(output)
        return hook

    def forward(self, input):
        """Run the backbone, collect the tapped activations, and decode them."""

        self.activations = [None]

        backbone = self.effnet_b4_backbone
        tapped = (
            backbone[0],
            backbone[2][-1],
            backbone[3][-1],
            backbone[5][-1],
            backbone[7][-1],
        )
        handles = [m.register_forward_hook(self.getActivations()) for m in tapped]
        try:
            # Only the hooked activations are used; the final output is discarded.
            _ = backbone(input)
        finally:
            # Always detach the hooks, even if the backbone raises.
            for handle in handles:
                handle.remove()

        # The last recorded activation (stage 7) is the decoder input; the rest
        # are the skips, reversed so the deepest comes first.
        effnet_output = self.activations.pop()

        return self.decoder(effnet_output, self.activations[::-1])

        
        

        

## Pretrained U-Net Testing

In [169]:
# Load the `unet_carvana` entry point from the milesial/Pytorch-UNet torch.hub
# repository, requesting pretrained weights with scale=0.5.
net = torch.hub.load('milesial/Pytorch-UNet', 'unet_carvana', pretrained=True, scale=0.5)

Using cache found in /Users/akhtar/.cache/torch/hub/milesial_Pytorch-UNet_master


TypeError: unet_carvana() got an unexpected keyword argument 'map_location'

## Inception U-Net

In [97]:
from torchvision.models import inception_v3, Inception_V3_Weights

In [98]:
# Pass a member of the weights enum (here the default pretrained weights),
# not the enum class itself.
inception = inception_v3(weights=Inception_V3_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /Users/akhtar/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:05<00:00, 20.7MB/s] 


In [99]:
inception

Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80, 192, kernel_size=(3, 3), stri