# EE - UY 4563: Intro to Machine Learning ~> Seg_net

#### Kyle Ong
#### Rohan Chakraborty

Current flavors of image segmentation networks, such as R-CNN, require complicated architectures including ResNet backbones and feature pyramids. We propose a lightweight architecture for image segmentation consisting of a convolution fed into Hebbian weights.

### Seg_net model.

![alt text](https://saytosid.github.io/images/segnet/Complete%20architecture.png)

#### Here we import the necessary libraries.

torch ~> python deep learning library  

torch.nn ~>  neural network building blocks; `nn.Module` is the base class for all neural network modules

numpy ~> scientific computing with python 

In [None]:
import torch
import torch.nn as nn
import os
import numpy as np
import torchvision.utils as vutils
import math
import torch.nn.functional as Funct

### down_block       -        a single encoder block of the Segnet architecture

`down_block` class implements the convolutional, batch normalisation and ReLU encoder layers. The class initializes either 2 or 3 convolutional layers. The `forward` function implements the data-flow through a single encoder.

```python

class down_block(nn.Module):
'''
    encoder of seg_net consists of convolution, batch normalisation, ReLU, maxpool
    convolution performs a (3,3) kernel with stride of 1
    maxpool performs a (2,2) kernel with a stride of 2
'''

    def __init__(self):
    '''
        intialize convolutional layers
    '''
    
    def forward(self, x):
    '''
        perform a single pass through an encoder
       
        returns a tensor with decreased dimensions, the maxpool indices, and size
    '''

```

In [34]:
class down_block(nn.Module):
    """Single SegNet encoder stage.

    Applies ``num_of_convs`` repetitions of (3x3 convolution -> batch
    normalisation -> ReLU) followed by a 2x2, stride-2 max-pool that also
    returns the pooling indices so a decoder stage can undo the pooling.

    Args:
        channels: two-element sequence ``(in_channels, out_channels)``. The
            first convolution maps ``in_channels -> out_channels``; every
            following convolution keeps ``out_channels``.
        num_of_convs: ``3`` builds three convolution stages; any other value
            builds two (matching the original behaviour).
    """

    def __init__(self, channels, num_of_convs = 2):
        super(down_block, self).__init__()

        self.num_of_convs = num_of_convs

        # padding=1 keeps the spatial size unchanged through each 3x3
        # convolution, so only the max-pool changes the resolution.
        # dilation must be >= 1 (1 == ordinary, non-dilated convolution);
        # the previous value of 0 is rejected by PyTorch.
        conv_kwargs = dict(kernel_size=(3, 3), stride=1, padding=1, dilation=1, bias=True)

        # Operations with learnable parameters
        self.conv1 = nn.Conv2d(channels[0], channels[1], **conv_kwargs)
        self.batchnorm1 = nn.BatchNorm2d(channels[1])
        self.conv2 = nn.Conv2d(channels[1], channels[1], **conv_kwargs)
        self.batchnorm2 = nn.BatchNorm2d(channels[1])
        if num_of_convs == 3:
            self.conv3 = nn.Conv2d(channels[1], channels[1], **conv_kwargs)
            self.batchnorm3 = nn.BatchNorm2d(channels[1])

        # Operations without learnable parameters
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=(2, 2), stride=2, return_indices=True)

        # Kernel weights for the encoder section are initialised with VGG
        # weights from another Python file, after the model is instantiated.

    def forward(self, x):
        """Run one encoder stage.

        Args:
            x: input feature map; ``conv1`` requires ``channels[0]`` channels.

        Returns:
            A tuple ``(pooled, indices, size)`` where ``pooled`` is the
            max-pooled feature map, ``indices`` are the arg-max locations
            returned by the pool, and ``size`` is a dict holding
            ``"input_size"`` (size of ``x``) and ``"b4max"`` (size of the
            feature map just before pooling).
        """
        input_size = x.size()

        # Assign the ReLU result explicitly instead of relying on the
        # in-place side effect of nn.ReLU(inplace=True).
        fwd_map = self.relu(self.batchnorm1(self.conv1(x)))
        fwd_map = self.relu(self.batchnorm2(self.conv2(fwd_map)))
        if self.num_of_convs == 3:
            fwd_map = self.relu(self.batchnorm3(self.conv3(fwd_map)))

        # Record the pre-pool size: the matching decoder passes it to
        # MaxUnpool2d as output_size together with the indices.
        output_size = fwd_map.size()
        fwd_map, indices = self.maxpool(fwd_map)

        size = {"input_size": input_size, "b4max": output_size}
        return (fwd_map, indices, size)


###  up_block       -        a single decoder block of the Segnet architecture

`up_block` class implements max-unpooling, convolution, batch normalisation, and ReLU (the softmax is applied once, at the end of `network`). The class initializes either 2 or 3 convolutional layers. `forward` implements the data-flow through a single decoder.

```python

class up_block(nn.Module):
'''
    decoder of seg_net consists of max-unpooling, convolution, batch normalisation and ReLU
    convolution performs a (3,3) kernel with stride of 1
'''

    def __init__(self):
    '''
        intialize convolutional layers
    '''
    
    def forward(self, x, indices, size):
    '''
        perform a single pass through  a decoder
             
        returns a tensor with increased dimensions
        
    '''

```

In [32]:
class up_block(nn.Module):
    """Single SegNet decoder stage.

    Undoes a 2x2 max-pool with ``nn.MaxUnpool2d`` (using the indices saved by
    the matching encoder stage) and then applies ``num_of_convs`` repetitions
    of (3x3 convolution -> batch normalisation -> ReLU).

    Args:
        channels: two-element sequence ``(in_channels, out_channels)``. The
            first convolution maps ``in_channels -> out_channels``; every
            following convolution keeps ``out_channels``.
        num_of_convs: ``2`` or ``3`` convolution stages. For any other value
            only the first convolution stage is applied in ``forward``.
    """

    def __init__(self,channels,num_of_convs = 2):
        super(up_block, self).__init__()

        self.num_of_convs = num_of_convs

        # MaxUnpool2d already restores the pre-pool resolution, so no extra
        # interpolation layer is needed. (The former `nn.upsample` attribute
        # does not exist in torch.nn, and nn.Upsample needs a size or
        # scale_factor to run.)
        self.unpooled = nn.MaxUnpool2d(kernel_size=(2, 2), stride=2)

        # padding=1 keeps the spatial size unchanged through each 3x3
        # convolution; dilation must be >= 1 (0 is rejected by PyTorch).
        conv_kwargs = dict(kernel_size=(3, 3), stride=1, padding=1, dilation=1, bias=True)

        self.conv1 = nn.Conv2d(channels[0], channels[1], **conv_kwargs)
        self.batchnorm1 = nn.BatchNorm2d(channels[1])

        if num_of_convs == 2:
            self.conv2 = nn.Conv2d(channels[1], channels[1], **conv_kwargs)
        elif num_of_convs == 3:
            self.conv2 = nn.Conv2d(channels[1], channels[1], **conv_kwargs)
            self.batchnorm2 = nn.BatchNorm2d(channels[1])
            self.conv3 = nn.Conv2d(channels[1], channels[1], **conv_kwargs)

        # Batch norm applied after the final convolution of the stage.
        self.batchnorm_for_last_conv = nn.BatchNorm2d(channels[1])

        self.relu = nn.ReLU(inplace=True)

        # Initialise decoder kernels from a zero-mean normal distribution
        # with std = sqrt(2 / (kernel_h * kernel_w * out_channels)).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))

    def forward(self, x, indices, size):
        """Run one decoder stage.

        Args:
            x: pooled feature map to expand; must have the same shape as
                ``indices``.
            indices: max-pool indices produced by the matching encoder stage.
            size: size of the feature map before pooling, passed to
                ``MaxUnpool2d`` as ``output_size``.

        Returns:
            Feature map with ``channels[1]`` channels at the unpooled
            spatial resolution.
        """
        fwd_map = self.unpooled(x, indices, output_size=size)

        # Assign the ReLU result explicitly instead of relying on the
        # in-place side effect of nn.ReLU(inplace=True).
        fwd_map = self.relu(self.batchnorm1(self.conv1(fwd_map)))

        if self.num_of_convs == 2:
            fwd_map = self.relu(self.batchnorm_for_last_conv(self.conv2(fwd_map)))
        elif self.num_of_convs == 3:
            fwd_map = self.relu(self.batchnorm2(self.conv2(fwd_map)))
            fwd_map = self.relu(self.batchnorm_for_last_conv(self.conv3(fwd_map)))

        return fwd_map

### network

`network` class implements the convolutional network with up_blocks and down_blocks. The class initializes ten layers. The first five layers are `down_blocks` and the remaining five are `up_blocks`. The `forward` function implements a single pass through seg_net.

```python

class network(nn.Module):

    def __init__(self):
    '''
        intialize the layers of the network
    '''
    
    def forward(self,x):
    '''
        perform a single pass through the network
        
        returns the output of softmax with k-channels
        where each channel represents a probability distribution of the corresponding label
        
    '''

```

In [33]:
class network(nn.Module):
    """SegNet-style encoder/decoder segmentation network.

    Five ``down_block`` encoder stages are followed by five ``up_block``
    decoder stages. Each decoder stage un-pools with the indices saved by
    its mirror encoder stage. A final 1x1 convolution maps the 64 decoder
    channels to ``num_classes`` channels, and a softmax over the channel
    dimension turns them into per-pixel class probabilities.

    Args:
        num_classes: number of output label channels.
    """

    def __init__(self, num_classes):
        super(network, self).__init__()

        # Encoder: (in_channels, out_channels), number of conv stages
        self.layer1 = down_block((3,64), 2)
        self.layer2 = down_block((64,128), 2)
        self.layer3 = down_block((128,256), 3)
        self.layer4 = down_block((256,512), 3)
        self.layer5 = down_block((512,1024), 3)

        # Decoder: each stage un-pools its input with the mirror encoder's
        # indices before convolving, so its in_channels must equal the
        # channel count of those indices, and its out_channels must equal
        # the channel count of the next stage's indices.
        self.layer6 = up_block((1024,512), 3)   # uses indices5 (1024 ch)
        self.layer7 = up_block((512,256), 3)    # uses indices4 (512 ch)
        self.layer8 = up_block((256,128), 3)    # uses indices3 (256 ch)
        self.layer9 = up_block((128,64), 2)     # uses indices2 (128 ch)
        self.layer10 = up_block((64,64), 2)     # uses indices1 (64 ch)

        # Per-pixel classifier. dilation must be >= 1 for nn.Conv2d.
        self.conv1x1 = nn.Conv2d(64, num_classes, kernel_size=(1,1), stride=1, padding=0, dilation=1, bias=False)
        # dim=1 is the channel dimension of an (N, C, H, W) tensor, so the
        # class scores of each pixel sum to one.
        self.softmax = nn.Softmax(dim=1)

    def forward(self,x):
        """Run a full forward pass.

        Args:
            x: input batch; the first encoder convolution requires
                3 channels.

        Returns:
            Tensor with ``num_classes`` channels where, for every pixel,
            the channel values form a probability distribution over the
            labels.
        """
        # Encoder path: keep indices and pre-pool sizes for the decoder.
        out1, indices1, size1 = self.layer1(x)
        out2, indices2, size2 = self.layer2(out1)
        out3, indices3, size3 = self.layer3(out2)
        out4, indices4, size4 = self.layer4(out3)
        out5, indices5, size5 = self.layer5(out4)

        # Decoder path: mirror the encoder in reverse order.
        out6 = self.layer6(out5, indices5, size5['b4max'])
        out7 = self.layer7(out6, indices4, size4['b4max'])
        out8 = self.layer8(out7, indices3, size3['b4max'])
        out9 = self.layer9(out8, indices2, size2['b4max'])
        out10 = self.layer10(out9, indices1, size1['b4max'])

        # Classify each pixel, then normalise the class scores.
        out_conv1x1 = self.conv1x1(out10)
        res = self.softmax(out_conv1x1)

        return res