In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
from torchsummary import summary
from torch.nn.parameter import Parameter
import random

In [None]:
# Notebook-wide settings shared by the cells below.
input_dim, channels = 250, 4
batch_size = epochs = 10
# Everything in this notebook runs on the CPU.
device = torch.device('cpu')

## Dynamic Convolution Layer
![Alt text](<image/Screenshot 2023-09-08 at 1.02.46 PM.png>)

In [None]:
class dynamic_kernel1d(nn.Module):
    """Predict a pair of 1-D kernels (one column vector, one row vector) from an image.

    The input goes through three conv + max-pool stages (the second and third
    stage share ``conv2``), is flattened into a single vector (batch dimension
    included) and projected by two linear heads to ``output_channels`` values
    each. Both vectors are normalised with a softmax.

    The linear heads are created the first time a given flattened size is seen
    and are then kept as registered sub-modules, so repeated calls reuse the
    same weights and the heads show up in ``parameters()`` / ``state_dict()``.

    Args:
        input_channels: number of channels of the input image.
        output_channels: length of each predicted 1-D kernel.
        device: device on which the linear heads are created.
    """

    def __init__(self, input_channels, output_channels, device):
        super(dynamic_kernel1d, self).__init__()
        self.device = device
        self.out_c = output_channels
        self.conv1 = nn.Conv2d(input_channels, 32,
                               (7, 7), 1, padding='valid')
        self.conv2 = nn.Conv2d(32, 32, (7, 7), 1, padding='valid')
        self.max_pooling = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.soft = nn.Softmax(dim=0)
        # Linear heads keyed by "<name>_<in_features>"; filled lazily in forward.
        self._heads = nn.ModuleDict()

    def _head(self, name, in_features):
        """Return the registered linear head ``name`` for this input width, creating it once."""
        key = f"{name}_{in_features}"
        if key not in self._heads:
            # NOTE: a head created after an optimizer was built is not known to
            # that optimizer; run one forward pass before constructing it.
            self._heads[key] = nn.Linear(
                in_features, self.out_c, device=self.device)
        return self._heads[key]

    def forward(self, inputs):
        """Return ``(v1, h1)`` with shapes ``(out_c, 1)`` and ``(1, out_c)``."""
        x = self.max_pooling(self.conv1(inputs))
        # conv2 is deliberately applied twice (shared weights), as in the original design.
        x = self.max_pooling(self.conv2(x))
        x = self.max_pooling(self.conv2(x))

        x = x.reshape(-1)  # flatten everything, batch included
        features = x.size(0)
        v1 = self.soft(self._head('v', features)(x))
        h1 = self.soft(self._head('h', features)(x))

        v1 = v1.unsqueeze(1)  # output x 1
        h1 = h1.unsqueeze(0)  # 1 x output

        return v1, h1

## Dynamic Filter Network
![Alt text](<image/Screenshot 2023-09-09 at 11.10.45 AM.png>)


![Alt text](<image/Screenshot 2023-09-08 at 7.48.12 PM.png>)

In [None]:
class dynamic_kernel2d(nn.Module):
    """Generate one convolution kernel of shape (filters, input_channels, k, k) from an image.

    The input goes through depthwise 7x7 / pointwise 1x1 convolution pairs with
    max pooling in between (the second and third stage share ``conv2`` and
    ``point2``), is flattened (batch dimension included) and projected by a
    fully connected layer to ``filters * input_channels * k * k`` values.

    The fully connected layer is created the first time a given flattened size
    is seen and then kept as a registered sub-module, so repeated calls reuse
    the same weights and the layer appears in ``parameters()``.

    Args:
        input_channels: channels of the input image (and of the produced kernel).
        filters: number of output filters of the produced kernel.
        kernel_size: sequence whose first element is the (square) kernel side.
        device: device on which the fully connected layers are created.
    """

    def __init__(self, input_channels, filters, kernel_size, device):
        super(dynamic_kernel2d, self).__init__()
        self.device = device
        self.filters = filters
        self.input_channels = input_channels
        self.kernel_size = kernel_size[0]
        self.K = 1

        self.conv1 = nn.Conv2d(input_channels, input_channels, kernel_size=(7, 7), padding='valid', stride=1,
                               groups=input_channels)
        self.point1 = nn.Conv2d(input_channels, 64, kernel_size=(1, 1), padding='same', stride=1,
                                groups=1)
        self.conv2 = nn.Conv2d(64, self.K*64, kernel_size=(7, 7), padding='valid', stride=1,
                               groups=64)
        self.point2 = nn.Conv2d(self.K*64, 64, kernel_size=(1, 1), padding='same', stride=1,
                                groups=1)

        self.max_pooling = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.soft = nn.Softmax(dim=0)
        # Fully connected layers keyed by "<in>_<out>"; filled lazily.
        self._linears = nn.ModuleDict()

    def linear_layer(self, input, input_shape, output_shape):
        """Apply the registered ``input_shape -> output_shape`` linear layer, creating it once."""
        key = f"{input_shape}_{output_shape}"
        if key not in self._linears:
            # NOTE: layers created after an optimizer was built are not tracked
            # by it; run one forward pass before constructing the optimizer.
            self._linears[key] = nn.Linear(
                input_shape, output_shape, device=self.device)
        return self._linears[key](input)

    def forward(self, inputs):
        """Return a kernel tensor of shape (filters, input_channels, k, k)."""
        x = self.max_pooling(self.point1(self.conv1(inputs)))
        # Two more stages that share the same conv2 / point2 weights.
        for _ in range(2):
            x = self.max_pooling(self.point2(self.conv2(x)))

        # kernel (filters, input_channels, kernel_size, kernel_size)
        output_features = self.filters * self.input_channels * \
            self.kernel_size * self.kernel_size
        x = x.reshape(-1)  # flatten the pooled features, batch included
        input_features = x.size(0)
        # fully connected to get desired kernel size
        v1 = self.linear_layer(x, input_features, output_features)
        # reshape into desired kernel size
        return v1.view(self.filters, self.input_channels,
                       self.kernel_size, self.kernel_size)

In [None]:
class dynamic_filter2d(nn.Module):
    """Generate a separate 2-D filter for every pixel of the input image.

    The input goes through four depthwise/pointwise convolution + max-pool
    stages, is flattened (batch dimension included) and projected by a fully
    connected layer to one base kernel of shape (1, input_channels, k, k).
    A grouped 1x1 convolution then expands that base kernel to
    ``h * w * filters`` copies per input channel, which are viewed as
    (h, w, filters, input_channels, k, k).

    Both the fully connected layer and the expanding convolution depend on the
    input size, so they are created the first time a size is seen and then
    kept as registered sub-modules. Repeated calls therefore reuse the same
    weights and the layers appear in ``parameters()``.

    Args:
        input_channels: channels of the input image.
        kernel_size: sequence whose first element is the (square) kernel side.
        filters: number of output filters per pixel.
        device: device on which the lazily created layers are placed.
    """

    def __init__(self, input_channels, kernel_size, filters, device):
        super(dynamic_filter2d, self).__init__()
        self.device = device
        self.filters = filters
        self.kernel_size = kernel_size[0]
        self.input_channels = input_channels
        self.K = 1

        self.conv1 = nn.Conv2d(input_channels, input_channels, kernel_size=(7, 7), padding='valid', stride=1,
                               groups=input_channels)
        self.point1 = nn.Conv2d(input_channels, 64, kernel_size=(1, 1), padding='same', stride=1,
                                groups=1)
        self.conv2 = nn.Conv2d(64, self.K*64, kernel_size=(7, 7), padding='valid', stride=1,
                               groups=64)
        self.point2 = nn.Conv2d(self.K*64, 64, kernel_size=(1, 1), padding='same', stride=1,
                                groups=1)

        self.max_pooling = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
        self.soft = nn.Softmax(dim=0)
        # Lazily created, size-dependent layers (registered so they are trainable).
        self._linears = nn.ModuleDict()
        self._expanders = nn.ModuleDict()

    def linear_layer(self, input, input_shape, output_shape):
        """Apply the registered ``input_shape -> output_shape`` linear layer, creating it once."""
        key = f"{input_shape}_{output_shape}"
        if key not in self._linears:
            self._linears[key] = nn.Linear(
                input_shape, output_shape, device=self.device)
        return self._linears[key](input)

    def expanding_conv(self, inputs, h, w):
        """Expand the base kernel to ``h * w * filters`` kernels per input channel."""
        k = h * w * self.filters
        key = f"{h}x{w}"
        if key not in self._expanders:
            # Created on self.device so it matches the linear layer's output.
            self._expanders[key] = nn.Conv2d(
                self.input_channels, self.input_channels * k, kernel_size=(1, 1),
                padding='same', stride=1, groups=self.input_channels,
                device=self.device)
        return self._expanders[key](inputs)

    def forward(self, inputs):
        """Return per-pixel kernels of shape (h, w, filters, input_channels, k, k)."""
        h = inputs.size(2)
        w = inputs.size(3)

        # a 2d filter for each pixel of input image for transformation
        x = self.max_pooling(self.point1(self.conv1(inputs)))
        # Three more stages that share the same conv2 / point2 weights.
        for _ in range(3):
            x = self.max_pooling(self.point2(self.conv2(x)))

        # kernel (input_height, input_width, input_filters, input_channels, kernel_size, kernel_size)
        output_features = self.input_channels * self.kernel_size * self.kernel_size

        x = x.reshape(-1)  # flatten the pooled features, batch included
        input_features = x.size(0)

        # fully connected to get desired kernel size
        v1 = self.linear_layer(x, input_features, output_features)
        # Reshape to input into conv layer
        v1 = v1.view(1, self.input_channels,
                     self.kernel_size, self.kernel_size)

        # Conv layer to add depth to the kernel to reshape it into desired filter size
        v1 = self.expanding_conv(v1, h, w)
        # NOTE(review): the grouped conv lays its output channels out with the
        # input-channel group outermost, while this view treats input_channels
        # as the 4th axis - confirm the intended axis order.
        return v1.view(h, w, self.filters, self.input_channels,
                       self.kernel_size, self.kernel_size)

In [None]:
class dynamic_conv2d(nn.Module):
    """Convolution whose kernel is generated from the input by a filter network.

    Two modes are supported through ``filter_type``:

    * ``'2d_filter'``: ``dynamic_kernel2d`` produces one kernel of shape
      (out_channels, in_channels, k, k) that is applied with ``F.conv2d``.
    * ``'dynamic_filter'``: ``dynamic_filter2d`` produces one kernel per input
      pixel, shape (h, w, out_channels, in_channels, k, k); each output pixel
      is the product of its own kernel with the input patch around it.

    Args:
        in_channels: channels of the input image.
        out_channels: number of filters / output channels.
        filter_type: ``'2d_filter'`` or ``'dynamic_filter'``.
        kernel_size: sequence whose first element is the (square) kernel side.
        padding: ``'same'`` or ``'valid'``.
        device: device for the bias and the filter network's lazy layers.
        bias: when False no bias is added.
        dtype: dtype of the bias parameter.
    """

    def __init__(self, in_channels, out_channels, filter_type, kernel_size, padding, device,
                 bias: bool = True, dtype=torch.float32) -> None:
        super(dynamic_conv2d, self).__init__()
        assert padding in ['valid', 'same'], "Either 'same' or 'valid' "
        assert filter_type in [
            '2d_filter', 'dynamic_filter'], "Type must be one of : \n1) 2d_filter \n2) dynamic_filter"

        factory_kwargs = {'device': device, 'dtype': dtype}
        self.filter_type = filter_type
        self.padding = padding

        if bias:
            # Start from zeros rather than uninitialised memory.
            self.bias = Parameter(torch.zeros(
                out_channels, **factory_kwargs), requires_grad=True)
        else:
            # Keep the attribute defined so forward() can test for None.
            self.register_parameter('bias', None)

        if self.filter_type == '2d_filter':
            self.filter_network = dynamic_kernel2d(
                in_channels, out_channels, kernel_size, device)
        else:
            # Keywords on purpose: dynamic_filter2d takes (input_channels,
            # kernel_size, filters, device), a different order from dynamic_kernel2d.
            self.filter_network = dynamic_filter2d(
                input_channels=in_channels, kernel_size=kernel_size,
                filters=out_channels, device=device)

    def _apply_local_filters(self, inputs, kernel):
        """Apply a per-pixel kernel (h, w, filters, in_c, k, k) to ``inputs`` (B, in_c, h, w)."""
        h, w, filters, in_c, k_h, k_w = kernel.shape
        if k_h % 2 == 0 or k_w % 2 == 0:
            raise ValueError(
                "dynamic_filter mode needs an odd kernel size so that each "
                "kernel is centred on its pixel")
        pad = (k_h // 2, k_w // 2) if self.padding == 'same' else (0, 0)
        out_h = h + 2 * pad[0] - k_h + 1
        out_w = w + 2 * pad[1] - k_w + 1

        # (B, in_c * k_h * k_w, out_h * out_w): one flattened patch per output pixel.
        patches = F.unfold(inputs, kernel_size=(k_h, k_w), padding=pad)

        # With 'valid' padding only the interior pixels produce an output, so
        # keep the kernels that belong to those pixels.
        top = (h - out_h) // 2
        left = (w - out_w) // 2
        local = kernel[top:top + out_h, left:left + out_w]
        local = local.reshape(out_h * out_w, filters, in_c * k_h * k_w)

        out = torch.einsum('bkl,lfk->bfl', patches, local)
        out = out.reshape(inputs.size(0), filters, out_h, out_w)
        if self.bias is not None:
            out = out + self.bias.view(1, -1, 1, 1)
        return out

    def forward(self, inputs):
        """Generate the kernel from ``inputs`` and apply it to ``inputs``."""
        kernel = self.filter_network(inputs)
        if self.filter_type == '2d_filter':
            return F.conv2d(inputs, kernel, self.bias, padding=self.padding)
        return self._apply_local_filters(inputs, kernel)

In [None]:
# Random single-image batch (1 x 3 x 250 x 250), created and moved to the configured device.
inp = torch.rand((1, 3, 250, 250)).to(device)

In [None]:
# Which kernel generator the layer uses: '2d_filter' or 'dynamic_filter'
i = '2d_filter'
model = dynamic_conv2d(in_channels=3, out_channels=3, filter_type=i,
                       kernel_size=(3, 3), padding='same', device='cpu')

In [None]:
# Print the layer-by-layer summary for a single 3 x 250 x 250 input on the CPU.
summary(model, input_size=(3, 250, 250), batch_size=1, device="cpu")

In [None]:
# Read the input image and convert it to a torch tensor. The file is closed as
# soon as it has been decoded, and the image is forced to RGB because the model
# defined above was built for 3 input channels.
with Image.open('IMG_4392.JPG') as image_file:
    img = transforms.ToTensor()(image_file.convert('RGB'))
print("Input image size:", img.size())  # size = [3, 466, 700]

# unsqueeze the image to make it a 4D tensor
img = img.unsqueeze(0)  # image size = [1, 3, 466, 700]

# Apply the dynamic convolution; this is inference only, so skip autograd.
with torch.no_grad():
    img = model(img)
print(img.shape)

# squeeze the image to make it 3D again and keep the values inside the
# [0, 1] range that ToPILImage expects for float tensors
img = img.squeeze(0).clamp(0.0, 1.0)

# convert the tensor back to a PIL image
img = transforms.ToPILImage()(img)

# display the image after convolution
img.show()

## Squeeze and Excitation (Channel Attention)
![Alt text](<image/Screenshot 2023-09-10 at 1.19.37 PM.png>)


![Alt text](<image/Screenshot 2023-09-10 at 1.20.10 PM.png>)

In [None]:
class SqueezeAndExcitation(nn.Module):
    """Squeeze-and-excitation channel attention.

    Each channel is reduced to one value by global average pooling, passed
    through a two-layer bottleneck (``c -> c // ratio -> c``) with ReLU and
    sigmoid, and the resulting per-channel weights rescale the input.

    The number of channels is only known in ``forward``, so the two linear
    layers are created the first time a channel count is seen and then kept as
    registered sub-modules; repeated calls reuse the same weights and the
    layers appear in ``parameters()``.

    Args:
        device: device on which the linear layers are created.
        ratio: channel reduction factor of the bottleneck.
    """

    def __init__(self, device, ratio=8):
        super().__init__()
        self.ratio = ratio
        self.device = device
        # Linear layers keyed by "<role>_<in>_<out>"; filled lazily in forward.
        self._linears = nn.ModuleDict()

    def _linear(self, role, in_features, out_features):
        """Return the registered linear layer for this role and shape, creating it once."""
        key = f"{role}_{in_features}_{out_features}"
        if key not in self._linears:
            self._linears[key] = nn.Linear(
                in_features, out_features, device=self.device)
        return self._linears[key]

    def forward(self, inputs):
        """Return ``inputs`` rescaled per channel; same shape as ``inputs``."""
        b = inputs.size(0)
        c = inputs.size(1)
        # Never let the bottleneck collapse to zero units when c < ratio.
        hidden = max(1, c // self.ratio)

        x = F.adaptive_avg_pool2d(inputs, 1).view(b, c)  # (batch_size, channels)
        x = F.relu(self._linear('squeeze', c, hidden)(x))
        x = torch.sigmoid(self._linear('excite', hidden, c)(x))
        return inputs * x.view(b, c, 1, 1)  # same size as input

## Changing Weights and Convolution
![Alt text](<image/Screenshot 2023-09-12 at 11.55.12 AM.png>) 

![Alt text](<image/Screenshot 2023-09-12 at 11.55.17 AM.png>)

In [None]:
class dynamic_attention(nn.Module):
    """Compute softmax attention scores over ``n_conv`` candidate convolutions.

    The whole input (channels, height and width) is flattened per sample and
    passed through a two-layer bottleneck; the output is a (batch, n_conv)
    tensor whose rows sum to 1.

    The flattened size is only known in ``forward``, so the two linear layers
    are created the first time a size is seen and then kept as registered
    sub-modules; repeated calls reuse the same weights and the layers appear
    in ``parameters()``.

    NOTE(review): the first layer has ``features * (features // ratio)``
    weights with ``features = C * H * W``; for large images and small
    ``ratio`` this is very large.

    Args:
        device: device on which the linear layers are created.
        n_conv: number of convolutions to score.
        ratio: reduction factor of the hidden layer.
    """

    def __init__(self, device, n_conv, ratio=8):
        super().__init__()
        self.ratio = ratio
        self.device = device
        self.n_conv = n_conv
        # Linear layers keyed by "<role>_<in>_<out>"; filled lazily in forward.
        self._linears = nn.ModuleDict()

    def _linear(self, role, in_features, out_features):
        """Return the registered linear layer for this role and shape, creating it once."""
        key = f"{role}_{in_features}_{out_features}"
        if key not in self._linears:
            self._linears[key] = nn.Linear(
                in_features, out_features, device=self.device)
        return self._linears[key]

    def forward(self, inputs):
        """Return attention scores of shape (batch, n_conv)."""
        b = inputs.size(0)  # batch,channel,h,w
        x = inputs.reshape(b, -1)  # reshape also accepts non-contiguous inputs
        features = x.size(1)
        # Never let the hidden layer collapse to zero units.
        hidden = max(1, features // self.ratio)

        x = F.relu(self._linear('reduce', features, hidden)(x))
        x = self._linear('score', hidden, self.n_conv)(x)
        return torch.softmax(x, dim=1)

In [None]:
class Test1(nn.Module):
    """Mix three 3x3 "expert" convolutions with input-dependent attention weights.

    ``dynamic_attention`` scores the three convolutions for the input; their
    weights and biases are combined as an attention-weighted sum, and the
    input is convolved once with the combined kernel, followed by ReLU.

    The combined kernel is kept as an ordinary tensor in the autograd graph,
    so gradients of the output reach both the expert convolutions and the
    attention network.

    Args:
        seed: value used to seed ``random`` and torch's CPU/CUDA generators
            (a global side effect) before the layers are created.
    """

    def __init__(self, seed):
        super(Test1, self).__init__()
        # fix the seed
        random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        # Expert filters that get mixed by the attention scores.
        # NOTE(review): the original comment says these are "not changing";
        # they are left trainable here - call requires_grad_(False) on them if
        # they are meant to stay frozen.
        self.conv1 = nn.Conv2d(3, 3, kernel_size=3, padding=1, bias=True)
        self.conv2 = nn.Conv2d(3, 3, kernel_size=3, padding=1, bias=True)
        self.conv3 = nn.Conv2d(3, 3, kernel_size=3, padding=1, bias=True)
        self.channel_attention = dynamic_attention('cpu', n_conv=3, ratio=1)

    def apply_attention(self, attention, weights):
        """Return ``weights`` scaled by ``attention`` without modifying ``weights``."""
        return weights * attention

    def forward(self, x):
        """Convolve ``x`` with the attention-weighted mix of the three experts."""
        attention = self.channel_attention(x)
        # Only the scores of the first sample are used, i.e. a single kernel is
        # shared by the whole batch.
        scores = attention[0]
        experts = (self.conv1, self.conv2, self.conv3)

        weight = sum(self.apply_attention(scores[i], conv.weight)
                     for i, conv in enumerate(experts))
        bias = sum(self.apply_attention(scores[i], conv.bias)
                   for i, conv in enumerate(experts))

        x = F.conv2d(x, weight, bias=bias, stride=1, padding='same')
        return F.relu(x)

In [None]:
# Random 1 x 3 x 128 x 128 input, drawn before the model constructor re-seeds the generators.
input = torch.rand(1, 3, 128, 128)
model = Test1(seed=123)

In [None]:
# Forward pass of the model on the random input defined above.
output = model(input)

In [None]:
# All-zero regression target. It must have the same shape as the model output
# (1 x 3 x 128 x 128 for the input above); a (1, 1, 3, 3) target cannot be
# broadcast against that output and makes the loss computation fail.
targets = torch.zeros([1, 3, 128, 128])

In [None]:
# Mean-squared-error loss and an Adam optimizer over every registered model parameter.
metrics = nn.MSELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:

# Shape of scores = (batch, classes, height, width)
loss = metrics(output, targets)
# Shape of targets = (batch, height, width)
# Print the loss so it can be inspected for this step
print(loss)
# Calculate the gradient of the loss w.r.t. the parameters
# Reset the optimizer's gradients to zero first, so gradients do not accumulate across steps
optimizer.zero_grad()
loss.backward()

# gradient descent
optimizer.step()

## Residual Spatial Attention
![Alt text](<image/Screenshot 2023-09-16 at 8.51.03 PM.png>)

In [None]:
class soft_mask(nn.Module):
    """Soft spatial mask in the range (1, 2) with the same height/width as the input.

    The input is downsampled twice with a shared stride-2 convolution,
    upsampled twice by a factor of 2 (bilinear), reduced to one channel by a
    1x1 convolution and squashed with a sigmoid; 1 is added so the mask acts
    as a residual gain.

    When the input height or width is not a multiple of 4 the down/up path
    does not return to the input size, so the result is resized to the input
    size to keep the mask multipliable with the input.

    Args:
        in_channels: number of channels of the input feature map.
    """

    def __init__(self, in_channels) -> None:
        super(soft_mask, self).__init__()

        self.down = nn.Conv2d(in_channels, in_channels, (3, 3), 2, padding=1)
        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear")
        self.conv_final = nn.Conv2d(in_channels, 1, (1, 1), 1, padding="same")

    def forward(self, inputs):
        """Return a mask of shape (batch, 1, H, W) for ``inputs`` of shape (batch, C, H, W)."""
        x = self.down(inputs)  # half the size
        x = self.down(x)  # half the size

        x = self.upsample(x)
        x = self.upsample(x)

        target_size = tuple(inputs.shape[-2:])
        if tuple(x.shape[-2:]) != target_size:
            # Stride-2 rounding made the round trip miss the input size.
            x = F.interpolate(x, size=target_size,
                              mode="bilinear", align_corners=False)

        x = self.conv_final(x)
        x = torch.sigmoid(x)
        return x + 1

In [None]:
class spatial_attention(nn.Module):
    """Scale a 3x3-convolved copy of the input by the mask produced by ``soft_mask``.

    Args:
        in_channels: number of channels of the input feature map; the
            convolution keeps this channel count.
    """

    def __init__(self, in_channels) -> None:
        super().__init__()

        self.soft_mask = soft_mask(in_channels)
        self.down = nn.Conv2d(in_channels, in_channels,
                              kernel_size=(3, 3), stride=1, padding="same")

    def forward(self, inputs):
        """Return ``conv(inputs) * soft_mask(inputs)``."""
        features = self.down(inputs)
        mask = self.soft_mask(inputs)
        return features * mask

In [None]:
# Random batch of two 64-channel 128 x 128 feature maps and a matching attention block.
a = torch.rand(2, 64, 128, 128)
model = spatial_attention(in_channels=64)

In [None]:
# Run the attention block on the random batch and display the output shape.
y = model(a)
y.shape

## BAM & CBAM - Bottleneck Attention (Spatial and Channel)
![Alt text](<image/Screenshot 2023-09-17 at 4.31.03 PM.png>)

In [None]:
class BAM_channel(nn.Module):
    """Channel-attention branch: returns one sigmoid weight per channel.

    Each channel is reduced to one value by global average pooling and passed
    through a two-layer bottleneck (``c -> c // ratio -> c``) with ReLU and
    sigmoid. The result has shape (batch, channels, 1, 1) and is not
    multiplied with the input here.

    The number of channels is only known in ``forward``, so the two linear
    layers are created the first time a channel count is seen and then kept as
    registered sub-modules; repeated calls reuse the same weights and the
    layers appear in ``parameters()``.

    Args:
        device: device on which the linear layers are created.
        ratio: channel reduction factor of the bottleneck.
    """

    def __init__(self, device, ratio=8):
        super(BAM_channel, self).__init__()
        self.ratio = ratio
        self.device = device
        # Linear layers keyed by "<role>_<in>_<out>"; filled lazily in forward.
        self._linears = nn.ModuleDict()

    def _linear(self, role, in_features, out_features):
        """Return the registered linear layer for this role and shape, creating it once."""
        key = f"{role}_{in_features}_{out_features}"
        if key not in self._linears:
            self._linears[key] = nn.Linear(
                in_features, out_features, device=self.device)
        return self._linears[key]

    def forward(self, inputs):
        """Return channel weights of shape (batch, channels, 1, 1)."""
        b = inputs.size(0)
        c = inputs.size(1)
        # Never let the bottleneck collapse to zero units when c < ratio.
        hidden = max(1, c // self.ratio)

        x = F.adaptive_avg_pool2d(inputs, 1).view(b, c)  # (batch_size, channels)
        x = F.relu(self._linear('reduce', c, hidden)(x))
        x = torch.sigmoid(self._linear('expand', hidden, c)(x))
        return x.view(b, c, 1, 1)

In [None]:
class BAM_spatial(nn.Module):
    """Spatial-attention branch: returns a one-channel sigmoid map.

    A 1x1 convolution reduces the channels to ``in_channels // ratio``, the
    same dilated 3x3 convolution is applied three times, and a final 1x1
    convolution followed by a sigmoid produces the map.

    Args:
        in_channels: number of channels of the input feature map.
        ratio: channel reduction factor.
    """

    def __init__(self, in_channels, ratio=8) -> None:
        super().__init__()

        reduced = in_channels // ratio
        self.conv1 = nn.Conv2d(in_channels, reduced, kernel_size=(1, 1),
                               stride=1, padding="same")
        self.conv2 = nn.Conv2d(reduced, reduced, kernel_size=(3, 3),
                               stride=1, padding="same", dilation=2)
        self.final = nn.Conv2d(reduced, 1, kernel_size=(1, 1),
                               stride=1, padding="same")

    def forward(self, inputs):
        """Return a map of shape (batch, 1, H, W) with values in (0, 1)."""
        x = self.conv1(inputs)
        # The dilated convolution is shared across its three applications.
        for _ in range(3):
            x = self.conv2(x)
        return F.sigmoid(self.final(x))

In [None]:
class BAM(nn.Module):
    """Attention block that adds a channel branch and a spatial branch.

    Both branch outputs are batch-normalised, summed (the shapes broadcast),
    passed through a sigmoid and used to gate the input; the gated input is
    then added back to the input.

    Args:
        in_channels: number of channels of the input feature map.
        ratio: reduction factor handed to both branches.
        device: device handed to the channel branch.
    """

    def __init__(self, in_channels, ratio, device) -> None:
        super().__init__()

        self.BAM_channel = BAM_channel(device, ratio)
        self.BAM_spatial = BAM_spatial(in_channels, ratio)
        self.batch_norm_channel = nn.BatchNorm2d(in_channels)
        self.batch_norm_spatial = nn.BatchNorm2d(1)

    def forward(self, inputs):
        """Return ``inputs + sigmoid(spatial + channel) * inputs``."""
        channel_map = self.batch_norm_channel(self.BAM_channel(inputs))
        spatial_map = self.batch_norm_spatial(self.BAM_spatial(inputs))

        gate = F.sigmoid(spatial_map + channel_map)
        gated = gate * inputs

        return inputs + gated

In [None]:
# Sequential BAM
class CBAM(nn.Module):
    """Apply the channel branch and then the spatial branch one after the other.

    The batch-normalised channel weights scale the input first; the spatial
    branch is then computed from that scaled tensor, batch-normalised and
    multiplied onto it.

    Args:
        in_channels: number of channels of the input feature map.
        ratio: reduction factor handed to both branches.
        device: device handed to the channel branch.
    """

    def __init__(self, in_channels, ratio, device) -> None:
        super().__init__()

        self.BAM_channel = BAM_channel(device, ratio)
        self.BAM_spatial = BAM_spatial(in_channels, ratio)
        self.batch_norm_channel = nn.BatchNorm2d(in_channels)
        self.batch_norm_spatial = nn.BatchNorm2d(1)

    def forward(self, inputs):
        """Return the input after channel scaling followed by spatial scaling."""
        channel_weights = self.batch_norm_channel(self.BAM_channel(inputs))
        scaled = inputs * channel_weights

        spatial_weights = self.batch_norm_spatial(self.BAM_spatial(scaled))
        return spatial_weights * scaled

In [None]:
# Random batch of two 64-channel 56 x 56 feature maps and a CBAM block for it.
inp = torch.rand(2, 64, 56, 56)
model = CBAM(in_channels=64, ratio=8, device='cpu')

In [None]:
# Forward pass of the CBAM block on the random batch defined above.
y = model(inp)

## Extras

In [None]:
# class dynamic_kernel2d(nn.Module):

#     def __init__(self, output_type, height, width, input_channels, kernel_size,
#                  filters, device):
#         super(dynamic_kernel2d, self).__init__()
#         self.device = device
#         self.output_type = output_type
#         self.filters = filters
#         self.kernel_size = kernel_size[0]
#         self.input_channels = input_channels
#         self.k = width * height * self.filters

#         assert output_type in [
#             '2d_filter', 'dynamic_filter'], "Type must be one of : \n1) 2d_filter \n2) dynamic_filter"

#         self.conv1 = nn.Conv2d(input_channels, input_channels, kernel_size=(7, 7), padding='valid', stride=1,
#                                groups=input_channels)
#         self.point1 = nn.Conv2d(input_channels, 64, kernel_size=(1, 1), padding='same', stride=1,
#                                 groups=1)
#         self.conv2 = nn.Conv2d(64, 64, kernel_size=(7, 7), padding='valid', stride=1,
#                                groups=64)
#         self.point2 = nn.Conv2d(64, 64, kernel_size=(1, 1), padding='same', stride=1,
#                                 groups=1)
#         self.conv3 = nn.Conv2d(input_channels, input_channels*self.k, kernel_size=(1, 1), padding='same', stride=1,
#                                groups=input_channels)

#         self.max_pooling = nn.MaxPool2d(kernel_size=(2, 2), stride=2)
#         self.soft = nn.Softmax(dim=0)

#     def linear_layer(self,input,input_shape, output_shape):
#         return nn.Linear(input_shape, output_shape, device=self.device)(input)

#     def forward(self, inputs):
#         h = inputs.size(2)
#         w = inputs.size(3)

#         if (self.output_type == '2d_filter'):
#             # a 2d filter capable to creating transformation
#             x = self.conv1(inputs)
#             x = self.point1(x)
#             x = self.max_pooling(x)
#             x = self.conv2(x)
#             x = self.point2(x)
#             x = self.max_pooling(x)
#             x = self.conv2(x)
#             x = self.point2(x)
#             x = self.max_pooling(x)

#             # kernel (filters, input_channels, kernel_size, kernel_size)
#             output_features = self.filters * self.input_channels * \
#                 self.kernel_size * self.kernel_size
#             x = x.view(-1)  # Flatten max pooling layer
#             input_features = x.size(0)  # get flatten size
#             v1 = self.linear_layer(x,input_features, output_features)  # fully connected to get desired kernel size
#             # reshape into desired kernel size
#             v1 = v1.view(self.filters, self.input_channels,
#                          self.kernel_size, self.kernel_size)

#             return v1

#         elif (self.output_type == 'dynamic_filter'):
#             # a 2d filter for each pixel of input image for transformation
#             x = self.conv1(inputs)
#             x = self.point1(x)
#             x = self.max_pooling(x)
#             x = self.conv2(x)
#             x = self.point2(x)
#             x = self.max_pooling(x)
#             x = self.conv2(x)
#             x = self.point2(x)
#             x = self.max_pooling(x)
#             x = self.conv2(x)
#             x = self.point2(x)
#             x = self.max_pooling(x)

#             # kernel (input_height, input_width, input_filters, input_channels, kernel_size, kernel_size)
#             output_features = self.input_channels * self.kernel_size * self.kernel_size

#             x = x.view(-1)  # Flatten max pooling layer
#             input_features = x.size(0)  # get flatten size

#             # fully connected to get desired kernel size
#             v1 = self.linear_layer(x,input_features, output_features)
#             # Reshape to input into conv layer
#             v1 = v1.view(1, self.input_channels,
#                          self.kernel_size, self.kernel_size)

#             # Conv layer to add depth to the kernel to reshape it into desired filter size
#             v1 = self.conv3(v1)
#             # reshape into desired kernel size
#             v1 = v1.view(h, w, self.filters, self.input_channels,
#                          self.kernel_size, self.kernel_size)

#             return v1

In [None]:
# class dynamic_conv2d(nn.Module):
#     def __init__(self, filter, padding, device, bias: bool = True) -> None:
#         super(dynamic_conv2d, self).__init__()
#         factory_kwargs = {'device': device, 'dtype': torch.float32}
#         self.filter = filter
#         self.out_channels = filter.size(0)
#         if bias:
#             self.bias = Parameter(torch.empty(
#                 self.out_channels, **factory_kwargs))
#         self.padding = padding
#         self.device = device

#         assert padding in ['valid', 'same'], "Either 'same' or 'valid' "

#     def forward(self, inputs):
#         return F.conv2d(inputs, self.filter, self.bias, padding=self.padding)