In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class MyResidualBlock(nn.Module):
    """Residual block built from two (1, 9) convolutions on 256 channels.

    The main path applies Conv2d -> BatchNorm2d -> LeakyReLU twice and the
    result is added to a shortcut taken from the block input. When
    ``downsample`` is truthy the first convolution uses stride 2 along the
    last axis, and the shortcut is average-pooled by 2 along that axis and
    passed through a bias-free 1x1 convolution so that both branches end up
    with the same width.

    Args:
        downsample: If truthy, halve (rounding up) the last axis of the input.
    """

    def __init__(self, downsample):
        super(MyResidualBlock, self).__init__()
        self.downsample = downsample
        self.stride = 2 if self.downsample else 1
        K = 9
        # "Same" padding for an odd kernel: keeps the width at stride 1.
        P = (K - 1) // 2
        self.conv1 = nn.Conv2d(in_channels=256,
                               out_channels=256,
                               kernel_size=(1, K),
                               stride=(1, self.stride),
                               padding=(0, P),
                               bias=False)
        self.bn1 = nn.BatchNorm2d(256)

        # The second convolution always runs at stride 1.
        self.conv2 = nn.Conv2d(in_channels=256,
                               out_channels=256,
                               kernel_size=(1, K),
                               padding=(0, P),
                               bias=False)
        self.bn2 = nn.BatchNorm2d(256)

        if self.downsample:
            # conv1 at stride 2 produces a width of ceil(W / 2). ceil_mode=True
            # makes the pooled shortcut use the same rounding; with the default
            # floor rounding an odd W gives floor(W / 2) and the addition in
            # forward() fails on a shape mismatch. Even widths are unaffected.
            self.idfunc_0 = nn.AvgPool2d(kernel_size=(1, 2),
                                         stride=(1, 2),
                                         ceil_mode=True)
            self.idfunc_1 = nn.Conv2d(in_channels=256,
                                      out_channels=256,
                                      kernel_size=(1, 1),
                                      bias=False)

    def forward(self, x):
        """Run the block on ``x`` and return main path + shortcut.

        Args:
            x: 4-D tensor with 256 channels in dim 1 (required by ``conv1``).

        Returns:
            Tensor with 256 channels; the last axis is halved (rounded up)
            when ``downsample`` is set and unchanged otherwise.
        """
        identity = x
        x = F.leaky_relu(self.bn1(self.conv1(x)))
        x = F.leaky_relu(self.bn2(self.conv2(x)))
        if self.downsample:
            # Bring the shortcut to the same width as the main path.
            identity = self.idfunc_0(identity)
            identity = self.idfunc_1(identity)

        x = x + identity
        return x

class NN(nn.Module):
    """Convolutional encoder: stem conv, five residual blocks, max-pool, linear.

    Args:
        embedding_dim: Size of the output vector produced by ``fc_1``.
    """

    def __init__(self, embedding_dim):
        super().__init__()
        # Stem: 12 input channels -> 256 feature maps, stride 2 on the last axis.
        self.conv = nn.Conv2d(12, 256,
                              kernel_size=(1, 5),
                              stride=(1, 2),
                              padding=(0, 2),
                              bias=False)
        self.bn = nn.BatchNorm2d(256)

        # rb_0 .. rb_3 downsample; rb_4 keeps the resolution.
        for idx in range(4):
            setattr(self, f"rb_{idx}", MyResidualBlock(downsample=True))
        self.rb_4 = MyResidualBlock(downsample=False)

        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        self.fc_1 = nn.Linear(256, embedding_dim)

    def forward(self, x):
        """Encode ``x`` into a ``(batch, embedding_dim)`` tensor.

        ``x`` is indexed as a 3-D tensor and must carry 12 channels in dim 1;
        a singleton axis is inserted at dim 2 so the 2-D layers can be used.
        """
        stem = self.conv(x[:, :, None, :])
        h = F.leaky_relu(self.bn(stem))

        for block in (self.rb_0, self.rb_1, self.rb_2, self.rb_3):
            h = block(h)

        # Dropout sits between the downsampling stack and the final block.
        h = F.dropout(h, p=0.5, training=self.training)
        h = self.rb_4(h)

        # Drop the singleton axis, then max-pool over the remaining last axis.
        h = h.squeeze(2)
        pooled = self.pool(h)
        return self.fc_1(pooled.squeeze(2))

class NN_v2(nn.Module):
    """Deeper encoder: every residual stage is followed by one extra block.

    Submodules are registered as ``rb_<i>`` and ``rb_<i>_add1`` for
    ``i = 0..4``; only ``rb_0`` .. ``rb_3`` downsample.

    Args:
        embedding_dim: Size of the output vector produced by ``fc_1``.
    """

    def __init__(self, embedding_dim):
        super().__init__()
        # Stem: 12 input channels -> 256 feature maps, stride 2 on the last axis.
        self.conv = nn.Conv2d(12, 256,
                              kernel_size=(1, 5),
                              stride=(1, 2),
                              padding=(0, 2),
                              bias=False)
        self.bn = nn.BatchNorm2d(256)

        # Stages 0-3 start with a downsampling block; stage 4 does not.
        for stage in range(5):
            setattr(self, f"rb_{stage}",
                    MyResidualBlock(downsample=stage < 4))
            setattr(self, f"rb_{stage}_add1",
                    MyResidualBlock(downsample=False))

        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        self.fc_1 = nn.Linear(256, embedding_dim)

    def forward(self, x):
        """Encode ``x`` into a ``(batch, embedding_dim)`` tensor.

        ``x`` is indexed as a 3-D tensor and must carry 12 channels in dim 1;
        a singleton axis is inserted at dim 2 so the 2-D layers can be used.
        """
        h = F.leaky_relu(self.bn(self.conv(x[:, :, None, :])))

        for stage in range(4):
            for suffix in ("", "_add1"):
                h = getattr(self, f"rb_{stage}{suffix}")(h)

        # Dropout sits between the downsampling stages and the final stage.
        h = F.dropout(h, p=0.5, training=self.training)

        for suffix in ("", "_add1"):
            h = getattr(self, f"rb_4{suffix}")(h)

        # Drop the singleton axis, then max-pool over the remaining last axis.
        h = h.squeeze(2)
        pooled = self.pool(h)
        return self.fc_1(pooled.squeeze(2))

class NN_v3(nn.Module):
    """Deepest encoder: every residual stage is followed by two extra blocks.

    Submodules are registered as ``rb_<i>``, ``rb_<i>_add1`` and
    ``rb_<i>_add2`` for ``i = 0..4``; only ``rb_0`` .. ``rb_3`` downsample.

    Args:
        embedding_dim: Size of the output vector produced by ``fc_1``.
    """

    def __init__(self, embedding_dim):
        super().__init__()
        # Stem: 12 input channels -> 256 feature maps, stride 2 on the last axis.
        self.conv = nn.Conv2d(12, 256,
                              kernel_size=(1, 5),
                              stride=(1, 2),
                              padding=(0, 2),
                              bias=False)
        self.bn = nn.BatchNorm2d(256)

        # Stages 0-3 start with a downsampling block; stage 4 does not.
        for stage in range(5):
            setattr(self, f"rb_{stage}",
                    MyResidualBlock(downsample=stage < 4))
            for extra in (1, 2):
                setattr(self, f"rb_{stage}_add{extra}",
                        MyResidualBlock(downsample=False))

        self.pool = nn.AdaptiveMaxPool1d(output_size=1)
        self.fc_1 = nn.Linear(256, embedding_dim)

    def forward(self, x):
        """Encode ``x`` into a ``(batch, embedding_dim)`` tensor.

        ``x`` is indexed as a 3-D tensor and must carry 12 channels in dim 1;
        a singleton axis is inserted at dim 2 so the 2-D layers can be used.
        """
        suffixes = ("", "_add1", "_add2")
        h = F.leaky_relu(self.bn(self.conv(x[:, :, None, :])))

        for stage in range(4):
            for suffix in suffixes:
                h = getattr(self, f"rb_{stage}{suffix}")(h)

        # Dropout sits between the downsampling stages and the final stage.
        h = F.dropout(h, p=0.5, training=self.training)

        for suffix in suffixes:
            h = getattr(self, f"rb_4{suffix}")(h)

        # Drop the singleton axis, then max-pool over the remaining last axis.
        h = h.squeeze(2)
        pooled = self.pool(h)
        return self.fc_1(pooled.squeeze(2))


class RNN(nn.Module):
    """Convolutional front end followed by a bidirectional GRU.

    A stem convolution and four downsampling residual blocks produce a
    256-channel sequence; a single-layer bidirectional GRU (2 x 128 hidden
    units) runs over that sequence, the result is max-pooled over the sequence
    axis and projected by ``fc_1``.

    Args:
        embedding_dim: Size of the output vector produced by ``fc_1``.
    """

    def __init__(self, embedding_dim):
        # Zero-argument super(): naming another class here (``super(NN, self)``)
        # raises TypeError because an RNN instance is not an instance of NN.
        super().__init__()
        self.conv = nn.Conv2d(in_channels=12,
                              out_channels=256,
                              kernel_size=(1, 5),
                              padding=(0, 2),
                              stride=(1, 2),
                              bias=False)

        self.bn = nn.BatchNorm2d(256)
        self.rb_0 = MyResidualBlock(downsample=True)
        self.rb_1 = MyResidualBlock(downsample=True)
        self.rb_2 = MyResidualBlock(downsample=True)
        self.rb_3 = MyResidualBlock(downsample=True)

        # Bidirectional with hidden size 128 -> 256 output features per step,
        # matching the input size of fc_1.
        self.rnn = torch.nn.GRU(256, 128, batch_first=True, bidirectional=True, num_layers=1)
        self.pool = nn.AdaptiveMaxPool1d(output_size=1)

        self.fc_1 = nn.Linear(256, embedding_dim)

    def forward(self, x):
        """Encode ``x`` into a ``(batch, embedding_dim)`` tensor.

        ``x`` is indexed as a 3-D tensor and must carry 12 channels in dim 1;
        a singleton axis is inserted at dim 2 so the 2-D layers can be used.
        """
        x = F.leaky_relu(self.bn(self.conv(x[:, :, None, :])))

        x = self.rb_0(x)
        x = self.rb_1(x)
        x = self.rb_2(x)
        x = self.rb_3(x)

        x = F.dropout(x, p=0.5, training=self.training)
        x = x.squeeze(2)
        # The GRU is batch_first, so move channels to the last axis; the final
        # hidden state is not used.
        x, _ = self.rnn(x.permute(0, 2, 1))
        # Back to (batch, features, steps) so the pool reduces over the steps.
        x = x.permute(0, 2, 1)
        x = self.pool(x).squeeze(2)
        x = self.fc_1(x)
        return x

c:\Users\navme\AppData\Local\Programs\Python\Python38\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
c:\Users\navme\AppData\Local\Programs\Python\Python38\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll


### MyResidualBlock Class

The MyResidualBlock class defines a residual block used in the neural network models. Here's a breakdown of its components and functionality:

- **Initialization (__init__ method)**:
  - **Parameters**:
    - downsample: A boolean indicating whether to downsample the input.
  - **Attributes**:
    - stride: Set to 2 if downsampling, otherwise 1.
    - conv1: First convolutional layer with kernel size (1, 9), stride (1, 2) when downsampling and (1, 1) otherwise, and padding (0, 4), which preserves the width of the input when the stride is 1.
    - bn1: Batch normalization layer for the output of conv1.
    - conv2: Second convolutional layer with the same kernel size and padding.
    - bn2: Batch normalization layer for the output of conv2.
    - idfunc_0: Average pooling layer for downsampling the identity connection (used if downsample is True).
    - idfunc_1: 1x1 convolutional layer (256 -> 256 channels, no bias) that projects the pooled identity connection; the number of channels is unchanged (used if downsample is True).

- **Forward Pass (forward method)**:
  - **Input**: x, the input tensor.
  - **Operations**:
    - Store the input tensor in identity.
    - Apply the first convolutional layer (conv1) followed by batch normalization (bn1) and Leaky ReLU activation.
    - Apply the second convolutional layer (conv2) followed by batch normalization (bn2) and Leaky ReLU activation.
    - If downsample is True:
      - Downsample the identity tensor using idfunc_0.
      - Project the pooled identity tensor with the 1x1 convolution idfunc_1 (the number of channels stays at 256).
    - Add the processed input tensor (x) to the identity tensor.
  - **Output**: The result of adding the processed input tensor to the identity tensor, which is the output of the residual block.

### Summary
- **Purpose**: The MyResidualBlock class implements a residual block with optional downsampling, which helps in training deep neural networks by allowing gradients to flow through the network more easily.
- **Components**: Two convolutional layers with batch normalization and Leaky ReLU activation, and optional downsampling for the identity connection.
- **Functionality**: Processes the input tensor through the convolutional layers and adds it to the identity tensor, enabling residual learning.


Here is a line-by-line breakdown of the `MyResidualBlock` class:

### Initialization (`__init__` method)

```python
class MyResidualBlock(nn.Module):
    def __init__(self, downsample):
        super(MyResidualBlock, self).__init__()
```
- **Purpose**: Defines a class `MyResidualBlock` that inherits from `nn.Module`.
- **Parameters**: downsample is a boolean indicating whether to downsample the input.
- **Initialization**: Calls the parent class (`nn.Module`) initializer.

```python
        self.downsample = downsample
        self.stride = 2 if self.downsample else 1
```
- **Purpose**: Sets the downsample attribute and determines the stride for the convolutional layers.
- **Details**: If downsample is `True`, the stride is set to 2 (downsampling); otherwise, it is set to 1.

```python
        K = 9
        P = (K-1)//2
```
- **Purpose**: Defines the kernel size (`K`) and padding (`P`) for the convolutional layers.
- **Details**: Kernel size is 9 and padding is `(9-1)//2 = 4`, which preserves the width of the input when the stride is 1.

```python
        self.conv1 = nn.Conv2d(in_channels=256,
                               out_channels=256,
                               kernel_size=(1, K),
                               stride=(1, self.stride),
                               padding=(0, P),
                               bias=False)
```
- **Purpose**: Defines the first convolutional layer.
- **Details**: 
  - `in_channels` and `out_channels` are both 256.
  - Kernel size is `(1, 9)`.
  - Stride is `(1, self.stride)`.
  - Padding is `(0, 4)`.
  - Bias is set to `False`.

```python
        self.bn1 = nn.BatchNorm2d(256)
```
- **Purpose**: Defines the first batch normalization layer.
- **Details**: Normalizes the output of the first convolutional layer.

```python
        self.conv2 = nn.Conv2d(in_channels=256,
                               out_channels=256,
                               kernel_size=(1, K),
                               padding=(0, P),
                               bias=False)
```
- **Purpose**: Defines the second convolutional layer.
- **Details**: 
  - `in_channels` and `out_channels` are both 256.
  - Kernel size is `(1, 9)`.
  - Padding is `(0, 4)`.
  - Bias is set to `False`.

```python
        self.bn2 = nn.BatchNorm2d(256)
```
- **Purpose**: Defines the second batch normalization layer.
- **Details**: Normalizes the output of the second convolutional layer.

```python
        if self.downsample:
            self.idfunc_0 = nn.AvgPool2d(kernel_size=(1, 2), stride=(1, 2))
            self.idfunc_1 = nn.Conv2d(in_channels=256,
                                      out_channels=256,
                                      kernel_size=(1, 1),
                                      bias=False)
```
- **Purpose**: Defines the identity function layers for downsampling.
- **Details**: 
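  - If downsample is `True`, an average pooling layer (`idfunc_0`) and a convolutional layer (`idfunc_1`) are defined.
  - `idfunc_0` halves the last (width) dimension; the height dimension is left unchanged.
  - `idfunc_1` is a 1x1 convolution (256 -> 256 channels) that projects the pooled identity before it is added to the output of the main path; the number of channels is unchanged.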

### Forward Pass (`forward` method)

```python
    def forward(self, x):
        identity = x
```
- **Purpose**: Defines the forward pass of the residual block.
- **Details**: Stores the input tensor `x` in `identity` for the skip connection.

```python
        x = F.leaky_relu(self.bn1(self.conv1(x)))
```
- **Purpose**: Applies the first convolutional layer, followed by batch normalization and Leaky ReLU activation.
- **Details**: 
  - `self.conv1(x)` applies the first convolution.
  - `self.bn1(...)` normalizes the output.
  - `F.leaky_relu(...)` applies the Leaky ReLU activation function.

```python
        x = F.leaky_relu(self.bn2(self.conv2(x)))
```
- **Purpose**: Applies the second convolutional layer, followed by batch normalization and Leaky ReLU activation.
- **Details**: 
  - `self.conv2(x)` applies the second convolution.
  - `self.bn2(...)` normalizes the output.
  - `F.leaky_relu(...)` applies the Leaky ReLU activation function.

```python
        if self.downsample:
            identity = self.idfunc_0(identity)
            identity = self.idfunc_1(identity)
```
- **Purpose**: Applies the identity function layers if downsampling is required.
- **Details**: 
  - `self.idfunc_0(identity)` applies average pooling to downsample the identity tensor.
  - `self.idfunc_1(identity)` applies a 1x1 convolution to the pooled identity; the number of channels stays at 256.

```python
        x = x + identity
```
- **Purpose**: Adds the processed input tensor `x` to the identity tensor.
- **Details**: This is the core of the residual connection, enabling the network to learn residual functions.

```python
        return x
```
- **Purpose**: Returns the output tensor.
- **Details**: The output tensor is the result of adding the processed input tensor to the identity tensor.

### Summary
- **MyResidualBlock**: Implements a residual block with optional downsampling.
- **Components**: Two convolutional layers with batch normalization and Leaky ReLU activation, and optional downsampling for the identity connection.
- **Functionality**: Processes the input tensor through the convolutional layers and adds it to the identity tensor, enabling residual learning.

In [None]:
from graphviz import Digraph

def visualize_residual_block(downsample, filename='residual_block', view=True):
    """Render a Graphviz diagram of the MyResidualBlock data flow as a PNG.

    Args:
        downsample: If truthy, draw the shortcut through the AvgPool2d and
            1x1 Conv2d nodes; otherwise draw a direct Input -> Output edge.
        filename: Output name handed to ``Digraph.render``. Defaults to
            'residual_block'; pass a different name per call to keep one
            diagram from overwriting another.
        view: Forwarded to ``Digraph.render``; the default True asks Graphviz
            to open the rendered file.
    """
    dot = Digraph(comment='MyResidualBlock')

    # Input node
    dot.node('x', 'Input')

    # Main path: (node id, label) in the order the data flows through them.
    main_path = [
        ('conv1', 'Conv2d\n(1, 9)'),
        ('bn1', 'BatchNorm2d'),
        ('relu1', 'LeakyReLU'),
        ('conv2', 'Conv2d\n(1, 9)'),
        ('bn2', 'BatchNorm2d'),
        ('relu2', 'LeakyReLU'),
    ]
    for node_id, label in main_path:
        dot.node(node_id, label)

    # Identity path nodes exist only in the downsampling variant.
    if downsample:
        dot.node('idfunc_0', 'AvgPool2d\n(1, 2)')
        dot.node('idfunc_1', 'Conv2d\n(1, 1)')

    # Output node
    dot.node('output', 'Output')

    # Chain Input -> conv1 -> ... -> relu2 by pairing each node with its successor.
    chain = ['x'] + [node_id for node_id, _ in main_path]
    dot.edges(zip(chain, chain[1:]))

    # Edges for identity path
    if downsample:
        dot.edge('x', 'idfunc_0')
        dot.edge('idfunc_0', 'idfunc_1')
        dot.edge('idfunc_1', 'output')
    else:
        dot.edge('x', 'output')

    # Merge the main path into the output node.
    dot.edge('relu2', 'output')

    # Render the graph
    dot.render(filename, format='png', view=view)

# Visualize the block twice: first with downsampling, then without.
# NOTE(review): both calls render to 'residual_block' in PNG format, so the
# second diagram replaces the first on disk.
for with_downsample in (True, False):
    visualize_residual_block(downsample=with_downsample)

Here are the main differences between the NN, NN_v2, and NN_v3 models:

### NN Model
- **Residual Blocks**: 5 blocks (4 downsampled, 1 non-downsampled)
- **Structure**:
  - rb_0 (downsampled)
  - rb_1 (downsampled)
  - rb_2 (downsampled)
  - rb_3 (downsampled)
  - rb_4 (non-downsampled)

### NN_v2 Model
- **Residual Blocks**: 10 blocks (4 downsampled, 6 non-downsampled)
- **Structure**:
  - rb_0 (downsampled)
  - rb_0_add1 (non-downsampled)
  - rb_1 (downsampled)
  - rb_1_add1 (non-downsampled)
  - rb_2 (downsampled)
  - rb_2_add1 (non-downsampled)
  - rb_3 (downsampled)
  - rb_3_add1 (non-downsampled)
  - rb_4 (non-downsampled)
  - rb_4_add1 (non-downsampled)

### NN_v3 Model
- **Residual Blocks**: 15 blocks (4 downsampled, 11 non-downsampled)
- **Structure**:
  - rb_0 (downsampled)
  - rb_0_add1 (non-downsampled)
  - rb_0_add2 (non-downsampled)
  - rb_1 (downsampled)
  - rb_1_add1 (non-downsampled)
  - rb_1_add2 (non-downsampled)
  - rb_2 (downsampled)
  - rb_2_add1 (non-downsampled)
  - rb_2_add2 (non-downsampled)
  - rb_3 (downsampled)
  - rb_3_add1 (non-downsampled)
  - rb_3_add2 (non-downsampled)
  - rb_4 (non-downsampled)
  - rb_4_add1 (non-downsampled)
  - rb_4_add2 (non-downsampled)

### Summary
- **NN**: Simplest model with fewer residual blocks.
- **NN_v2**: Adds one additional non-downsampled residual block after each block of NN, including the final non-downsampled rb_4.
- **NN_v3**: Further increases the depth by adding two non-downsampled residual blocks after each block of NN, including rb_4.

These differences in the number and arrangement of residual blocks can affect the model's capacity and performance.