In [2]:
import torch
from torch import nn
from torch.nn import functional as F

So, in the example of a tensor with a shape of (1, 5, 10):

Batch Size: 1 (processing one sequence at a time).
Channels: 5 (representing information from 5 different features or sensors).
Sequence Length: 10 (the length of each sequence).

In [10]:
conv0 = nn.Conv1d(in_channels=5,out_channels=3,kernel_size=3,stride=1)
input0 = torch.randn(1, 5, 10)
print(input0.shape)
out = conv0(input0)
print(out.shape)
out

torch.Size([1, 5, 10])
torch.Size([1, 3, 8])


tensor([[[ 1.3189e-01,  1.3326e+00,  2.9513e-01, -1.2541e-01,  4.8359e-01,
           1.9740e-01,  7.8930e-01,  3.7541e-01],
         [-3.4906e-01, -3.4798e-01, -5.7723e-02,  1.0021e+00, -5.6749e-01,
          -3.6059e-01,  1.0436e-02, -6.7176e-01],
         [ 9.7128e-02, -1.1572e+00,  5.4874e-01,  2.3012e-01,  3.0749e-02,
          -8.1957e-01, -9.3595e-02,  1.3155e-03]]],
       grad_fn=<ConvolutionBackward0>)

In [3]:
conv = nn.Conv1d(in_channels=2, out_channels=1, kernel_size=2, stride=1)

Conv1d(2, 1, kernel_size=(2,), stride=(1,))

In [4]:
input = torch.randn(1, 2, 10)
input

tensor([[[ 0.1065,  0.0325,  0.3835, -0.4821, -0.5456, -0.6344, -1.2692,
           0.5657,  1.6293,  0.5169],
         [-0.1785, -0.5415,  1.6034,  0.1962, -0.6446, -0.0886,  1.8281,
          -0.3059,  0.7381, -0.1009]]])

In [5]:
expected = conv(input)
expected

tensor([[[-0.7086, -0.1653, -0.2917, -0.3048, -0.2009,  0.5983,  0.1331,
          -0.8856, -1.2601]]], grad_fn=<ConvolutionBackward0>)

In [8]:
conv.weight

Parameter containing:
tensor([[[-0.4575, -0.2033],
         [ 0.1255,  0.2920]]], requires_grad=True)

In [12]:
# Toy Dataset for Binary Classification
input_size_cnn1d = 100  # Adjust the input size based on your data
num_classes_cnn1d = 1   # Binary classification

cnn1d_data = torch.randn(100, 1, input_size_cnn1d)  # Batch size of 100
cnn1d_labels = torch.randint(0, 2, (100,)).float()

Mechanically, the rows are considered as channels, and columns are part of the channel, like a signal

There can be many batches of these channels and signals. The dimension of the signals are important.

In examples below, when 2d data is pushed into 1d convolution, Runtime error is raised.

In [14]:
cnn1d_data[0]

tensor([[-0.1160, -0.4742, -0.5641, -1.1295, -0.4921,  1.2707,  1.6504,  3.0685,
          0.2358,  0.9147,  0.2113, -1.0332, -1.0887,  1.2971,  1.1889,  0.7151,
          1.1036, -0.8386, -0.3371, -0.3723,  0.0834, -1.2234,  1.0445,  0.4863,
         -1.0724,  0.3048,  0.5339, -0.7957,  0.8397, -1.7197, -0.6979,  0.5634,
         -1.4362, -0.6159,  0.8184,  0.0582,  0.7082,  0.1869, -0.5493,  0.7642,
          0.8636, -1.8948, -1.2380, -0.6086,  2.0619,  1.5022, -0.8410, -2.5913,
         -0.1620, -0.3567, -0.1441,  0.9850, -1.4515, -0.6341,  0.6756, -1.2304,
         -0.0053,  0.6021,  0.7283,  1.1197, -0.7650,  2.0131, -0.4366,  0.5713,
         -0.5437, -0.2138, -0.7753,  0.5506,  0.6962, -0.6483,  1.7465,  0.0736,
         -0.3780, -0.0973, -0.4908, -0.2331,  1.3002, -1.3981,  0.6399,  0.7240,
          0.9336, -0.1400,  0.1766, -0.7851, -0.6539, -1.1359,  1.5926, -0.2918,
         -0.3544,  1.3547,  1.1667,  1.4869,  0.6319,  0.0561,  1.3375,  0.7295,
         -0.7983,  0.7254,  

In [None]:
conv1d = nn.Conv1d(in_channels=1, out_channels=5, kernel_size=3)
conved_1d = conv1d(cnn1d_data)
conved_1d

In [None]:
conv = nn.Conv1d(in_channels=2, out_channels=10, kernel_size=2, stride=1)
conv.weight

In [16]:
conv_0_2d = nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(2, 2))
conv_0_2d.weight

Parameter containing:
tensor([[[[-0.1382, -0.0935],
          [ 0.4611,  0.1107]]],


        [[[ 0.0292, -0.1385],
          [ 0.2915, -0.0875]]]], requires_grad=True)

In [20]:
conv02 = nn.Conv1d(in_channels=2, out_channels=1, kernel_size=1, stride=1)
conv02.weight

Parameter containing:
tensor([[[-0.4646],
         [-0.0630]]], requires_grad=True)

In [26]:
conv1d2k = nn.Conv1d(in_channels=2, out_channels=1, kernel_size=(2, 1), stride=1)
conv1d2k.weight

Parameter containing:
tensor([[[[ 0.0165],
          [ 0.2303]],

         [[ 0.0955],
          [-0.0068]]]], requires_grad=True)

In [28]:
input_3d = torch.randn(20, 16, 10, 50, 100)
conv3d0 = nn.Conv3d(in_channels=16,out_channels=8, kernel_size=3)
output_3d = conv3d0(input_3d)
output_3d.shape

torch.Size([20, 8, 8, 48, 98])

In [30]:
conv_0_1k = nn.Conv2d(in_channels=16, out_channels=2, kernel_size=(1, 2))
# conv_0_1k.weight

In [None]:
# sending 3d input into 2d conv
# Runtime Error: Expected 3D (unbatched) or 4D (batched) input to conv2d, but got input of size: [20, 16, 10, 50, 100]
in3d_out2d = conv_0_1k(input_3d)
in3d_out2d 

In [None]:
conv_2d_data = torch.randn([5, 1, 3, 3])
conv_2d_data

In [None]:
convolved_2d = conv_0_2d(conv_2d_data)
convolved_2d

Need to understand conv1d, 2d and 3d intutively

https://www.youtube.com/playlist?list=PLQ-UNteTsc3CJ1DI974uXFkHXt8wQhXBv

In [32]:
# Convolutional 1D network

import torch
import torch.nn as nn

# Define a simple 1D Convolutional Neural Network for sequence classification
class CNN1D(nn.Module):
    def __init__(self, input_size, num_classes):
        super(CNN1D, self).__init__()
        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(32 * (input_size // 2), num_classes)  # observer where the input_size is applied

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.fc1(x)
        return x

# Create an instance of the CNN1D model
input_size_cnn1d = 20
num_classes_cnn1d = 2

model_cnn1d = CNN1D(input_size_cnn1d, num_classes_cnn1d)

In [47]:
data_to_model = torch.randn(10, 1, 10)
temp1d = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, stride=2, padding=2)
out = temp1d(data_to_model)

In [48]:
out.shape

torch.Size([10, 32, 6])

OutputSequenceLength = (input_sequence_length + 2 * padding - kernel_size) / stride

OutputSequenceLength = OutputSequenceLength + 1

In [43]:
def get_out_seq(in_length, padding, kernel_size, stride):
    temp = in_length + (2 * padding) - kernel_size
    return (temp / stride) + 1

In [44]:
get_out_seq(10, 1, 3, 1)

10.0

In [49]:
get_out_seq(10, 2, 3, 2)

6.5