# Simulate a Convolutional Neural Network

Simulate data from a CNN in 1 dimension. Then, fit a CNN model
to the data to determine whether we can recover the true parameters.

In [1]:
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

import matplotlib.pyplot as plt

In [2]:
np.random.seed(1337)

In [3]:
# Generate the X tensor. Each observation has n_channels_in * n_vector_elements observations.
n_channels_in = 2
n_vector_elements = 5
n_observations = 1000

kernel_size = 3
n_channels_out = 4

x = np.random.normal(0, 1, size=[n_observations, n_channels_in, n_vector_elements])
x_tensor = torch.Tensor(x)

In [4]:
x_tensor.shape

torch.Size([1000, 2, 5])

In [5]:
# Generate some true weights and biases.
true_conv1_weights = np.random.normal(0, 1, [n_channels_out, n_channels_in, kernel_size])
true_lin_weights = np.random.normal(0, 1, n_channels_out * n_vector_elements)
true_lin_bias = -0.5

In [6]:

# Make an empty hidden layer to hold the convolution output.
z = np.zeros((n_observations, n_channels_out, n_vector_elements))

# Perform the convolution transforming x to z.
padding = int((kernel_size - 1) / 2)
for obs in range(n_observations):
    for out_channel in range(n_channels_out):
        for kernel_center in range(n_vector_elements):
            if kernel_center < padding:  # In this case, we need to pad to the left.
                z[obs, :, kernel_center] = (
                    (x[obs, :, :(kernel_center + padding + 1)] 
                    * true_conv1_weights[:, :, (kernel_size - kernel_center - padding - 1):])
                    .sum(axis=1)
                    .sum(axis=1)
                )
            elif kernel_center >= (n_vector_elements - padding):  # Pad to the right
                z[obs, :, kernel_center] = (
                    (x[obs, :, (kernel_center - padding):] 
                    * true_conv1_weights[:, :, :(n_vector_elements - kernel_center + padding)])
                    .sum(axis=1)
                    .sum(axis=1)
                )
            else:  # No padding
                z[obs, :, kernel_center] = (x[obs, :, (kernel_center - padding):(kernel_center + padding + 1)] 
                                            * true_conv1_weights[:, :, :]).sum(axis=1).sum(axis=1)

# Flatten z
z = z.reshape((n_observations, n_channels_out * n_vector_elements))

# Apply linear transformation layer
y = true_lin_bias + (true_lin_weights * z).sum(axis=1)


In [7]:
y = y + np.random.normal(0, 1, n_observations)  # Add random noise
y_tensor = torch.Tensor(y)

In [None]:
def _padding(downsample, kernel_size):
    """Compute required padding"""
    padding = max(0, int(np.floor((kernel_size - downsample + 1) / 2)))
    print(f"Padding: {padding}")
    return padding

class ConvNN(nn.Module):
    def __init__(self, n_channels_in, n_channels_out, kernel_size, dropout_rate):
        super(ConvNN, self).__init__()
        padding = _padding(1, kernel_size)
        self.conv1 = nn.Conv1d(in_channels=n_channels_in, 
                               out_channels=n_channels_out,
                               kernel_size=kernel_size,
                               padding=padding,
                               bias=False)
        self.bn1 = nn.BatchNorm1d(n_channels_out)
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_rate)
        self.lin = nn.Linear(n_channels_out * n_vector_elements, 1)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.dropout1(out)

        # Flatten array
        out = out.view(out.size(0), -1)

        out = self.lin(out)

        return out


In [9]:
model = ConvNN(n_channels_in=n_channels_in,
               n_channels_out=n_channels_out,
               kernel_size=kernel_size,
               dropout_rate=0.25)
criterion = nn.MSELoss()  # Mean Squared Error Loss for regression
optimizer = optim.SGD(model.parameters(), lr=0.001) # Stochastic Gradient Descent


Padding: 1


In [10]:
num_epochs = 1000

for epoch in range(num_epochs):
    # Forward pass
    outputs = model(x_tensor)
    loss = criterion(outputs, y_tensor)

    # Backward and optimize
    optimizer.zero_grad() # Clear gradients from previous iteration
    loss.backward()       # Compute gradients
    optimizer.step()      # Update model parameters

    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')



  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [100/1000], Loss: 161.2242
Epoch [200/1000], Loss: 161.1678


KeyboardInterrupt: 

In [None]:
model_weights = model.state_dict()

In [None]:
model_weights

In [None]:
with torch.no_grad():  # Disable gradient calculation for inference
    prediction = model(x_tensor)

In [None]:
plt.scatter(y_tensor,
            prediction)
plt.show()

In [None]:
plt.scatter(true_conv1_weights.flatten(),
            model_weights['conv1.weight'].flatten())
plt.show()