# Supplementary material for Q12

Under which circumstances are fully connected and convolutional layers equivalent?

In [1]:
import torch
# Show which PyTorch version produced the outputs recorded in this notebook.
print(f"PyTorch version: {torch.__version__}")

PyTorch version: 2.0.0


## 1) Reference fully connected layer

<img src="img/fc-cnn-equivalent-1.png" width="400px">

In [2]:
# Seed the global RNG so the layer's random initialization is reproducible.
torch.manual_seed(123)

# Reference layer: maps 4 input features to 2 output units.
fc = torch.nn.Linear(in_features=4, out_features=2)

# One example (batch size 1) with four features.
inputs = torch.tensor([[1.0, 2.0, 3.0, 4.0]])

# Forward pass only; no gradients are needed for this comparison.
with torch.no_grad():
    out1 = fc(inputs)

print(out1)

tensor([[-0.4775, -2.1469]])


In [3]:
# Display the fully connected layer's weight matrix: one row of 4 weights per output unit.
fc.weight

Parameter containing:
tensor([[-0.2039,  0.0166, -0.2483,  0.1886],
        [-0.4260,  0.3665, -0.3634, -0.3975]], requires_grad=True)

## 2) Scenario 1: The kernel size is equal to the input size

<img src="img/fc-cnn-equivalent-2.png" width="500px">

Convolutional layers in PyTorch expect inputs in NCHW format by default, where

- N = batch size
- C = channels
- H = height
- W = width

In [4]:
# Arrange the four features as a single-channel 2x2 image in NCHW layout;
# -1 lets PyTorch infer the batch dimension.
reshaped = inputs.reshape((-1, 1, 2, 2))
reshaped

tensor([[[[1., 2.],
          [3., 4.]]]])

In [5]:
# One input channel, two output channels, and a 2x2 kernel that spans the
# whole 2x2 input, so each output channel yields exactly one value.
conv = torch.nn.Conv2d(in_channels=1, out_channels=2, kernel_size=(2, 2))

conv.weight.shape

torch.Size([2, 1, 2, 2])

Note that the weights and biases in Conv2d are also initialized randomly, so to get the exact same results, we overwrite the random weights and biases in the convolutional layer with those of the fully connected layer.

In [6]:
with torch.no_grad():
    # Reshape the (2, 4) FC weight matrix to conv.weight's own shape
    # (2, 1, 2, 2): row i becomes the 2x2 kernel of output channel i.
    # Copying in place keeps `conv.weight` the same Parameter object.
    conv.weight.copy_(fc.weight.reshape(conv.weight.shape))
    # One bias per output unit / output channel.
    conv.bias.copy_(fc.bias)

    out2 = conv(reshaped)

print(out2)

tensor([[[[-0.4775]],

         [[-2.1469]]]])


In [7]:
# Compare with a floating-point tolerance: the linear and convolution kernels may
# accumulate products in a different order, so bit-exact equality is not guaranteed.
torch.isclose(out1.flatten(), out2.flatten())

tensor([True, True])

## 3) Scenario 2: The kernel has size one

<img src="img/fc-cnn-equivalent-3.png" width="500px">

In [8]:
# Treat each of the four features as its own channel of a 1x1 image (NCHW);
# -1 lets PyTorch infer the batch dimension.
reshaped2 = inputs.reshape((-1, 4, 1, 1))
reshaped2

tensor([[[[1.]],

         [[2.]],

         [[3.]],

         [[4.]]]])

In [9]:
# Four input channels (one per feature), two output channels, and a 1x1 kernel,
# so each output channel is a weighted sum over the input channels.
conv = torch.nn.Conv2d(in_channels=4, out_channels=2, kernel_size=(1, 1))

conv.weight.shape

torch.Size([2, 4, 1, 1])

In [10]:
with torch.no_grad():
    # Reshape the (2, 4) FC weight matrix to conv.weight's own shape
    # (2, 4, 1, 1): row i supplies the four 1x1 kernels of output channel i.
    # Copying in place keeps `conv.weight` the same Parameter object.
    conv.weight.copy_(fc.weight.reshape(conv.weight.shape))
    # One bias per output unit / output channel.
    conv.bias.copy_(fc.bias)

    out3 = conv(reshaped2)

print(out3)

tensor([[[[-0.4775]],

         [[-2.1469]]]])


In [11]:
# Compare with a floating-point tolerance: the linear and convolution kernels may
# accumulate products in a different order, so bit-exact equality is not guaranteed.
torch.isclose(out1.flatten(), out3.flatten())

tensor([True, True])