<a href="https://colab.research.google.com/github/PacktPublishing/Modern-Computer-Vision-with-PyTorch-2E/blob/main/Chapter04/CNN_working_details.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn
from torch.utils.data import TensorDataset, Dataset, DataLoader
from torch.optim import SGD, Adam
from torchvision import datasets
import numpy as np
import matplotlib.pyplot as plt

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

%matplotlib inline

In [4]:
# Two single-channel 4x4 toy "images", laid out as (batch, channel, height, width).
# Dividing by 8 (the largest absolute pixel value) scales every entry into [-1, 1].
X_train = torch.tensor(
    [
        [[[1, 2, 3, 4], [2, 3, 4, 5], [5, 6, 7, 8], [1, 3, 4, 5]]],
        [[[-1, 2, 3, -4], [2, -3, 4, 5], [-5, 6, -7, 8], [-1, -3, -4, -5]]],
    ],
    dtype=torch.float32,
    device=device,
) / 8
# One binary label per image, stored as float because BCELoss expects float targets.
y_train = torch.tensor([0, 1], dtype=torch.float32, device=device)
X_train.shape, y_train.shape

(torch.Size([2, 1, 4, 4]), torch.Size([2]))

In [5]:
def get_model():
    """Build the toy CNN together with its loss function and optimizer.

    The network is: one 3x3 convolution (1 input channel, 1 output channel),
    a 2x2 max-pool, ReLU, flatten, a 1-in/1-out linear layer and a sigmoid.
    The module is moved to the notebook-level ``device``.

    Returns:
        tuple: ``(model, loss_fn, optimizer)`` where ``loss_fn`` is binary
        cross-entropy and ``optimizer`` is Adam with a learning rate of 1e-2
        over the model's parameters.
    """
    # Layers are instantiated in forward order, so parameter initialisation
    # happens in the same sequence as the forward pass.
    layers = [
        nn.Conv2d(1, 1, kernel_size=3),
        nn.MaxPool2d(2),
        nn.ReLU(),
        nn.Flatten(),
        nn.Linear(1, 1),
        nn.Sigmoid(),
    ]
    network = nn.Sequential(*layers).to(device)
    criterion = nn.BCELoss()
    adam = Adam(network.parameters(), lr=1e-2)
    return network, criterion, adam

In [7]:
from torchsummary import summary
# Instantiate the network along with its loss function and optimizer.
model, loss_fn, optimizer = get_model()
# Print per-layer output shapes and parameter counts for a 1x4x4 input
# (the batch dimension is not part of the shape passed here). The trailing
# semicolon suppresses the echo of the call's return value.
summary(model, (1, 4, 4));

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1              [-1, 1, 2, 2]              10
         MaxPool2d-2              [-1, 1, 1, 1]               0
              ReLU-3              [-1, 1, 1, 1]               0
           Flatten-4                    [-1, 1]               0
            Linear-5                    [-1, 1]               2
           Sigmoid-6                    [-1, 1]               0
Total params: 12
Trainable params: 12
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [8]:
def train_batch(x, y, model, opt, loss_fn):
    """Perform one optimisation step on a single batch.

    Args:
        x: Input batch passed to ``model``.
        y: Target tensor; compared against the flattened predictions.
        model: Module to train; switched to training mode first.
        opt: Optimizer that updates the model's parameters.
        loss_fn: Callable taking ``(prediction, target)`` and returning a
            scalar loss tensor.

    Returns:
        float: The loss on this batch, measured before the parameter update.
    """
    model.train()
    # Flatten the predictions to 1-D so they line up with the target tensor.
    flat_preds = model(x).view(-1)
    loss = loss_fn(flat_preds, y)
    loss_value = loss.item()
    loss.backward()
    opt.step()
    # Clear the gradients so they do not accumulate into the next call.
    opt.zero_grad()
    return loss_value

In [9]:
# Pair each image with its label and serve them one sample at a time, in
# order (these are DataLoader's defaults, spelled out here for clarity).
trn_dl = DataLoader(
    dataset=TensorDataset(X_train, y_train),
    batch_size=1,
    shuffle=False,
)

In [10]:
# Train for 2000 passes over the two-sample dataset.
for epoch in range(2000):
    # A DataLoader is directly iterable and yields (inputs, targets) pairs, so
    # no iter()/enumerate() wrapper is needed; the batch index was never used.
    for x, y in trn_dl:
        batch_loss = train_batch(x, y, model, optimizer, loss_fn)

In [11]:
# Prediction for the first training image (its label is 0). Slicing with [:1]
# rather than indexing with [0] keeps the leading batch dimension.
model(X_train[:1])

tensor([[0.0046]], device='cuda:0', grad_fn=<SigmoidBackward0>)

In [12]:
# List the layers of the Sequential model in forward order.
list(model.children())

[Conv2d(1, 1, kernel_size=(3, 3), stride=(1, 1)),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 ReLU(),
 Flatten(start_dim=1, end_dim=-1),
 Linear(in_features=1, out_features=1, bias=True),
 Sigmoid()]

In [13]:
# Collect (weight, bias) from every layer that owns a `weight` attribute. In
# this model only the Conv2d and the Linear layer do, so the list unpacks into
# exactly two pairs. detach() returns tensors that share storage with the
# parameters but are cut off from autograd; it is the recommended replacement
# for the legacy `.data` attribute.
(cnn_w, cnn_b), (lin_w, lin_b) = [
    (layer.weight.detach(), layer.bias.detach())
    for layer in model.children()
    if hasattr(layer, 'weight')
]

In [14]:
# Display the learned convolution filter/bias and the linear weight/bias.
(cnn_w, cnn_b), (lin_w, lin_b)

((tensor([[[[ 1.0224,  0.9717, -1.9896],
            [-2.0660,  0.5981,  0.5711],
            [ 0.9119, -1.9992,  0.7118]]]], device='cuda:0'),
  tensor([0.6306], device='cuda:0')),
 (tensor([[2.3517]], device='cuda:0'), tensor([-5.3811], device='cuda:0')))

In [15]:
# Spatial sizes of the input image and of the convolution filter (the last two
# axes of each 4-D tensor).
h_im, w_im = X_train.shape[2:]
h_conv, w_conv = cnn_w.shape[2:]
# With stride 1 and no padding the filter fits in (image - filter + 1)
# positions along each axis. Allocate the result buffer on the same device as
# the data so the manual computation does not rely on implicit CPU<->GPU copies.
sumprod = torch.zeros(
    (h_im - h_conv + 1, w_im - w_conv + 1),
    device=X_train.device,
)
sumprod

tensor([[0., 0.],
        [0., 0.]])

In [16]:
# Reproduce the convolution layer by hand for the first training image.
# The filter has a single input and a single output channel, so it can be
# viewed as a plain 2-D (h_conv, w_conv) matrix; do that once, outside the loop.
model_filter = cnn_w.reshape(h_conv, w_conv)
for i in range(h_im - h_conv + 1):
    for j in range(w_im - w_conv + 1):
        # Patch of the image currently covered by the filter. Its size comes
        # from the filter's own shape instead of a hard-coded 3.
        img_subset = X_train[0, 0, i:(i + h_conv), j:(j + w_conv)]
        # Element-wise product, summed, plus the convolution bias.
        val = torch.sum(img_subset * model_filter) + cnn_b
        sumprod[i, j] = val
sumprod

tensor([[-0.0581, -0.2166],
        [-0.3670, -0.4115]])

In [17]:
# ReLU by hand: zero out negative activations in place. The model applies
# MaxPool2d before ReLU, but because ReLU is monotonic, taking the max after
# ReLU gives the same value as applying ReLU after the max.
sumprod.clamp_(min=0)

tensor([[0., 0.],
        [0., 0.]])

In [18]:
# Max pooling by hand: the convolution output is 2x2 and the pool window is
# 2x2, so a single window covers it and the global max is the pooled value.
pooling_layer_output = torch.max(sumprod)
pooling_layer_output

tensor(0.)

In [19]:
# Linear layer by hand: one input feature times the single weight, plus the bias.
intermediate_output_value = pooling_layer_output * lin_w + lin_b
intermediate_output_value

tensor([[-5.3811]], device='cuda:0')

In [20]:
from torch.nn import functional as F  # functional (stateless) forms of the nn layers
# Final step by hand: squash the linear output through a sigmoid. The stored
# outputs show the same value (0.0046) here as for model(X_train[:1]).
# Ending the cell with the bare expression displays the result without print().
F.sigmoid(intermediate_output_value)

tensor([[0.0046]], device='cuda:0')
