In [1]:
#https://github.com/ava-orange-education/Mastering-Computer-Vision-with-PyTorch-2.0 

In [1]:
# ResNet in PyTorch

import torch
import torch.nn as nn

<img src="images/Resnet-2.png" width="1000" height="400"/>

In [2]:
class ResidualBlock(nn.Module):
    """Basic residual unit: two 3x3 conv + batch-norm layers plus a shortcut.

    Args:
        in_channels: Number of channels of the incoming tensor.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution; the second always uses 1.
        downsample: Optional module applied to the input before it is added
            to the main path. When ``None`` the input is added unchanged, so
            its shape must already match the main-path output.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Both convolutions share the 3x3 / padding-1 geometry; only the
        # stride (and the input channel count) differs between them.
        conv_geometry = dict(kernel_size=3, padding=1)

        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv_geometry)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # One in-place ReLU module is reused for both activations.
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv_geometry)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Shortcut adapter supplied by the caller (may be None).
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        # Shortcut branch: the raw input, or its adapted version when an
        # adapter module was provided.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Main branch: Conv -> BN -> ReLU -> Conv -> BN
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        # Skip connection: add the shortcut, then apply the final activation.
        out += shortcut
        return self.relu(out)

In [3]:
class ResNet(nn.Module):
    """ResNet-18-style image classifier assembled from ``ResidualBlock`` stages.

    Layout: a 7x7 / stride-2 stem followed by max-pooling, four stages of two
    residual blocks each (64, 128, 256 and 512 channels), global average
    pooling, and a single fully connected layer.

    Args:
        num_classes: Length of the score vector produced by the final linear
            layer. Defaults to 10, which reproduces the previously hard-coded
            head, so ``ResNet()`` behaves exactly as before.
    """

    def __init__(self, num_classes=10):
        super(ResNet, self).__init__()
        # Internal state to track the number of input channels for the next layer
        self.in_channels = 64

        # --- THE STEM: Initial processing of the raw image ---
        # Large 7x7 filter on a 3-channel input; stride 2 halves the resolution
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # Max-pooling with stride 2 halves the spatial size again
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # --- THE TRUNK: Stacking residual stages ---
        # layer1: 64 channels, stride 1 (no spatial change)
        self.layer1 = self.make_layer(64, 2)
        # layer2-4: double the channels and use stride 2 in the first block
        self.layer2 = self.make_layer(128, 2, 2)
        self.layer3 = self.make_layer(256, 2, 2)
        self.layer4 = self.make_layer(512, 2, 2)

        # --- THE HEAD: Classification ---
        # Global average pooling: every feature map becomes a single value
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # Final fully connected layer mapping 512 features to the class scores
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, out_channels, blocks, stride=1):
        """Create one stage: a sequence of ``blocks`` ResidualBlocks.

        Args:
            out_channels: Channel count produced by every block in the stage.
            blocks: Number of ResidualBlocks in the stage.
            stride: Stride of the first block; the remaining blocks use 1.

        Returns:
            An ``nn.Sequential`` holding the blocks.

        Side effect: ``self.in_channels`` is set to ``out_channels`` so the next
        call knows the depth of its input.
        """
        downsample = None
        # The shortcut needs a projection whenever the first block changes the
        # spatial size (stride != 1) or the channel count.
        if stride != 1 or self.in_channels != out_channels:
            downsample = nn.Sequential(
                # 1x1 conv changes channel depth and/or spatial resolution
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )

        layers = []
        # The first block handles the change in dimensions
        layers.append(ResidualBlock(self.in_channels, out_channels, stride, downsample))
        # Update in_channels so later stages know the current depth
        self.in_channels = out_channels

        # Remaining blocks keep the dimensions unchanged
        for _ in range(1, blocks):
            layers.append(ResidualBlock(out_channels, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Compute class scores for a batch of 3-channel images.

        Returns a tensor with one row per batch element and one column per
        class (the ``out_features`` of ``self.fc``).
        """
        # 1. Stem
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)

        # 2. Four residual stages
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        # 3. Global average pooling -> [Batch, 512, 1, 1]
        out = self.avgpool(out)

        # 4. Flatten to [Batch, 512] for the linear layer
        out = out.view(out.size(0), -1)

        # 5. Class scores
        out = self.fc(out)

        return out

# NN VISUALIZATION

# 1. Using https://netron.app/

<img src="images/netron_app.png" width="200" height="400"/>

In [6]:
#Plot NN in https://netron.app/ by exporting model
#           https://netron.app/

import torch

# Build the network and switch it to evaluation mode in one expression
# (nn.Module.eval() returns the module itself). Evaluation mode matters for
# the BatchNorm layers.
model = ResNet().eval()

# The exporter runs the model once on a concrete tensor to trace it:
# a batch of one 3-channel 32x32 input.
dummy_input = torch.randn((1, 3, 32, 32))

# Optional node names given to the exported graph's input and output.
export_options = {
    "input_names": ['Input Image'],
    "output_names": ['Class Scores'],
}

# Positional arguments: model being run, example input, destination file.
torch.onnx.export(model, dummy_input, "my_resnet.onnx", **export_options)

print("Model exported successfully to 'my_resnet.onnx'")

Model exported successfully to 'my_resnet.onnx'


# 2. Using TensorBoard

<img src="images/tensorboard.png" width="1200" height="400"/>

In [10]:
#Plot NN using TensorBoard

from torch.utils.tensorboard import SummaryWriter
import torch.nn as nn
import torch.optim as optim

# Example input size: (batch_size, channels, height, width)
example_input = torch.rand(1, 3, 224, 224)  # Example for a batch of size 1, RGB image 224x224

# Example optimizer / loss for the demonstration loop below
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
criterion = nn.CrossEntropyLoss()

# Dummy dataset (replace with your real data)
# x_train, y_train = your_dataset

# The context manager closes the writer even if the loop below raises.
with SummaryWriter(log_dir='./logs') as writer:
    # Pass the example input through the model to log its graph
    writer.add_graph(model, example_input)

    # `model` was put in evaluation mode by the export cell; a training loop
    # must run in training mode so BatchNorm uses batch statistics and updates
    # its running averages. The previous mode is restored afterwards so later
    # cells see the model in the state they left it.
    was_training = model.training
    model.train()
    try:
        # Dummy training loop (replace with actual loop)
        for epoch in range(10):
            # Forward pass (example)
            optimizer.zero_grad()
            outputs = model(example_input)  # Replace with actual inputs
            loss = criterion(outputs, torch.randint(0, 10, (1,)))  # Dummy target (replace with actual labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Log the loss to TensorBoard
            writer.add_scalar('Loss/train', loss.item(), epoch)

            # Optionally, log histograms of model parameters
            for name, param in model.named_parameters():
                writer.add_histogram(name, param, epoch)
    finally:
        model.train(was_training)

In [14]:
''' in terminal hit 

            tensorboard --logdir=./logs

then in browser go to

            http://localhost:6006/

'''

' in terminal hit \n\n            tensorboard --logdir=./logs\n\nthen in browser go to\n\n            http://localhost:6006/\n\n'

# 3. Using https://graphviz.org/

<img src="images/graphviz.png" width="1200" height="400"/>

In [5]:
# https://graphviz.org/

# 4. Using LaTeX

<img src="images/latex.png" width="1200" height="400"/>

In [None]:
# https://github.com/HarisIqbal88/PlotNeuralNet

In [None]:
import sys
import os

# --- ADD THIS BLOCK AT THE VERY TOP ---
# We point Python directly to the folder containing 'pycore'.
# Set the PLOTNEURALNET_DIR environment variable to use a checkout located
# elsewhere; when it is unset the original location below is used.
project_path = os.environ.get(
    'PLOTNEURALNET_DIR',
    '/home/anonymous/Downloads/Books/Mastering Computer Vision-with Pytorch 2.0/PlotNeuralNet/',
)
# Re-running this cell in the same kernel must not pile up duplicate entries.
if project_path not in sys.path:
    sys.path.append(project_path)
# --------------------------------------

from pycore.tikzeng import *

# Define the Architecture

def _residual_stage(tag, prev, dim_a, dim_b, size, width):
    """Return the four diagram elements of one residual stage, in drawing order.

    tag:   stage prefix used for node names and captions, e.g. "l1"
    prev:  name of the node this stage is placed after and connected from
    dim_a, dim_b: the two values passed positionally to to_Conv after the name
    size:  used for both the height and the depth of the two boxes
    width: width of the two boxes
    """
    first = f"{tag}_conv1"
    second = f"{tag}_conv2"
    label = tag.upper()
    return [
        # Two conv boxes: the first offset from the previous node, the second
        # placed directly against the first.
        to_Conv(first, dim_a, dim_b, offset="(2,0,0)", to=f"({prev}-east)",
                height=size, depth=size, width=width, caption=f"{label}_Conv1"),
        to_Conv(second, dim_a, dim_b, offset="(0,0,0)", to=f"({first}-east)",
                height=size, depth=size, width=width, caption=f"{label}_Conv2"),
        # Main-path connection from the previous node
        to_connection(prev, first),
        # Skip connection drawn from the first conv to the second
        to_skip(of=first, to=second, pos=1.25),
    ]


arch = [
    to_head('..'),
    to_cor(),
    to_begin(),

    # --- 0. INPUT IMAGE ---
    to_input('images/input.png', width=6, height=6, name="input"),

    # --- 1. INITIAL BLOCK (Stem) ---
    to_Conv("conv1", 64, 64, offset="(0,0,0)", to="(0,0,0)", height=32, depth=32, width=2, caption="Stem"),
    to_Pool("pool1", offset="(0,0,0)", to="(conv1-east)", caption="Pool"),

    # --- 2. LAYER 1: same box size as the stem ---
    *_residual_stage("l1", "pool1", 64, 64, 32, 2),

    # --- 3. LAYER 2: drawn smaller (16) and thicker (width 4) ---
    *_residual_stage("l2", "l1_conv2", 128, 16, 16, 4),

    # --- 4. LAYER 3: drawn smaller again (8) and thicker (width 7) ---
    *_residual_stage("l3", "l2_conv2", 256, 8, 8, 7),

    # --- 5. FINAL CLASSIFICATION ---
    # Global average pool, drawn as a small flat box
    to_Pool("avg_pool", offset="(2,0,0)", to="(l3_conv2-east)", height=1, depth=1, width=1, caption="AvgPool"),

    # Fully connected / softmax box labelled with 10 classes
    to_SoftMax("soft1", 10, "(3,0,0)", "(avg_pool-east)", caption="FC 10"),

    to_connection("l3_conv2", "avg_pool"),
    to_connection("avg_pool", "soft1"),

    to_end()
]

def main():
    """Generate the diagram source for ``arch`` next to the running script.

    The output path is ``sys.argv[0]`` with its final extension replaced by
    ``.tex``; that path and ``arch`` are handed to ``to_generate``.
    """
    # splitext removes only the trailing extension. Splitting on the first '.'
    # would cut the path at any earlier dot, e.g. a "python3.10" directory or
    # a leading "./".
    namefile = os.path.splitext(str(sys.argv[0]))[0]
    to_generate(arch, namefile + '.tex')

# Entry-point guard: call main() only when this code runs as the top-level
# module. NOTE(review): in a notebook cell __name__ is normally '__main__' as
# well, so executing the cell triggers main() immediately — confirm intended.
if __name__ == '__main__':
    main()