# Perceptron

In [None]:
import torch  # Main PyTorch library
import torch.nn as nn
import torch.optim as optim  # Optimization algorithms
import torch.nn.functional as F  # Functions
from torch.utils.data import DataLoader  # To load data
from torch.utils.data import Dataset  # To create new Datasets
from torchvision import datasets, transforms  # Datasets and image transformations

In [None]:
class convNet(nn.Module):
    """Small CNN mapping 1-channel 28x28 images to 10-class log-probabilities.

    Pipeline: three 3x3 convolutions (stride 1, no padding), each followed by
    ReLU, then a single 2x2 max pool, dropout, and two fully connected layers.

    Spatial size for a 28x28 input: 28 -> 26 -> 24 -> 22 after the three
    convolutions (each unpadded 3x3 conv trims 2 pixels), then 11 after the
    pool. The flattened feature vector therefore has 64 * 11 * 11 = 7744
    elements, which is what ``fc1`` is sized for.
    """

    def __init__(self):
        super().__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size, stride)
        self.conv1 = nn.Conv2d(1, 16, 3, 1)   # 1x28x28  -> 16x26x26
        self.conv2 = nn.Conv2d(16, 32, 3, 1)  # 16x26x26 -> 32x24x24
        self.conv3 = nn.Conv2d(32, 64, 3, 1)  # 32x24x24 -> 64x22x22
        self.dropout1 = nn.Dropout(0.25)  # applied to the pooled feature maps
        self.dropout2 = nn.Dropout(0.5)   # applied to the fc1 activations
        # After the 2x2 max pool in forward() the maps are 64x11x11.
        self.fc1 = nn.Linear(64 * 11 * 11, 128)  # First fully connected layer
        self.fc2 = nn.Linear(128, 10)            # Second fully connected layer

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Input batch of shape (N, 1, 28, 28); ``fc1`` is sized for
                exactly this spatial resolution.

        Returns:
            Tensor of shape (N, 10) holding per-class log-probabilities
            (log-softmax over dim 1).
        """
        # Convolutional feature extractor: conv -> ReLU, three times.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        # Halve the spatial resolution (22x22 -> 11x11), then regularize.
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        # Keep the batch dimension, flatten channels and spatial dims.
        x = torch.flatten(x, 1)
        # Classifier head.
        x = F.relu(self.fc1(x))
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

"""
    Detailed Breakdown
x = self.conv1(x)

Operation: Applies the first convolutional layer.
Purpose: Extracts initial features from the input image (or tensor) using learned filters.
x = F.relu(x)

Operation: Applies the ReLU (Rectified Linear Unit) activation function.
Purpose: Introduces non-linearity by setting all negative values to zero, allowing the network to learn more complex patterns.
x = self.conv2(x)

Operation: Applies the second convolutional layer.
Purpose: Further extracts features from the activations produced by conv1.
x = F.relu(x)

Operation: Again, applies the ReLU activation.
Purpose: Maintains non-linearity after the second convolution.
x = self.conv3(x)

Operation: Applies the third convolutional layer.
Purpose: Extracts even higher-level features from the input.
x = F.relu(x)

Operation: Applies ReLU once more.
Purpose: Ensures non-linear transformation after the third convolution.
x = F.max_pool2d(x, 2)

Operation: Applies 2D max pooling with a kernel size of 2.
Purpose: Reduces the spatial dimensions (height and width) of the feature maps by taking the maximum value over each 2×2 region. This helps in reducing computation and controls overfitting by providing a form of spatial invariance.
x = self.dropout1(x)

Operation: Applies the first dropout layer.
Purpose: Randomly sets a fraction of the activations to zero (as defined when the dropout was initialized) during training. This regularizes the model and reduces the chance of overfitting.
x = torch.flatten(x, 1)

Operation: Flattens the output tensor, starting from dimension 1.
Purpose: Converts the multi-dimensional feature maps into a single vector per sample, preparing the data for the fully connected layers. The batch dimension (dimension 0) is preserved.
x = self.fc1(x)

Operation: Passes the flattened vector through the first fully connected (dense) layer.
Purpose: Learns higher-level combinations of the features extracted by the convolutional layers. The layer transforms the flattened input (channels × height × width of the pooled feature maps) into a lower-dimensional space (128 features in this case).
x = F.relu(x)

Operation: Applies the ReLU activation to the output of the fully connected layer.
Purpose: Adds non-linearity after the dense layer, enabling the network to learn more complex relationships.
x = self.dropout2(x)

Operation: Applies the second dropout layer.
Purpose: Further regularizes the model by randomly dropping neurons during training, which helps prevent overfitting.
x = self.fc2(x)

Operation: Passes the result through the second fully connected layer.
Purpose: Maps the 128 features to the number of classes (e.g., 10 for a 10-class classification problem).
output = F.log_softmax(x, dim=1)

Operation: Applies the log softmax function along dimension 1.
Purpose: Converts the raw class scores (logits) into log-probabilities. Computing log-softmax in a single step is numerically more stable than taking the log of a separately computed softmax, and its output is what the negative log likelihood loss (nn.NLLLoss) expects.
return output

Operation: Returns the final output.
Purpose: Provides the log-probabilities for each class as the prediction for each sample in the batch.
"""
