In [1]:
#Define the Model Structure:

In [2]:
import torch
import torch.nn as nn

#Define a new class named SimpleNet that extends the nn.Module class.
#The constructor of the class specifies all the layers in our network. The network is structured as convolution,
#relu, convolution, relu, pool, convolution, relu, convolution, relu, linear.
class SimpleNet(nn.Module):
    """Small convolutional classifier for 3-channel 32x32 images.

    Layer order: conv -> relu -> conv -> relu -> max-pool -> conv -> relu ->
    conv -> relu -> linear.

    Args:
        num_classes: Number of output classes, i.e. the number of output
            features of the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Convolution layer (Conv2d): the input is an RGB image with 3
        # channels, so in_channels=3. We apply 12 feature detectors, so
        # out_channels=12. kernel_size=3 is the standard 3x3 kernel, and
        # stride=1 moves the kernel one pixel at a time. padding=1 pads the
        # image with zeros so the output keeps the input's width and height.
        # NOTE: the out_channels of one layer is the in_channels of the next.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        # ReLU activation: thresholds incoming features at zero, so negative
        # values become 0 and all other values are kept unchanged.
        self.relu1 = nn.ReLU()

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        # Max pooling with kernel_size=2 halves the width and height of the
        # feature maps.
        self.pool = nn.MaxPool2d(kernel_size=2)

        self.conv3 = nn.Conv2d(in_channels=12, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.relu3 = nn.ReLU()

        self.conv4 = nn.Conv2d(in_channels=24, out_channels=24, kernel_size=3, stride=1, padding=1)
        self.relu4 = nn.ReLU()

        # After conv4 there are 24 channels, and the single 2x2 max-pool has
        # reduced the 32x32 input to 16x16 (32 / 2 = 16), so the flattened
        # feature map has 16 * 16 * 24 values per image.
        flat_features = 16 * 16 * 24
        # Linear layer: the final layer of the network. It maps the flattened
        # feature map to one output per class.
        self.fc = nn.Linear(in_features=flat_features, out_features=num_classes)

    def forward(self, input):
        """Run the network on a batch of images.

        Args:
            input: Image batch of shape (batch, 3, 32, 32). Other spatial
                sizes do not match the linear layer and raise an error.

        Returns:
            Tensor of shape (batch, num_classes) holding the raw output of
            the final linear layer.
        """
        output = self.conv1(input)
        output = self.relu1(output)

        output = self.conv2(output)
        output = self.relu2(output)

        output = self.pool(output)

        output = self.conv3(output)
        output = self.relu3(output)

        output = self.conv4(output)
        output = self.relu4(output)

        # Flatten everything except the batch dimension. Keeping the batch
        # dimension explicit means a wrongly sized input fails at the linear
        # layer instead of being silently folded into extra batch rows.
        output = torch.flatten(output, start_dim=1)

        output = self.fc(output)

        return output