## MiniVGGNet CNN
This model is a lot more complex than the ShallowNet one. 

In [4]:
#imports and CUDA check
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report the selected device, CUDA visibility, and the value of torch.version.cuda.
print(device)
print("CUDA Available: ", torch.cuda.is_available())
print(torch.version.cuda)

cuda
CUDA Available:  True
12.1


### MiniVGGNet Model

First conv block
- 2 conv layers (3x3, 32 filters)
- ReLU activation (zeroes out negative activations, adding non-linearity)
- BatchNorm (normalizes activation for efficiency)
- Max Pool (downsamples dimensions)
- Dropout 25% (reduces overfitting)

Second block
- 2 conv layers (3x3, 64 filters)
- BatchNorm
- MaxPool
- Dropout 25%

Final
- flatten (1D vector)
- FC (512 neurons; its input is the flattened 64 feature maps * 8x8 spatial dimensions = 4096 features)

In [5]:
class MiniVGGNet(nn.Module):
    """Small VGG-style CNN: two convolutional blocks and a two-layer classifier.

    Each convolutional block is (3x3 conv -> BatchNorm -> ReLU) applied twice,
    followed by 2x2 max pooling and 25% dropout. The classifier is
    Linear(64 * 8 * 8, 512) -> BatchNorm -> ReLU -> 50% dropout ->
    Linear(512, num_classes).

    Every convolution has its own BatchNorm2d layer (`batchnorm1`/`batchnorm1b`
    in the first block, `batchnorm2`/`batchnorm2b` in the second), so each
    layer learns its own scale/shift and tracks its own running statistics.
    State dicts saved before `batchnorm1b`/`batchnorm2b` existed do not contain
    their keys; load those with `strict=False`.

    Args:
        num_classes: Number of output logits produced by the final layer.

    Input/Output:
        `fc1` is sized for 64 * 8 * 8 features, which corresponds to inputs of
        shape (N, 3, 32, 32) after the two 2x2 poolings. `forward` returns raw
        logits of shape (N, num_classes); no softmax is applied.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Stateless activation, shared by every layer.
        # Zeroes negative activations, which provides the non-linearity.
        self.relu = nn.ReLU()

        # first block: 32 filters, 3x3 kernel (padding=1 keeps the spatial size)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(32)   # normalizes conv1 output
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, padding=1)
        self.batchnorm1b = nn.BatchNorm2d(32)  # normalizes conv2 output (own statistics)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)  # halves height and width
        self.dropout1 = nn.Dropout(0.25)  # zeroes 25% of activations during training

        # second block: 64 filters, 3x3 kernel
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.batchnorm2 = nn.BatchNorm2d(64)   # normalizes conv3 output
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1)
        self.batchnorm2b = nn.BatchNorm2d(64)  # normalizes conv4 output (own statistics)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.25)

        # classifier head
        self.flatten = nn.Flatten()  # converts feature maps to one vector per sample
        self.fc1 = nn.Linear(64 * 8 * 8, 512)  # 64 feature maps * 8x8 spatial positions
        self.batchnorm3 = nn.BatchNorm1d(512)  # batch norm for the FC activations
        self.dropout3 = nn.Dropout(0.5)  # zeroes 50% of activations during training

        self.fc2 = nn.Linear(512, num_classes)  # one logit per class

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image batch; the layer sizes expect shape (N, 3, 32, 32).

        Returns:
            Tensor of shape (N, num_classes) holding unnormalized logits.
        """
        # first block
        x = self.relu(self.batchnorm1(self.conv1(x)))
        x = self.relu(self.batchnorm1b(self.conv2(x)))
        x = self.pool1(x)
        x = self.dropout1(x)

        # second block
        x = self.relu(self.batchnorm2(self.conv3(x)))
        x = self.relu(self.batchnorm2b(self.conv4(x)))
        x = self.pool2(x)
        x = self.dropout2(x)

        # fc layers
        x = self.flatten(x)
        x = self.relu(self.batchnorm3(self.fc1(x)))
        x = self.dropout3(x)
        x = self.fc2(x)
        return x

### CIFAR-10 Dataset