In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary
import torch.nn.functional as F

In [2]:
# Define the ResNet class as a subclass of nn.Module
class ResNet(nn.Module):
  # Constructor function
    def __init__(self, num_classes=10):
        # Call the constructor of the parent class
        super(ResNet, self).__init__()
        # Set the number of input planes to 32
        self.in_planes = 32
        # Set the kernel size for convolution to (3,1)
        self.kernel = (3,1)
        # Set the kernel size for skip connections to (1,0)
        self.skip_kernel = (1,0)
        # Set the number of layers to 4
        self.num_layers = 4
        # Set the number of blocks in each layer
        self.num_blocks = [1, 1, 1, 4]
        # Define the first convolution layer
        self.conv1 = nn.Conv2d(3, self.in_planes, kernel_size=self.kernel[0],stride=1, padding=self.kernel[1], bias=True)
        # Define the batch normalization layer
        self.bn1 = nn.BatchNorm2d(self.in_planes)
        # Define the layers with specified number of planes, blocks and stride
        self.layer1 = self._make_layer(self.in_planes, self.num_blocks[0], stride=1, bias=True)
        self.layer1 = self._make_layer(64, 3, stride=1)
        self.layer2 = self._make_layer(128, 3, stride=2)
        self.layer3 = self._make_layer(256, 3, stride=2) 
        self.layer4 = self._make_layer(512, 2, stride=2) 
        # Get the number of features from the last layer
        finalshape = list(getattr(self, "layer"+str(self.num_layers))[-1].modules())[-2].num_features
        # Set the multiplier based on the number of layers
        self.multiplier = 4 if self.num_layers == 2 else (2 if self.num_layers == 3 else 1)
        # Define the final linear layer with specified number of output classes
        self.linear = nn.Linear(finalshape, num_classes)

    # Helper function to define a layer
    def _make_layer(self, planes, num_blocks, stride, bias=True):
      # Create a list of strides for each block in the layer
        strides = [stride] + [1]*(num_blocks-1)
        # Create an empty list to store the custom layers
        custom_layers = []
        # Loop through the strides and define each block in the layer
        for stride in strides:
            custom_layers.append(nn.Sequential(
            nn.Conv2d(self.in_planes, planes, kernel_size=self.kernel[0], stride=stride, padding=self.kernel[1], bias=bias),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True),
            nn.Conv2d(planes, planes, kernel_size=self.kernel[0], stride=1, padding=self.kernel[1], bias=bias),
            nn.BatchNorm2d(planes),
            nn.ReLU(inplace=True)
        ))
            # Update the number of input planes for the next block
            self.in_planes = planes
            # Return the custom layer as a sequential module
        return nn.Sequential(*custom_layers)

    # Forward pass function
    def forward(self, x):
        # Apply the first convolution layer, batch normalization layer and ReLU activation function
        out = F.relu(self.bn1(self.conv1(x)))
        # Loop through all the layers and apply each layer in sequence
        for i in range(1, self.num_layers+1):
            out = eval("self.layer" + str(i) + "(out)")
        # perform global average pooling and flatten output
        out = F.avg_pool2d(out, 4*self.multiplier)
        out = out.view(out.size(0), -1)
        # pass output through fully connected layer and return
        out = self.linear(out)
        return out

In [None]:
# Define the data augmentation and normalization transforms for training data
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4), # Randomly crop the input image to size 32 with padding of 4 pixels
    transforms.RandomHorizontalFlip(), # Randomly flip the input image horizontally
    transforms.ToTensor(), # Convert the input image to a PyTorch tensor
    # Normalize the input image using the given mean and standard deviation values for each color channel
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
# Define the data normalization transforms for testing data
transform_test = transforms.Compose([
    transforms.ToTensor(), # Convert the input image to a PyTorch tensor
    # Normalize the input image using the given mean and standard deviation values for each color channel
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [None]:
# Load the CIFAR10 dataset for training
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

# Load the CIFAR10 dataset for testing
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Split the validation set from the training set
trainset_size = len(trainset) # get the length of the trainset
valset_size = int(trainset_size * 0.2) # calculate the size of validation set as 20% of the trainset
trainset_size -= valset_size # subtract the size of validation set from the trainset size
trainset, valset = torch.utils.data.random_split(trainset, [trainset_size, valset_size]) # split the trainset into trainset and validation set

# create a dataloader for the trainset with batch size of 128 and 2 worker threads for loading data in parallel
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)
# create a dataloader for the validation set with batch size of 128 and no shuffling of data
valloader = torch.utils.data.DataLoader(valset, batch_size=128, shuffle=False, num_workers=2)

In [None]:
from torchsummary import summary

# Define the ResNet model and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Instantiate the ResNet model and move it to the selected device
net = ResNet().to(device)
# Print the summary of the model architecture
summary(net, input_size=(3, 32, 32))
# Define the loss function to be used
lossFunction = torch.nn.CrossEntropyLoss(reduction='sum')
# Define the learning rate for the optimizer
learningRate = 0.1
# Define the weight decay for the optimizer
weightDecay = 0.0001
# Instantiate the optimizer with the specified parameters and the model parameters
optimizer = torch.optim.Adadelta(net.parameters(), lr=learningRate, weight_decay=weightDecay)
#optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
#optimizer1 = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
#optimizer2 = optim.Adam(net.parameters(), lr=0.001, betas=(0.9, 0.999), weight_decay=1e-4)
#summary(net, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 32, 32]             896
       BatchNorm2d-2           [-1, 32, 32, 32]              64
            Conv2d-3           [-1, 32, 32, 32]           9,248
       BatchNorm2d-4           [-1, 32, 32, 32]              64
              ReLU-5           [-1, 32, 32, 32]               0
            Conv2d-6           [-1, 32, 32, 32]           9,248
       BatchNorm2d-7           [-1, 32, 32, 32]              64
              ReLU-8           [-1, 32, 32, 32]               0
            Conv2d-9           [-1, 64, 16, 16]          18,496
      BatchNorm2d-10           [-1, 64, 16, 16]             128
             ReLU-11           [-1, 64, 16, 16]               0
           Conv2d-12           [-1, 64, 16, 16]          36,928
      BatchNorm2d-13           [-1, 64, 16, 16]             128
             ReLU-14           [-1, 64,

In [None]:
print(device)

cuda


In [None]:
optimizer

Adadelta (
Parameter Group 0
    differentiable: False
    eps: 1e-06
    foreach: None
    lr: 0.1
    maximize: False
    rho: 0.9
    weight_decay: 0.0001
)

In [None]:
import torch.optim as optim
import torch.nn.functional as F

# Define the optimizer and loss function
#optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)
lossFunction = torch.nn.CrossEntropyLoss(reduction='sum')
# Set the learning rate and weight decay for the optimizer
learningRate = 0.1
weightDecay = 0.0001
# Define the optimizer to be used
optimizer = torch.optim.Adadelta(net.parameters(), lr=learningRate, weight_decay=weightDecay)
# Define the criterion to be used for calculating the loss
criterion = nn.CrossEntropyLoss()


# Set the number of epochs to train the model
num_epochs = 300
# Initialize empty lists to store the training and validation loss and accuracy values
train_loss, train_acc = [], []
val_loss, val_acc = [], []
# Initialize a variable to keep track of the best validation accuracy so far
best_val_acc = 0.0

# Loop through each epoch
for epoch in range(num_epochs):
  # Set the model to training mode
    net.train()
    # Initialize variables to keep track of the running loss and accuracy for the current epoch
    running_loss, running_acc = 0.0, 0.0
    # Loop through each batch in the training data
    for i, data in enumerate(trainloader, 0):
      # Get the input and label data for the current batch, and move it to the GPU if available
        inputs, labels = data[0].to(device), data[1].to(device)
        # Zero out the gradients from the previous iteration
        optimizer.zero_grad()
        # Pass the input data through the model to get the output predictions
        outputs = net(inputs)
        # Calculate the loss between the predictions and the true labels
        loss = criterion(outputs, labels)
        # Backpropagate the loss through the model to calculate gradients for each parameter
        loss.backward()
        # Update the model parameters using the optimizer and the calculated gradients
        optimizer.step()
        
        # Add the current batch's loss and accuracy to the running totals for the epoch
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        running_acc += (predicted == labels).sum().item()

    # Calculate the average loss and accuracy for the epoch, and add them to the respective lists
    train_loss.append(running_loss / len(trainloader))
    train_acc.append(running_acc / len(trainloader.dataset))
    
    # Set the model to evaluation mode
    net.eval()
    # Initialize variables to keep track of the running loss and accuracy for the current epoch
    running_loss, running_acc = 0.0, 0.0
    # Loop through each batch in the validation data
    with torch.no_grad():
      # loop over the validation data
        for i, data in enumerate(valloader, 0):
          # Get the input and label data for the current batch, and move it to the GPU if available
            inputs, labels = data[0].to(device), data[1].to(device) # move the input and labels to the device (CPU or GPU)
            outputs = net(inputs) # forward pass
            loss = criterion(outputs, labels) # compute the loss
            
            running_loss += loss.item() # update the running loss
            _, predicted = torch.max(outputs.data, 1) # get the predicted class labels
            running_acc += (predicted == labels).sum().item() # update the running accuracy

    val_loss.append(running_loss / len(valloader)) # append the average validation loss for this epoch
    val_acc.append(running_acc / len(valloader.dataset)) # append the average validation accuracy for this epoch
    
    # Print the training and validation accuracy for every epoch
    print('Epoch [{}/{}], Train Loss: {:.4f}, Train Acc: {:.4f}, Val Loss: {:.4f}, Val Acc: {:.4f}'.format(
        epoch+1, num_epochs, train_loss[-1], train_acc[-1], val_loss[-1], val_acc[-1]))
    
    # Save the model with the best validation accuracy
    if val_acc[-1] > best_val_acc:
        best_val_acc = val_acc[-1]
        torch.save(net.state_dict(), 'best_model.pth')

Epoch [1/300], Train Loss: 1.6011, Train Acc: 0.4067, Val Loss: 1.5327, Val Acc: 0.4602
Epoch [2/300], Train Loss: 1.1933, Train Acc: 0.5692, Val Loss: 1.2747, Val Acc: 0.5442
Epoch [3/300], Train Loss: 0.9688, Train Acc: 0.6575, Val Loss: 1.0661, Val Acc: 0.6238
Epoch [4/300], Train Loss: 0.8316, Train Acc: 0.7085, Val Loss: 0.9306, Val Acc: 0.6834
Epoch [5/300], Train Loss: 0.7355, Train Acc: 0.7430, Val Loss: 0.8324, Val Acc: 0.7151
Epoch [6/300], Train Loss: 0.6685, Train Acc: 0.7684, Val Loss: 0.8144, Val Acc: 0.7188
Epoch [7/300], Train Loss: 0.6180, Train Acc: 0.7873, Val Loss: 0.7811, Val Acc: 0.7257
Epoch [8/300], Train Loss: 0.5717, Train Acc: 0.8042, Val Loss: 0.7602, Val Acc: 0.7485
Epoch [9/300], Train Loss: 0.5295, Train Acc: 0.8169, Val Loss: 0.7099, Val Acc: 0.7649
Epoch [10/300], Train Loss: 0.4977, Train Acc: 0.8276, Val Loss: 0.6663, Val Acc: 0.7738
Epoch [11/300], Train Loss: 0.4696, Train Acc: 0.8365, Val Loss: 0.6529, Val Acc: 0.7750
Epoch [12/300], Train Loss: 0.