In [1]:
# Import necessary modules from PyTorch
import torchvision
import torch
from torchvision import datasets
import torchvision.transforms.v2 as transforms

# Import necessary modules for Neural Network
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

## CNN Model Architecture

In [2]:
# Define custom Convolution Neural Network
class CNN(nn.Module):
    """VGG-style image classifier: 13 convolutional blocks followed by 3 linear layers.

    Every convolutional block is ``Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> ReLU``.
    Blocks 2, 4, 7, 10 and 13 additionally end with a 2x2 max pool, so the spatial
    resolution is halved five times (a 224x224 input becomes a 7x7 feature map).

    Args:
        num_channels: Number of channels of the input images.
        num_out_ch: Channel widths of the four convolutional stages. Only the first
            four entries are read.
        dropout: Dropout probability applied in front of the first two linear layers.
        num_neurons: Width of the two hidden linear layers.
        num_classes: Number of output logits.
    """

    # Spatial size (height, width) of the feature map consumed by the linear head.
    _FEATURE_MAP_SIZE = (7, 7)

    def __init__(self, num_channels=3, num_out_ch=(32, 64, 128, 256), dropout=0.5, num_neurons=1024, num_classes=102):
        super().__init__()
        c1, c2, c3, c4 = num_out_ch[0], num_out_ch[1], num_out_ch[2], num_out_ch[3]

        # Convolutional layers. Attribute names and the order of construction are kept
        # as they were so that existing checkpoints (state_dict keys) still load.
        self.layer1 = self._conv_block(num_channels, c1)
        self.layer2 = self._conv_block(c1, c1, pool=True)
        self.layer3 = self._conv_block(c1, c2)
        self.layer4 = self._conv_block(c2, c2, pool=True)
        self.layer5 = self._conv_block(c2, c3)
        self.layer6 = self._conv_block(c3, c3)
        self.layer7 = self._conv_block(c3, c3, pool=True)
        self.layer8 = self._conv_block(c3, c4)
        self.layer9 = self._conv_block(c4, c4)
        self.layer10 = self._conv_block(c4, c4, pool=True)
        self.layer11 = self._conv_block(c4, c4)
        self.layer12 = self._conv_block(c4, c4)
        self.layer13 = self._conv_block(c4, c4, pool=True)

        # Parameter-free pooling to the size the linear head expects. For a 224x224
        # input the feature map is already 7x7 and this is an identity; for other
        # input sizes it keeps the flattened feature length constant.
        self.avgpool = nn.AdaptiveAvgPool2d(self._FEATURE_MAP_SIZE)

        # Fully connected layers
        flat_features = self._FEATURE_MAP_SIZE[0] * self._FEATURE_MAP_SIZE[1] * c4
        self.fc = nn.Sequential(
            nn.Dropout(p=dropout),  # Dropout layer to reduce overfitting
            nn.Linear(flat_features, num_neurons),
            nn.ReLU()
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(num_neurons, num_neurons),
            nn.ReLU()
        )
        self.fc2 = nn.Sequential(
            nn.Linear(num_neurons, num_classes)
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, pool=False):
        """Build Conv2d(3x3) -> BatchNorm2d -> ReLU, optionally followed by a 2x2 max pool."""
        layers = [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        if pool:
            # Down-sample by taking the maximum over non-overlapping 2x2 windows
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: Batch of images shaped (batch, num_channels, height, width).

        Returns:
            Tensor of shape (batch, num_classes) holding the unnormalized class scores.
        """
        conv_blocks = (
            self.layer1, self.layer2, self.layer3, self.layer4, self.layer5,
            self.layer6, self.layer7, self.layer8, self.layer9, self.layer10,
            self.layer11, self.layer12, self.layer13,
        )
        out = x
        for block in conv_blocks:
            out = block(out)
        out = self.avgpool(out)
        out = out.reshape(out.size(0), -1)  # Flatten everything except the batch dimension
        out = self.fc(out)
        out = self.fc1(out)
        out = self.fc2(out)
        return out

### Set Training Parameters, Device, Model, Optimizer, and Loss Function

In [3]:
# Hyperparameters
SEED = 42  # Seed for PyTorch's random number generators
NUM_OUT_CH = [32, 64, 128, 256]  # Channel widths of the four convolutional stages
NUM_NEURONS = 1024  # Width of the hidden fully connected layers
DROPOUT = 0.5  # Dropout probability in the fully connected head
BATCH_SIZE = 128
NUM_WORKERS = 8
NUM_EPOCHS = 3000  # Number of training epochs
LR = 0.001
WEIGHT_DECAY = 0.001
# NOTE(review): STEP_SIZE is passed as `patience` to ReduceLROnPlateau below. The
# training cell stops early after 50 epochs without improvement, so with a patience
# of 1000 the learning rate will presumably never be reduced -- confirm this is intended.
STEP_SIZE = 1000
FACTOR = 0.9  # Factor the learning rate is multiplied by when the scheduler triggers

# Seed PyTorch before the model is created so that weight initialization (and every
# later draw from the global generator) is repeatable after Restart & Run All.
# Some GPU kernels may still be non-deterministic.
torch.manual_seed(SEED)

# Define device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Create an instance of the CNN model
model = CNN(num_channels=3, num_out_ch=NUM_OUT_CH, dropout=DROPOUT, num_neurons=NUM_NEURONS, num_classes=102).to(device)  # 102 classes for Flowers102 dataset

# Define the loss function
loss_fn = nn.CrossEntropyLoss()

# Define the optimizer
optimizer = optim.AdamW(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

# Learning rate scheduler: multiplies the learning rate by FACTOR once the monitored
# value (the validation loss passed to scheduler.step) has stopped decreasing for
# STEP_SIZE epochs
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=FACTOR, patience=STEP_SIZE)

## Preprocessing Data

In [4]:
# Data augmentation and normalization

# Per-channel mean and standard deviation used to normalize every image
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]


def _to_normalized_tensor():
    """Return fresh transforms that turn an image into a normalized float32 tensor."""
    return [
        transforms.ToImage(),                           # Convert the image to a tensor image
        transforms.ToDtype(torch.float32, scale=True),  # Cast to float32 and rescale the values
        transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),  # Normalize each channel
    ]


# Training pipeline: random augmentations followed by tensor conversion
_train_augmentations = [
    transforms.Resize(256),               # Resize so that the shorter side is 256 pixels
    transforms.RandomResizedCrop(224),    # Random crop, resized to 224x224
    transforms.RandomHorizontalFlip(),    # Random horizontal flip
    transforms.RandomVerticalFlip(),      # Random vertical flip
    transforms.RandomRotation(10),        # Random rotation by up to 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Random color changes
]
train_transform = transforms.Compose(_train_augmentations + _to_normalized_tensor())

# Validation pipeline: deterministic resize and center crop, no augmentation
val_transform = transforms.Compose(
    [
        transforms.Resize(256),      # Resize so that the shorter side is 256 pixels
        transforms.CenterCrop(224),  # Keep the central 224x224 region
    ]
    + _to_normalized_tensor()
)

# The test split is preprocessed exactly like the validation split
test_transform = val_transform

# Define dataset root directory
data_dir = 'dataset_flower102/'

In [5]:
# Build the three Flowers102 splits; each split applies its own transform when a sample is loaded
def _flowers_split(split, transform):
    """Create the Flowers102 dataset for one split, downloading the data if necessary."""
    return datasets.Flowers102(root=data_dir, split=split, transform=transform, download=True)


train_dataset = _flowers_split('train', train_transform)
valid_dataset = _flowers_split('val', val_transform)
test_dataset = _flowers_split('test', test_transform)

In [6]:
# Create data loaders. All three share the same batching options; only the training
# loader shuffles its samples.
_loader_options = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, pin_memory=True)

train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, **_loader_options)
valid_loader = torch.utils.data.DataLoader(valid_dataset, shuffle=False, **_loader_options)
test_loader = torch.utils.data.DataLoader(test_dataset, shuffle=False, **_loader_options)

## CNN Model Training

### Model Training and Validation

In [None]:
# Optionally resume training from 'latest_model.pth', restoring the model, optimizer and
# scheduler state together with the recorded training and validation loss values.
# NOTE(review): despite its name, this flag restores the latest checkpoint, not 'best_model.pth'.
resume_from_best_checkpoint = False
if resume_from_best_checkpoint:
    # map_location lets a checkpoint that was written on a GPU be restored on a CPU-only machine
    checkpoint = torch.load('latest_model.pth', map_location=device)
    model.load_state_dict(checkpoint['model_state'])
    optimizer.load_state_dict(checkpoint['optimizer_state'])
    scheduler.load_state_dict(checkpoint['scheduler_state'])
    # The checkpoint stores the index of the last epoch that ran, so continue with the next one
    start_epoch = checkpoint['epoch'] + 1
    train_loss_history = checkpoint['train_loss']
    val_loss_history = checkpoint['val_loss']
    best_val_loss = checkpoint['best_val_loss']
else:
    start_epoch = 0
    train_loss_history = []
    val_loss_history = []
    best_val_loss = float('inf')  # Initialize the best validation loss with a large value


print("best_val_loss: ", best_val_loss)

EARLY_STOP_PATIENCE = 50  # Number of epochs to wait before stopping if validation loss doesn't improve
patience = EARLY_STOP_PATIENCE  # Remaining epochs without improvement before training stops

# Index of the last epoch that actually ran; stays defined even if the loop body never executes
last_epoch = start_epoch - 1

for epoch in range(start_epoch, NUM_EPOCHS):
    last_epoch = epoch

    # Train the model
    model.train()  # Set the model to training mode
    running_train_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)  # Move images and labels to the device

        outputs = model(images)  # Forward pass (calling the module also runs registered hooks)
        loss = loss_fn(outputs, labels)  # Calculate the loss

        optimizer.zero_grad()  # Zero the parameter gradients
        loss.backward()  # Backward pass
        optimizer.step()  # Update the parameters

        # The loss is a per-batch mean, so weight it by the batch size to sum over samples
        running_train_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)  # Index of the highest score = predicted class
        total += labels.size(0)  # Number of samples seen so far
        correct += predicted.eq(labels).sum().item()  # Number of correct predictions so far

    # Calculate training loss and accuracy
    train_loss = running_train_loss / len(train_loader.dataset)
    train_loss_history.append(train_loss)
    train_accuracy = 100.0 * correct / total

    # Print training loss and accuracy
    print(f'Epoch {epoch + 1}/{NUM_EPOCHS}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%')

    # Validate the model
    model.eval()  # Set the model to evaluation mode
    running_val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)  # Move images and labels to the device

            outputs = model(images)  # Forward pass
            loss = loss_fn(outputs, labels)  # Calculate the loss

            running_val_loss += loss.item() * images.size(0)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    # Calculate validation loss and accuracy
    val_loss = running_val_loss / len(valid_loader.dataset)
    val_loss_history.append(val_loss)
    val_accuracy = 100.0 * correct / total

    # Step the scheduler on the validation loss
    scheduler.step(val_loss)

    # Print validation loss and accuracy
    print(f'Epoch {epoch + 1}/{NUM_EPOCHS}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

    # Check if validation loss has improved
    if val_loss < best_val_loss:
        print("Creating new checkpoint for best model...")
        best_val_loss = val_loss
        patience = EARLY_STOP_PATIENCE  # Reset patience if validation loss improves

        # Save the best trained model along with training and validation loss values
        torch.save({
            'model_state': model.state_dict(),
            'optimizer_state': optimizer.state_dict(),
            'scheduler_state': scheduler.state_dict(),
            'epoch': epoch,
            'train_loss': train_loss_history,
            'val_loss': val_loss_history,
            'best_val_loss': best_val_loss
        }, 'best_model.pth')

    else:
        patience -= 1
        if patience <= 0:
            print("Early stopping...")
            break

# Save the state at the end of training (this is the file the resume branch above reads)
torch.save({
    'model_state': model.state_dict(),
    'optimizer_state': optimizer.state_dict(),
    'scheduler_state': scheduler.state_dict(),
    'epoch': last_epoch,
    'train_loss': train_loss_history,
    'val_loss': val_loss_history,
    'best_val_loss': best_val_loss
}, 'latest_model.pth')


In [None]:
# Test the model
# NOTE(review): this evaluates the weights saved at the end of training ('latest_model.pth').
# If early stopping triggered, 'best_model.pth' holds the weights with the lowest
# validation loss -- confirm which checkpoint should be reported.
# map_location lets a checkpoint that was written on a GPU be restored on a CPU-only machine
checkpoint = torch.load('latest_model.pth', map_location=device)
model.load_state_dict(checkpoint['model_state'])
model.eval()  # Set the model to evaluation mode
running_test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():

    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)  # Move images and labels to the device

        outputs = model(images)  # Forward pass (calling the module also runs registered hooks)
        loss = loss_fn(outputs, labels)  # Calculate the loss

        # The loss is a per-batch mean, so weight it by the batch size to sum over samples
        running_test_loss += loss.item() * images.size(0)
        _, predicted = outputs.max(1)  # Index of the highest score = predicted class
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

# Calculate test loss and accuracy
test_loss = running_test_loss / len(test_loader.dataset)
test_accuracy = 100.0 * correct / total

# Print test loss and accuracy
print(f'Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.2f}%')

In [None]:
# Visualize the network model
# This cell is disabled: every statement below is commented out. When uncommented it
# runs one dummy forward pass, prints the output shape and renders the autograd graph
# to 'network_architecture_enhanced' in SVG format. It needs the third-party `torchviz`
# package, which is not imported anywhere else in this notebook.
# NOTE(review): the dot.attr(...) styling calls come after dot.render(...), so they
# presumably do not affect the rendered file -- confirm and reorder if styling is wanted.
# import torch
# from torchviz import make_dot

# # Ensure model is in evaluation mode for inference (since you're not training)
# model.eval()

# # Create a dummy input tensor of the correct size
# dummy_input = torch.randn(1, 3, 224, 224)  # Adjust the size according to your input dimensions

# # Check if CUDA is available and move the tensor to GPU if it is
# if torch.cuda.is_available():
#     dummy_input = dummy_input.to('cuda')

# # Forward pass through the model
# output = model(dummy_input)

# print(output.shape)

# # Generate the graph
# dot = make_dot(output, params=dict(list(model.named_parameters()) + [('input', dummy_input)]))
# dot.format = 'svg'
# dot.render('network_architecture_enhanced', format='svg', engine='dot')
# dot.attr('graph', fontsize='10', nodesep='0.5', ranksep='0.75')
# dot.attr('node', shape='box', style='filled', fillcolor='lightblue', fontsize='12')
# dot.attr('edge', fontsize='10', penwidth='2')


torch.Size([1, 102])
