In [1]:
import cv2 as cv  # OpenCV for image processing
import os  # For directory and file manipulation
import numpy as np  # For numerical operations
import matplotlib.pyplot as plt  # For plotting loss curves
import torch  # For PyTorch framework
from torch import nn  # For neural network modules
import torch.nn.functional as F  # For utility functions
import torchvision.models as models  # For pre-trained models
import torchvision.ops as ops  # For image operations
from cnn import CNN

ModuleNotFoundError: No module named 'cnn'

In [None]:
# Select the compute device: the first CUDA GPU when one is present, otherwise the CPU.
if torch.cuda.is_available():
    torch.cuda.set_device(0)  # Make GPU 0 the current CUDA device
    device = torch.device('cuda')
else:
    # Fall back to the CPU so the notebook still runs on machines without CUDA
    device = torch.device('cpu')
loss_function = nn.CrossEntropyLoss()  # Cross entropy loss function for classification

In [None]:
# Read the mask and image arrays from disk (masks first) and copy each into a tensor
masks, images = (
    torch.tensor(np.load(path)) for path in ("masks.npy", "images.npy")
)

In [None]:
# Crop every image to the bounding box of its mask, then resize the crop to 64x64
cropped_images = []
for i in range(images.shape[0]):
    # masks_to_boxes is given a batch of one mask (hence the unsqueeze) and
    # returns one (x1, y1, x2, y2) box for it.
    x1, y1, x2, y2 = ops.masks_to_boxes(masks[i].unsqueeze(0)).int().tolist()[0]
    # x2 / y2 are the largest mask coordinates, i.e. inclusive. Python slices
    # exclude their end, so add 1 to keep the last row/column; without it a
    # one-pixel-wide mask would produce an empty crop and cv.resize would fail.
    img = images[i][y1:y2 + 1, x1:x2 + 1]
    img = cv.resize(img.numpy(), (64, 64))  # Resize crop to the fixed 64x64 model input
    cropped_images.append(img)

In [None]:
# Combine the list of equally-sized crops into one NumPy array, one crop per
# index along the new leading axis
images = np.stack(cropped_images, axis=0)

In [None]:
# Build the label list from the class-folder names under the training directory
labels = []
directory = "train"  # Path to training images (no trailing separator, so it works on any OS)
# Each file contributes this many labels — presumably images.npy holds that many
# samples per source file. TODO confirm against the code that produced images.npy.
SAMPLES_PER_FILE = 4
# NOTE(review): os.listdir returns entries in arbitrary order; the labels only line
# up with images.npy if that file was produced with the same traversal — verify.
for folder in os.listdir(directory):
    inner_directory = os.path.join(directory, folder)  # Inner folder (class directories)
    for _ in os.listdir(inner_directory):
        # The folder name itself is the integer class id
        labels.extend([int(folder)] * SAMPLES_PER_FILE)

In [None]:
# Pack the Python list of integer labels into a 1-D NumPy array
labels = np.stack(labels, axis=0)

In [None]:
# Turn both arrays into tensors: float32 for the images, int64 for the labels
images = torch.tensor(images, dtype=torch.float32)
labels = torch.tensor(labels, dtype=torch.long)
labels = labels - 1  # Shift labels down by one so a folder named "1" maps to class index 0

In [None]:
# Train/validation split. The multiplier of 1 sends every sample to the training
# set, so the validation tensors below are empty (use e.g. 0.8 for an 80:20 split).
split = int(images.shape[0] * 1)
train_images, val_images = images[:split], images[split:]
train_labels, val_labels = labels[:split], labels[split:]

In [None]:
# Show how many labels ended up in the training and validation sets (one per line)
print(train_labels.shape, val_labels.shape, sep="\n")

In [None]:
# Build the network and its optimizer, then place the network on the chosen device
model = CNN()  # Project-local architecture imported from cnn.py
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)  # Adam, learning rate 1e-3
model.to(device)  # Moves the parameters in place; the optimizer keeps referring to them

In [None]:
# Validation function to calculate loss on the validation set
def val_loss():
    """Return the mean per-sample loss of ``model`` on the validation set.

    Reads the notebook-level ``model``, ``val_images``, ``val_labels``,
    ``device`` and ``loss_function``. Samples are evaluated one at a time with
    the model in evaluation mode and gradient tracking disabled; the model is
    switched back to training mode before returning, even if evaluation fails.

    Returns:
        float: The average loss over all validation samples, or ``nan`` when
        the validation set is empty (there is nothing to average).
    """
    val_losses = []  # One loss value per validation sample
    model.eval()  # Evaluation mode for the duration of the pass
    try:
        with torch.no_grad():  # No backward pass here, so skip building autograd graphs
            for i in range(val_images.shape[0]):
                # Slice (rather than index) to keep the leading batch dimension
                x = val_images[i:i+1].to(device)
                y = val_labels[i:i+1].to(device)

                # Move the last axis to position 1 (channels-last -> channels-first)
                logits = model(x.permute(0, 3, 1, 2))
                val_losses.append(loss_function(logits, y).item())
    finally:
        model.train()  # Always hand the model back in training mode

    if not val_losses:
        # Empty validation set: avoid ZeroDivisionError and signal "no value"
        return float('nan')
    return sum(val_losses) / len(val_losses)

In [None]:
# Hyperparameters for the training loop:
# number of passes over the training set, and samples per optimizer step
n_epochs, batch_size = 50, 16

In [None]:
losses = []  # Training loss of every mini-batch, in the order they were processed
# Make sure the model is in training mode: the evaluation cell leaves it in eval
# mode, so re-running this cell afterwards would otherwise train in eval mode.
model.train()
for epoch in range(n_epochs):
    permutation = torch.randperm(train_images.shape[0])  # Fresh shuffle every epoch
    for i in range(0, permutation.shape[0], batch_size):
        optimizer.zero_grad()  # Clear gradients from previous iteration

        indices = permutation[i:i+batch_size]  # Batch indices (the last batch may be smaller)
        x, y = train_images[indices], train_labels[indices]  # Get batch data

        # Move the batch to the compute device
        x = x.to(device)
        y = y.to(device)

        # Forward pass; permute moves the last axis to position 1 (channels-first)
        logits = model(x.permute(0, 3, 1, 2))
        loss = loss_function(logits, y)  # Compute loss
        losses.append(loss.item())  # Store loss as a plain float

        # Backpropagation
        loss.backward()
        optimizer.step()  # Update model weights

In [None]:
# Draw the per-batch training loss curve
fig, ax = plt.subplots()
ax.plot(losses)
ax.set_title('Training Loss')
ax.set_xlabel('Iterations')
ax.set_ylabel('Loss')
plt.show()

In [None]:
# Final evaluation on the validation set
model.eval()  # Set the model to evaluation mode (it is left in this mode afterwards)
val_losses = []  # One loss value per validation sample
with torch.no_grad():  # Inference only: do not build autograd graphs
    for i in range(val_images.shape[0]):
        # Slice (rather than index) to keep the leading batch dimension
        x = val_images[i:i+1]
        y = val_labels[i:i+1]

        # Move the sample to the compute device
        x = x.to(device)
        y = y.to(device)

        # Forward pass; permute moves the last axis to position 1 (channels-first)
        logits = model(x.permute(0, 3, 1, 2))
        loss = loss_function(logits, y)  # Compute loss
        val_losses.append(loss.item())  # Append loss to list

In [None]:
# Print average validation loss; with an empty validation set there is nothing
# to average, so report that instead of raising ZeroDivisionError
if val_losses:
    print(f'Validation Loss: {sum(val_losses) / len(val_losses)}')
else:
    print('Validation Loss: n/a (validation set is empty)')

In [None]:
# Save the trained model's weights (state dict only, not the full module object)
os.makedirs("models", exist_ok=True)  # torch.save does not create missing directories
torch.save(model.state_dict(), "models/cnn.pt")