In [None]:
import torch  # PyTorch library for deep learning
import torch.nn as nn  # For neural network functionalities
import numpy as np  # For numerical operations
from unet import UNet  # U-Net model
from cnn import CNN  # Custom CNN model
import matplotlib.pyplot as plt  # For plotting
import torchvision.ops as ops  # For image processing operations
import cv2 as cv  # OpenCV library for image manipulation
import os  # For interacting with the file system
import csv  # For writing CSV files

In [None]:
# Select the compute device: the first GPU when CUDA is available, otherwise
# fall back to the CPU so the notebook still runs on machines without a GPU.
if torch.cuda.is_available():
    torch.cuda.set_device(0)  # Make GPU 0 the current CUDA device
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
# Read every file in `directory`, convert it to RGB, resize it to 224x224 and
# append it to `images`.
# NOTE(review): `directory` and `images` are not defined in the cells visible
# here; presumably an earlier cell sets them up — confirm with Restart & Run All.
for file in os.listdir(directory):
    path = os.path.join(directory, file)
    img = cv.imread(path)
    if img is None:
        # cv.imread reports an unreadable/non-image file by returning None
        # instead of raising. Fail loudly here (rather than skipping) because
        # the predictions are later paired with a directory listing by position,
        # so silently dropping a file would shift every following row.
        raise ValueError(f"Could not read image file: {path}")
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
    img = cv.resize(img, (224, 224))  # Resize the image to 224x224
    images.append(img)

In [None]:
# Crop each image to the bounding box of its mask and resize the crop to 64x64.
# NOTE(review): assumes `images` is a CPU torch tensor whose first axis indexes
# images and whose next two axes are (row, column), and that `masks[i]` is a
# 2-D (H, W) mask — confirm against the cells that build them.
cropped_images = []
for i in range(images.shape[0]):
    # masks_to_boxes returns one (x_min, y_min, x_max, y_max) box per mask.
    x_min, y_min, x_max, y_max = ops.masks_to_boxes(masks[i].unsqueeze(0)).int().tolist()[0]
    # The max coordinates are indices of pixels that belong to the mask
    # (inclusive), while slice ends are exclusive, hence the +1. Without it the
    # last row/column is dropped and a one-pixel-wide mask gives an empty crop.
    # NOTE(review): if the model was trained on crops made without the +1, keep
    # the two pipelines consistent.
    img = images[i][y_min:y_max + 1, x_min:x_max + 1]
    img = cv.resize(img.numpy(), (64, 64))  # Resize the crop to 64x64
    cropped_images.append(img)

In [None]:
# Combine the per-file images into one array along a new leading axis and
# rescale the pixel values by 1/255 (mean/std normalization is not done here).
images = np.true_divide(np.stack(images), 255)

In [None]:
# Pull the three colour planes out of the last axis (index 0 = R, 1 = G, 2 = B)
r, g, b = (images[:, :, :, channel] for channel in range(3))

In [None]:
# Standardize every colour plane as (value - mean) / std, using the ImageNet
# per-channel mean and standard deviation constants.
r, g, b = (
    (r - 0.485) / 0.229,  # Red
    (g - 0.456) / 0.224,  # Green
    (b - 0.406) / 0.225,  # Blue
)

In [None]:
# Reassemble the normalized planes into a single array, channels on axis 3
images = np.stack((r, g, b), axis=3)

In [None]:
# Load the pre-trained CNN model
cnn = CNN()  # Instantiate the CNN model
# map_location remaps the stored tensors onto `device` while loading, so a
# checkpoint saved on a different device (e.g. another GPU) still deserializes.
cnn.load_state_dict(torch.load("models/cnn.pt", map_location=device, weights_only=True))
cnn.to(device)  # Move the model to the selected device
cnn.eval()  # Evaluation mode: disables training-only behaviour such as dropout

In [None]:
# Set up the inputs for inference: an empty list that will collect the
# predicted labels, and the images converted to a float32 PyTorch tensor.
predictions = list()
test_set = torch.tensor(images, dtype=torch.float32)

In [None]:
# Run the CNN over the test set one image at a time and collect the predicted
# class for each image (stored 1-based).
# The list is rebuilt here so that re-running this cell does not append a
# second copy of the predictions.
predictions = []
with torch.no_grad():  # Inference only: skip autograd bookkeeping to save memory
    for i in range(test_set.shape[0]):
        batch = test_set[i:i+1].to(device)  # Slice keeps a batch axis of size 1
        logits = cnn(batch.permute(0, 3, 1, 2))  # Reorder axes NHWC -> NCHW for the model
        # Index of the highest-probability class, shifted by 1 for 1-based labels
        predictions.append(torch.argmax(torch.softmax(logits, dim=1)).item() + 1)

In [None]:
# Pair every file name in the 'test' directory with its prediction, by position.
# NOTE(review): this relies on os.listdir("test") returning the files in the
# same order as the listing used when the images were loaded — confirm that the
# images came from this same directory.
files = os.listdir("test")
if len(files) != len(predictions):
    # A mismatch would otherwise raise an IndexError or silently drop rows.
    raise ValueError(
        f"Found {len(files)} files in 'test' but {len(predictions)} predictions"
    )
# Rows of [file name, predicted label] for the CSV writer
dictionary = [[name, label] for name, label in zip(files, predictions)]

In [None]:
# Dump the [file name, prediction] rows to submission.csv.
# newline='' lets the csv module control line endings itself.
with open("submission.csv", 'w', newline='') as file:
    csv.writer(file).writerows(dictionary)