In [26]:
# imports
import os
import torch
from torch import flatten
import torch.nn as nn
from torch.nn import ReLU
from torch.nn import LogSoftmax
import torchvision.transforms as transforms
import torch.optim as optim
from PIL import Image
import numpy as np

# AS USED IN TRAINING
class CNN(nn.Module):
    """Compact two-stage convolutional classifier (TODO: architecture still to be refined).

    Pipeline: conv(8) -> ReLU -> max-pool -> conv(16) -> ReLU -> max-pool ->
    flatten -> linear(500) -> ReLU -> linear(num_classes) -> log-softmax.

    Args:
        in_channel: number of channels of the input images.
        num_classes: number of output classes (one log-probability each).

    Note:
        ``fc1`` takes 16 * 8 * 8 flattened features, i.e. what the two 2x2
        poolings leave of a 32x32 input after the padded 3x3 convolutions.
        ``dropout1`` / ``dropout2`` are registered on the module but are not
        applied anywhere in ``forward``.
    """

    def __init__(self, in_channel=3, num_classes=200):
        super().__init__()
        pool_window = (2, 2)

        # stage 1: padded 3x3 convolution keeps the spatial size, pooling halves it
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=8, kernel_size=3, stride=1, padding=1)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=pool_window, stride=pool_window)

        # stage 2: same layout, 8 -> 16 feature maps
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=pool_window, stride=pool_window)

        # dropout modules (currently unused by forward)
        self.dropout1 = nn.Dropout2d(0.25)
        self.dropout2 = nn.Dropout2d(0.5)

        # hidden fully connected layer
        self.fc1 = nn.Linear(in_features=16 * 8 * 8, out_features=500)
        self.relu3 = nn.ReLU()

        # classification head producing log-probabilities over the class axis
        self.fc2 = nn.Linear(in_features=500, out_features=num_classes)
        self.logSoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Return per-class log-probabilities for a batch of images ``x``."""
        # two convolution / activation / pooling stages
        features = self.pool1(self.relu1(self.conv1(x)))
        features = self.pool2(self.relu2(self.conv2(features)))

        # collapse everything except the batch axis, then the hidden layer
        hidden = self.relu3(self.fc1(flatten(features, 1)))

        # class scores -> log-softmax
        return self.logSoftmax(self.fc2(hidden))

    

In [27]:
def preprocess_image(image_path):
    """Load an image file and convert it into a single-image model input batch.

    The image is converted to RGB, resized to 36x36, center-cropped to 32x32,
    turned into a tensor and normalized per channel with mean 0.5 and std 0.5.

    The crop is deterministic on purpose: random cropping / flipping is a
    training-time augmentation, and applying it at inference makes the
    prediction for the same file vary from call to call.

    Args:
        image_path: path of the image file to load.

    Returns:
        The transformed image tensor with a leading batch dimension of size 1
        added by ``unsqueeze(0)``.
    """
    inference_transforms = transforms.Compose([
        transforms.Resize((36, 36)),
        # fixed central 32x32 window instead of a random one
        transforms.CenterCrop((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    # convert() returns a fully loaded copy, so the file handle can be closed right away
    with Image.open(image_path) as raw_image:
        image = raw_image.convert('RGB')
    return inference_transforms(image).unsqueeze(0)

def predict(image_path, model, class_names):
    """Predict the class of a single image file.

    Args:
        image_path: path of the image to classify.
        model: network that maps the preprocessed image batch to per-class scores.
        class_names: sequence of class names indexed by the predicted class index.

    Returns:
        A ``(class_name, class_index)`` tuple, where ``class_index`` is the
        zero-based position of the highest score along dimension 1.

    Note:
        The model is moved to CUDA when available (CPU otherwise) and is
        switched to evaluation mode; both changes persist on the passed-in
        model after the call.
    """
    input_tensor = preprocess_image(image_path)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    # inference must not run with training-mode layer behavior (e.g. dropout)
    model.eval()
    input_tensor = input_tensor.to(device)

    with torch.no_grad():
        outputs = model(input_tensor)

    # the batch holds exactly one image, so .item() yields a plain int index
    predicted_index = outputs.argmax(dim=1).item()
    return class_names[predicted_index], predicted_index


# Load Class Dictionary
# NOTE(review): allow_pickle=True unpickles arbitrary objects; only load this file from a trusted source.
class_dictionary = np.load("aml-2024-feather-in-focus/class_names.npy", allow_pickle=True).item()
# Keep only the part of each key after its first '.' as the display name.
class_names = [name.split('.', 1)[1] for name in class_dictionary.keys()]

# Image Folder
image_folder = "aml-2024-feather-in-focus/train_images/train_images/"

# Initialize Model (ensure the number of classes matches your class dictionary)
# NOTE(review): no trained weights are loaded anywhere in this cell, so the model
# predicts with its random initialization. TODO: load the trained state dict
# before trusting any of the predictions printed below.
model = CNN(in_channel=3, num_classes=len(class_names))
model.eval()  # inference only

# Iterate through images
# os.listdir returns entries in arbitrary order; sort them so the output is reproducible.
for image_name in sorted(os.listdir(image_folder)):
    if not image_name.endswith('.jpg'):
        continue
    image_path = os.path.join(image_folder, image_name)

    # Predict
    predicted_class, predicted_class_number = predict(image_path, model, class_names)

    # Output result
    # NOTE(review): "+ 1" presumably maps the zero-based index onto one-based class
    # labels in dictionary key order — confirm against the values of class_dictionary.
    print(f"Image: {image_name} -> Predicted Class: {predicted_class}, Class Number: {predicted_class_number + 1}")


Image: 1.jpg -> Predicted Class: Pied_billed_Grebe, Class Number: 52
Image: 10.jpg -> Predicted Class: Indigo_Bunting, Class Number: 14
Image: 100.jpg -> Predicted Class: Gray_crowned_Rosy_Finch, Class Number: 34
Image: 1000.jpg -> Predicted Class: Indigo_Bunting, Class Number: 14
Image: 1001.jpg -> Predicted Class: Horned_Grebe, Class Number: 51
Image: 1002.jpg -> Predicted Class: Indigo_Bunting, Class Number: 14
Image: 1003.jpg -> Predicted Class: Horned_Grebe, Class Number: 51
Image: 1004.jpg -> Predicted Class: Horned_Grebe, Class Number: 51
Image: 1005.jpg -> Predicted Class: Horned_Grebe, Class Number: 51
Image: 1006.jpg -> Predicted Class: Horned_Grebe, Class Number: 51
Image: 1007.jpg -> Predicted Class: Horned_Grebe, Class Number: 51
Image: 1008.jpg -> Predicted Class: Gray_crowned_Rosy_Finch, Class Number: 34
Image: 1009.jpg -> Predicted Class: Indigo_Bunting, Class Number: 14
Image: 101.jpg -> Predicted Class: Horned_Grebe, Class Number: 51
Image: 1010.jpg -> Predicted Class

In [28]:
# load model
# NOTE(review): everything below is disabled code kept inside a bare triple-quoted
# string expression. None of it executes; the cell merely evaluates to the string,
# which the notebook echoes as the cell output. The disabled code references
# `load_model` and `checkpoint_path`, neither of which is defined in the visible
# cells — confirm they exist before re-enabling it.
"""model = load_model(checkpoint_path, num_classes=len(class_names))

for i in range(1,1000):
    image_path = os.path.join(image_folder, f"{i}.jpg")

    if not os.path.exists(image_path):
        print(f"Image {image_path} not found, skipping.")
        continue

    predicted_class, predicted_class_number = predict(image_path, model, class_names)
    print(f"Image: {image_path} -> Predicted Class: {predicted_class}, Class Number: {predicted_class_number + 1}")

    """

'model = load_model(checkpoint_path, num_classes=len(class_names))\n\nfor i in range(1,1000):\n    image_path = os.path.join(image_folder, f"{i}.jpg")\n\n    if not os.path.exists(image_path):\n        print(f"Image {image_path} not found, skipping.")\n        continue\n\n    predicted_class, predicted_class_number = predict(image_path, model, class_names)\n    print(f"Image: {image_path} -> Predicted Class: {predicted_class}, Class Number: {predicted_class_number + 1}")\n\n    '