# Train A Shape Classifier Model



In [None]:
import json
import os

# Both splits live side by side under one datasets directory (relative to the notebook)
_DATASETS_DIR = "../datasets"
train_data_root = f"{_DATASETS_DIR}/train"
test_data_root = f"{_DATASETS_DIR}/test"

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing applied to every image of both splits:
# single-channel grayscale -> 64x64 -> tensor -> normalized with mean 0.5 / std 0.5
transform = transforms.Compose(
    [
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# ImageFolder derives the class labels from the sub-directory names of each root
train_dataset = datasets.ImageFolder(train_data_root, transform=transform)
test_dataset = datasets.ImageFolder(test_data_root, transform=transform)

# Only the training batches are shuffled; the test order stays fixed
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Show which class names were discovered in the training directory
print(f'Classes: {train_dataset.classes}')

# 2. Define a simple CNN model
class SimpleCNN(nn.Module):
    """Small two-convolution CNN for single-channel 64x64 images.

    Each convolution is followed by ReLU and a 2x2 max-pool, so a 64x64 input
    is reduced to 16x16 with 32 channels before the fully connected head.
    ``fc1`` is sized for exactly that shape; other input resolutions will fail
    with a shape mismatch in ``fc1``.

    Args:
        num_classes: Number of output logits (one per class). Defaults to 3,
            which keeps ``SimpleCNN()`` identical to the previous hard-coded
            behaviour. Should match the number of classes in the dataset.

    Raises:
        ValueError: If ``num_classes`` is smaller than 1.
    """

    def __init__(self, num_classes: int = 3):
        super().__init__()
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        # 32 channels * 16 * 16 spatial positions after two 2x2 poolings of a 64x64 input
        self.fc1 = nn.Linear(32 * 16 * 16, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw class logits of shape ``(batch, num_classes)``.

        No softmax is applied here; ``nn.CrossEntropyLoss`` expects logits.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), 2)  # conv block 1 + 2x2 pooling
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)  # conv block 2 + 2x2 pooling
        # flatten everything except the batch dimension (also safe for non-contiguous input)
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Build the network and move its parameters to the selected device
# (nn.Module.to works in place and returns the module itself)
model = SimpleCNN()
model.to(device)

# Cross-entropy on raw logits, optimized with Adam at a learning rate of 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Train ``model`` in place and print the loss and accuracy of every epoch.

    Args:
        model: ``torch.nn.Module`` to optimize. Batches are moved to the device
            of its first parameter, so the function does not depend on any
            notebook-level ``device`` variable.
        train_loader: Iterable yielding ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``.

    Raises:
        ValueError: If an epoch sees no samples (empty loader), instead of the
            opaque ``ZeroDivisionError`` the statistics would otherwise hit.
    """
    # Follow the model's own placement rather than a global variable
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()  # set to train mode
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        for images, labels in train_loader:
            images, labels = images.to(target_device), labels.to(target_device)

            # Zero the parameter gradients left over from the previous step
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            # Statistics
            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

        if total == 0:
            raise ValueError("train_loader yielded no samples; nothing to train on")

        accuracy = 100 * correct / total
        # Loss is averaged over the batches actually seen in this epoch
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/num_batches:.4f}, Accuracy: {accuracy:.2f}%')


# Train

In [None]:
# Run the training loop for 15 epochs on the training split
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    epochs=15,
)

# Test 

In [None]:
# def test(model, test_loader):
#     """Print the Precision, Recall and F1-score for the trained model
#     """
#     pass

from typing_extensions import Doc, Annotated
from sklearn.metrics import classification_report

# Precision, recall and F1-score per class are computed with sklearn's classification_report.



def test(model: Annotated[torch.nn.Module, Doc("Model wants to be tested")],
         test_loader: Annotated[torch.utils.data.DataLoader, Doc('Test data loader')]) -> dict:
    """Evaluate a trained model and print precision, recall and F1-score per class.

    Args:
        model: Trained PyTorch model. It is switched to evaluation mode and
            batches are moved to the device of its first parameter.
        test_loader: DataLoader over the test set. Its ``dataset`` must expose
            a ``classes`` list (as ``ImageFolder`` does) whose order matches
            the label indices.

    Returns:
        dict: The ``classification_report`` output (``output_dict=True``),
        keyed by class name plus the aggregate entries sklearn adds.
    """
    # set to evaluation mode
    model.eval()

    # Use the device the model actually lives on instead of guessing from CUDA availability
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    class_names = test_loader.dataset.classes

    # predicted and ground-truth class indices, accumulated over all batches
    predictions = []
    true_labels = []

    # Gradients are not needed for evaluation; skipping them saves memory and time
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images.to(device))

            # index of the highest-scoring class for each image
            predicted = outputs.argmax(dim=1)

            # .cpu() is required before leaving torch: CUDA tensors cannot be converted directly
            predictions.extend(predicted.cpu().tolist())
            true_labels.extend(labels.cpu().tolist())

    report = classification_report(
        true_labels,
        predictions,
        # Pin the label set so target_names stays aligned even when a class
        # never appears in the test labels or in the predictions
        labels=list(range(len(class_names))),
        target_names=class_names,
        output_dict=True,
    )

    for class_name in class_names:
        metric = report[class_name]

        # print the class name followed by its three metrics
        print(f"Class Name: {class_name}")
        print(f"Precision: {metric['precision']:.4f}")
        print(f"Recall: {metric['recall']:.4f}")
        print(f"F1-score: {metric['f1-score']:.4f}")

    return report

In [None]:
# Evaluate on the test split, then print the metrics dictionary that test() returns
evaluation_report = test(model, test_loader)
print(evaluation_report)

# Show Predictions


In [None]:
# def show_prediction(model, image):
#     """Pass the image to the model and overlay the predicted shape and confidence on the input
#     image and display it
#     """
#     pass

from typing_extensions import Union
from PIL import Image
import matplotlib.pyplot as plt 

def show_prediction(model: Annotated[torch.nn.Module, Doc("Model wants to show predictions")],
                    image: Annotated[Union[str, Image.Image], Doc("Path or Image")],
                    class_names: Annotated[Union[list, tuple, None], Doc("Optional class names indexed by model output")] = None) -> None:
    """Predict the class of one image and display it with the prediction as title.

    Args:
        model: Trained PyTorch model. It is switched to evaluation mode and the
            input is moved to the device of its first parameter.
        image: Path to an image file or an already loaded PIL image.
        class_names: Optional sequence mapping output index to a readable name
            (for example ``train_dataset.classes``). When omitted, the numeric
            class index is shown.

    Raises:
        ValueError: If ``class_names`` is given but its length differs from the
            number of model outputs.
    """
    # Load from disk when a path is given; copying inside the context manager
    # reads the pixel data and lets the file handle be closed right away
    if isinstance(image, str):
        with Image.open(image) as opened:
            image = opened.copy()

    # set to eval mode
    model.eval()

    # Use the device the model actually lives on instead of guessing from CUDA availability
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Same preprocessing as the notebook's training pipeline: grayscale, 64x64, tensor, normalize
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize((64, 64)),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    # process the image and add the batch dimension
    image_tensor = transform(image).unsqueeze(0).to(device)

    # Inference only: no gradients needed
    with torch.no_grad():
        outputs = model(image_tensor)
        # convert raw logits to probabilities in [0, 1]
        probs = F.softmax(outputs, dim=1)
        confidence, predicted = torch.max(probs, 1)

    # plain Python numbers instead of one-element tensors
    confidence_score = confidence.item()
    predicted_index = predicted.item()

    if class_names is None:
        predicted_label = str(predicted_index)
    else:
        if len(class_names) != probs.shape[1]:
            raise ValueError(
                f"class_names has {len(class_names)} entries but the model outputs {probs.shape[1]} classes"
            )
        predicted_label = class_names[predicted_index]

    # create the figure and show the unprocessed image
    plt.figure(figsize=(8, 8))
    # cmap only affects single-channel images; RGB(A) data ignores it
    plt.imshow(image, cmap="gray")

    # put the prediction and its confidence in the title
    plt.title(f"Predict {predicted_label} \nConfidence: {confidence_score:.1%}", fontsize=12, pad=10)

    # render explicitly so the figure also appears outside inline notebook backends
    plt.show()

In [None]:
# show_prediction draws the figure itself and returns None, so wrapping it in print() only emitted "None"
show_prediction(model, "test.png")