In [13]:
import os
import pathlib
import pprint

import tqdm
import numpy as np
import pandas as pd
import neptune

from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim 
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torchvision import models
from torch.utils.data import DataLoader
from torchsummary import summary
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.manual_seed(42)

<torch._C.Generator at 0x1a5f8c0e3f0>

In [2]:
image_folder = ImageFolder(root="Rsp2023Train")

In [10]:
# Set the root directory where the image data is located
root = 'Rsp2023Train'
apply_data_augmentation = True
classes = sorted(os.listdir(root))
# Define the transformation to apply to the images
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),   # convert to grayscale
    transforms.Resize((100, 150)),  # Resize the images to a consistent size
    transforms.ToTensor(),           # Convert images to tensors
    transforms.Normalize(mean=[0.5], std=[0.225])  # Normalize the images
])

augmentation_transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.RandomResizedCrop(300),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation((-5, 5)),
    transforms.Resize((20, 30)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.225])
])

# Create data loaders for training and validation sets with data augmentation
transform = augmentation_transform if apply_data_augmentation else transform

# Load the dataset using ImageFolder
dataset = ImageFolder(root=root, transform=augmentation_transform)
num_classes = len(dataset.classes)

# Define the split ratio for training and validation
train_ratio = 0.8  # 80% of the data for training, 20% for validation
dataset_size = len(dataset)
train_size = int(train_ratio * dataset_size)
val_size = dataset_size - train_size

# Split the dataset into training and validation sets
train_set, val_set = torch.utils.data.random_split(dataset, [train_size, val_size])

# Print the sizes of the training and validation sets
print(f"Training set size: {len(train_set)}")
print(f"Validation set size: {len(val_set)}")
print(f"Number of classes: {num_classes}")


# Define the batch size for training and validation
batch_size = 64

# get number of workers on current machine
num_workers_available = torch.multiprocessing.cpu_count()
# Create data loaders for training and validation sets
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)  #, num_workers=num_workers_available)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)  #, num_workers=num_workers_available)


Training set size: 3967
Validation set size: 992
Number of classes: 4


In [11]:
class LeNet(nn.Module):
    def __init__(self, numChannels, classes):
        # call the parent constructor
        super(LeNet, self).__init__()
        # initialize first set of CONV => RELU => POOL layers
        self.conv1 = nn.Conv2d(in_channels=numChannels, out_channels=20,
            kernel_size=(5, 5))
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # initialize second set of CONV => RELU => POOL layers
        self.conv2 = nn.Conv2d(in_channels=20, out_channels=50,
            kernel_size=(5, 5))
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        # initialize first (and only) set of FC => RELU layers
        self.fc1 = nn.Linear(in_features=800, out_features=500)
        self.relu3 = nn.ReLU()
        # initialize our softmax classifier
        self.fc2 = nn.Linear(in_features=500, out_features=classes)
        self.logSoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = torch.flatten(x, 1)
        # Calculate the input size for the first fully connected layer
        fc1_input_size = x.size(1)
        self.fc1 = nn.Linear(in_features=fc1_input_size, out_features=500)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        output = self.logSoftmax(x)
        return output


# Create an instance of the LeNet model
model = LeNet(numChannels=1, classes=num_classes)

# model = get_model(num_classes=num_classes)
model = model.to(device)
print(summary(model, (1, 224, 224)))
print(model)

# Define the loss function
criterion = nn.CrossEntropyLoss()
print(criterion)

# Define the optimizer with weight decay
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
print(optimizer)

Layer (type:depth-idx)                   Output Shape              Param #
├─Conv2d: 1-1                            [-1, 20, 220, 220]        520
├─ReLU: 1-2                              [-1, 20, 220, 220]        --
├─MaxPool2d: 1-3                         [-1, 20, 110, 110]        --
├─Conv2d: 1-4                            [-1, 50, 106, 106]        25,050
├─ReLU: 1-5                              [-1, 50, 106, 106]        --
├─MaxPool2d: 1-6                         [-1, 50, 53, 53]          --
├─ReLU: 1-7                              [-1, 500]                 --
├─Linear: 1-8                            [-1, 4]                   2,004
├─LogSoftmax: 1-9                        [-1, 4]                   --
Total params: 27,574
Trainable params: 27,574
Non-trainable params: 0
Total mult-adds (M): 305.10
Input size (MB): 0.19
Forward/backward pass size (MB): 11.67
Params size (MB): 0.11
Estimated Total Size (MB): 11.97
Layer (type:depth-idx)                   Output Shape              Param

In [12]:
# Set the number of training epochs
num_epochs = 10

# Initialize variables for early stopping
best_val_loss = float('inf')
best_epoch = 0
patience = 3
counter = 0

# log the run
run = neptune.init_run(
    project='DLO-U8', tags=['train', 'pytorch', 'rsp2023'],
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI4Y2M2OTQxMC02ODFlLTQ4MTMtYWM5Yy1jNWIyNThmZWIyZjAifQ==",
)

run["dataloader_params"] = {
    "train_ratio": train_ratio,
    "train_size": len(train_set),
    "val_size": len(val_set),
    "num_classes": num_classes,
    "classes": pprint.pformat(classes),
    "augmentation": apply_data_augmentation,
}

run["transforms"] = pprint.pformat(transform.__dict__)

run["parameters"] = {
    "learning_rate": optimizer.param_groups[0]["lr"],
    "batch_size": batch_size,
    "epochs": num_epochs,
    "weight_decay": optimizer.param_groups[0]["weight_decay"],
    "patience": patience,
}

run["model_summary"] = pprint.pformat(model, indent=4)
run["criterion_summary"] = pprint.pformat(criterion, indent=4)
run["optimizer_summary"] = pprint.pformat(optimizer, indent=4)
run["data_transforms"] = pprint.pformat(transform, indent=4)
run["augmentation_transforms"] = pprint.pformat(augmentation_transform, indent=4)

# Training loop
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    
    running_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    
    # Iterate over the training batches
    for images, labels in tqdm.tqdm(train_loader, total=len(train_loader), desc="Train Batch", position=1, leave=False):
        images = images.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
        
        # Compute training accuracy
        _, predicted = torch.max(outputs.data, 1)
        total_predictions += labels.size(0)
        correct_predictions += (predicted == labels).sum().item()
        
        running_loss += loss.item() * images.size(0)
    
    # Compute average training loss and accuracy
    epoch_loss = running_loss / len(train_set)
    epoch_accuracy = correct_predictions / total_predictions
    
    run["train/loss"].log(epoch_loss)
    run["train/accuracy"].log(epoch_accuracy)
    
    # Evaluate the model on the validation set
    model.eval()  # Set the model to evaluation mode
    
    val_loss = 0.0
    val_correct_predictions = 0
    val_total_predictions = 0
    
    # Disable gradient calculation to speed up the inference
    with torch.no_grad():
        for val_images, val_labels in tqdm.tqdm(val_loader, total=len(val_loader), desc="Val Batch", position=2, leave=False):
            val_images = val_images.to(device)
            val_labels = val_labels.to(device)
            
            val_outputs = model(val_images)
            val_loss += criterion(val_outputs, val_labels).item() * val_images.size(0)
            
            # Compute validation accuracy
            _, val_predicted = torch.max(val_outputs.data, 1)


            _, val_predicted = torch.max(val_outputs.data, 1)
            val_total_predictions += val_labels.size(0)
            val_correct_predictions += (val_predicted == val_labels).sum().item()
    
    # Compute average validation loss and accuracy
    val_loss /= len(val_set)
    val_accuracy = val_correct_predictions / val_total_predictions
    
    run["eval/loss"].log(val_loss)
    run["eval/accuracy"].log(val_accuracy)
    
    # Print training and validation statistics for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}] - "
          f"Train Loss: {epoch_loss:.4f}, Train Accuracy: {epoch_accuracy:.4f} - "
          f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")
    
    # Check if the validation loss has improved
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        counter = 0
    else:
        counter += 1
        
        # Early stopping condition
        if counter == patience:
            print("Early stopping triggered.")
            break


# Defining Labels and Predictions
truelabels = []
predictions = []
model.eval()
print("Getting predictions from test set...")
for data, target in val_loader:
    for label in target.data.numpy():
        truelabels.append(label)
    for prediction in model(data).data.numpy().argmax(1):
        predictions.append(prediction) 

try:
    # Plot the confusion matrix
    cm = confusion_matrix(truelabels, predictions)
    tick_marks = np.arange(len(classes))
    df_cm = pd.DataFrame(cm, index = classes, columns = classes)
    f = plt.figure(figsize = (7,7))
    sns.heatmap(df_cm, annot=True, cmap=plt.cm.Blues, fmt='g')
    plt.xlabel("Predicted Class", fontsize = 20)
    plt.ylabel("True Class", fontsize = 20)
    run["confusion_matrix"].upload(f)
except:
    run["eval_confusion_matrix"].log("Eval Confusion Matrix Error")
    pass

run_id = run["sys/id"].fetch()
run["best_epoch"] = best_epoch
filename = f"model_cnn_{run_id}.pt"
torch.save(model, filename)
run["model_checkpoints/model"].upload(filename)
run.stop()

https://app.neptune.ai/radlfabs/DLO-U8/e/DLOU-20



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

Epoch [1/10] - Train Loss: 1.3778, Train Accuracy: 0.2755 - Val Loss: 1.3672, Val Accuracy: 0.2873



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

Epoch [2/10] - Train Loss: 1.3649, Train Accuracy: 0.2730 - Val Loss: 1.3513, Val Accuracy: 0.2843




KeyboardInterrupt: 