Custom ResNet18 Model

In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torchvision
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

Set the data directory, the desired batch size, and the number of epochs for the training process.

In [None]:
# Training configuration.
# Root folder of the dataset; it is joined with "train" and "val" when the
# datasets are created.
dataDir = "Dataset"
# Number of complete passes over the training set.
numEpochs = 10
# Number of images per batch for both the training and validation loaders.
batchSize = 16

Try to use CUDA for training; otherwise the CPU is used. The path where the model will be saved is also defined.

In [None]:
# File that the trained weights (state dict) are written to at the end.
modelPath = "resnet18_screw_classifier.pth"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

The image transformation is defined: the images are resized to 224x224, as ResNet18 was trained on that size.

In [None]:
# Preprocessing applied to every image in both datasets:
#   1. resize to 224x224,
#   2. convert the image to a tensor.
_preprocessing_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
]
transform = transforms.Compose(_preprocessing_steps)

In [None]:
# Each split lives in its own sub-folder of dataDir; ImageFolder derives the
# class names from the sub-directory names inside that folder.
train_dir = os.path.join(dataDir, "train")
val_dir = os.path.join(dataDir, "val")

train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
val_dataset = datasets.ImageFolder(root=val_dir, transform=transform)

# Only the training data is shuffled; validation keeps a fixed order.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batchSize,
    shuffle=True,
    num_workers=4,
)
val_loader = DataLoader(
    dataset=val_dataset,
    batch_size=batchSize,
    shuffle=False,
    num_workers=4,
)

# The number of output classes is taken from the training split.
num_classes = len(train_dataset.classes)
print("Classes:", train_dataset.classes)

In [None]:
def imshow(img):
    """Display an image tensor with matplotlib, without axes.

    The tensor is converted to a NumPy array and its axes are reordered from
    (0, 1, 2) to (1, 2, 0) before plotting, i.e. the first axis is moved
    last (channels-first to channels-last for an image tensor).
    """
    channels_last = img.numpy().transpose(1, 2, 0)
    plt.imshow(channels_last)
    plt.axis('off')
    plt.show()

# Pull a single batch from the (shuffled) training loader for a visual check.
dataiter = iter(train_loader)
images, labels = next(dataiter)

# Tile the whole batch into one grid image and display it.
image_grid = torchvision.utils.make_grid(images)
imshow(image_grid)

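The pretrained ResNet18 model is loaded (and downloaded if necessary), and its final layer is replaced to match the number of classes.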

In [None]:
# Load ResNet18 with ImageNet-pretrained weights.
# The `pretrained=True` flag is deprecated in newer torchvision releases in
# favour of the `weights=` argument; IMAGENET1K_V1 is the weight set that
# `pretrained=True` used to select. Older torchvision versions that lack the
# weights enum fall back to the legacy argument.
if hasattr(models, "ResNet18_Weights"):
    model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
else:
    model = models.resnet18(pretrained=True)

# Replace the final fully connected layer so the network outputs one logit
# per class of the custom dataset.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model = model.to(device)

With the model loaded, training of the ResNet18 model using the captured images can begin.

In [None]:
# Loss and optimizer: cross-entropy over the class logits; Adam updates every
# parameter of the network.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# ----- TRAINING LOOP -----
print("Training...")
for epoch in range(numEpochs):
    model.train()  # training mode (affects layers such as batch norm)
    batch_losses = []

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    # Report the mean of the per-batch losses for this epoch.
    running_loss = sum(batch_losses)
    print(f"Epoch {epoch+1}/{numEpochs}, Loss: {running_loss/len(train_loader):.4f}")


Once the model is trained, the validation images are used to measure the accuracy of the ResNet model.

In [None]:
# Evaluate on the validation set: count correct predictions and keep every
# prediction/label pair for the confusion matrix built afterwards.
model.eval()
correct = 0
total = 0
all_preds = []
all_labels = []

# Gradients are not needed for evaluation.
with torch.no_grad():
    for inputs, labels in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        outputs = model(inputs)
        # Predicted class = index of the largest logit along dim 1.
        predicted = torch.max(outputs, dim=1).indices

        hits = predicted.eq(labels)
        total += labels.shape[0]
        correct += int(hits.sum())

        # Store for confusion matrix
        all_preds.extend(predicted.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

print(f"Validation Accuracy: {100 * correct / total:.2f}%")

To visualize the class predictions, a confusion matrix is created.

In [None]:
# Confusion matrix
cm = confusion_matrix(all_labels, all_preds)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=train_dataset.classes)

plt.figure(figsize=(10, 8))
disp.plot(cmap='Blues', xticks_rotation=45)
plt.title("Confusion Matrix - Validation Set")
plt.show()

Save the model

In [None]:
# Persist only the model's state dict (not the whole model object) to
# modelPath.
state_dict = model.state_dict()
torch.save(state_dict, modelPath)
print(f"Model saved to {modelPath}")