The task is to detect the presence of alkali-aggregate reaction (AAR) cracks on the surface of concrete walls. These cracks are quite thin, web-shaped, and difficult to detect precisely.
We have recently extracted a dataset of small image patches (128×128 px), which is suitable for binary classification. The data is already split into train/val/test folds.
Note that the patches were extracted from large wall images (which you can deduce from the filenames), and each train/val/test split contains patches from different walls to ensure proper evaluation. Also note that the dataset is unbalanced (about 13% positive samples); this has to be taken into account for training and evaluation.
I see the following tasks:
1. Train at least two different neural network classifiers, one being a CNN (e.g., ResNet, VGG, AlexNet...), and the other being a vision transformer (ViT).
2. Optimize their performance by adjusting the model size, learning parameters, use of pretrained weights, data augmentations, sampling strategies to tackle the imbalance, etc.
3. Visualize and analyze the predictions, in particular the mistakes of the model. For ViT, visualize the attention maps.
As a deliverable, along with the 4-page report and working code, also include 3 text files with the final predictions of each model over the three data splits.

### Imports

In [6]:
# imports
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

import os
from PIL import Image
import random

### Data Loading

In [None]:
# Preprocessing pipeline: the single step turns each loaded image into a
# tensor, which is the input format the PyTorch model consumes.
transform = transforms.Compose([transforms.ToTensor()])

# On-disk locations of the training and validation image folders
train_dir = r'C:\Users\nathan\Documents\EPFL\Cours_MA1\ML\ML_course\projects\aar_crack_wall_ds.rar\train'
val_dir = r'C:\Users\nathan\Documents\EPFL\Cours_MA1\ML\ML_course\projects\aar_crack_wall_ds.rar\validation'

# Wrap each folder in an ImageFolder dataset; the same preprocessing is
# applied to both splits.
train_dataset = datasets.ImageFolder(train_dir, transform=transform)
val_dataset = datasets.ImageFolder(val_dir, transform=transform)

# Batch both datasets (32 samples per batch). Only the training batches are
# shuffled; the validation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Fetch the first sample of a dataset that carries a given class label
def get_image_by_label(dataset, label):
    """Return the first ``(image, label)`` sample whose label equals ``label``.

    Args:
        dataset: An iterable/indexable collection of ``(image, label)`` pairs,
            e.g. a torchvision ``ImageFolder``.
        label: The class label to look for.

    Returns:
        The matching ``(image, label)`` pair, or ``(None, None)`` when no
        sample with that label exists.
    """
    # Fast path: when the dataset exposes its labels as a ``targets`` sequence
    # (and does not remap them through ``target_transform``), find the index
    # from the labels alone and load only that one sample. Iterating the
    # dataset itself would load and transform every preceding image first.
    targets = getattr(dataset, 'targets', None)
    if targets is not None and getattr(dataset, 'target_transform', None) is None:
        for idx, lbl in enumerate(targets):
            if lbl == label:
                img, lbl = dataset[idx]
                return img, lbl
        return None, None

    # Generic fallback: scan the samples one by one.
    for img, lbl in dataset:
        if lbl == label:
            return img, lbl
    return None, None  # In case no image is found

# Pull one example of each class (0 and 1) out of the training set
image_0, label_0 = get_image_by_label(train_dataset, 0)
image_1, label_1 = get_image_by_label(train_dataset, 1)

# Show the two examples side by side, each titled with its label
fig, axes = plt.subplots(1, 2, figsize=(10, 5))

for ax, img, lbl in zip(axes, (image_0, image_1), (label_0, label_1)):
    # imshow expects (H, W, C) while the tensors are (C, H, W)
    ax.imshow(img.permute(1, 2, 0))
    ax.set_title(f"Label: {lbl}")
    ax.axis('off')

plt.show()




FileNotFoundError: [WinError 3] The system cannot find the path specified: 'C:\\Users\\nathan\\Documents\\EPFL\\Cours_MA1\\ML\\ML_course\\projects\\aar_crack_wall_ds.rar\\train'

In [None]:
# Choose where to run: the GPU when CUDA is available, the CPU otherwise
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Start from a pretrained ResNet-18, swap its final fully connected layer for
# a 2-output head (binary classification), and move the network to `device`.
# NOTE(review): `pretrained=` is deprecated in recent torchvision releases in
# favour of `weights=`; confirm the installed version before switching.
model = models.resnet18(pretrained=True)
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)
model = model.to(device)

# Adam optimizer over all model parameters, trained against cross-entropy loss
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

# Function to train the model (2 epochs by default), validating after each epoch
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=2):
    """Train ``model`` and report loss and validation accuracy every epoch.

    Args:
        model: The ``nn.Module`` to optimise (updated in place).
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches,
            passed to ``evaluate_model`` at the end of each epoch.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The average training loss of each epoch is printed.
    """
    # Send batches to the device the model actually lives on, rather than
    # relying on a module-level `device` variable defined in another cell.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    for epoch in range(num_epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        num_batches = 0

        for inputs, labels in train_loader:
            if model_device is not None:
                inputs, labels = inputs.to(model_device), labels.to(model_device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            num_batches += 1

        # Average over the batches actually processed; an empty loader gives
        # NaN instead of raising ZeroDivisionError.
        avg_loss = running_loss / num_batches if num_batches else float('nan')
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}')

        # Evaluate the model on the validation set
        evaluate_model(model, val_loader)

# Function to evaluate the model
def evaluate_model(model, val_loader):
    """Print the classification accuracy of ``model`` over ``val_loader``.

    Args:
        model: The ``nn.Module`` to evaluate; it is switched to eval mode.
        val_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        None. The accuracy (in percent) is printed; if the loader yields no
        samples, a "n/a" message is printed instead of dividing by zero.

    Note:
        The task description states that only about 13% of samples are
        positive, so accuracy alone can look high for a model that rarely
        predicts the positive class.
    """
    model.eval()  # Set the model to evaluation mode

    # Send batches to the device the model actually lives on, rather than
    # relying on a module-level `device` variable defined in another cell.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0

    with torch.no_grad():  # No gradients are needed for evaluation
        for inputs, labels in val_loader:
            if model_device is not None:
                inputs, labels = inputs.to(model_device), labels.to(model_device)

            # Forward pass; the predicted class is the highest-scoring output
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated: accuracy is undefined
        print('Validation Accuracy: n/a (no samples)')
        return

    accuracy = 100 * correct / total
    print(f'Validation Accuracy: {accuracy:.2f}%')

# Run the training loop for 2 epochs (validation runs after every epoch)
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=2,
)

In [None]:
def evaluate_model(model, val_loader):
    """Print the classification accuracy of ``model`` over ``val_loader``.

    Args:
        model: The ``nn.Module`` to evaluate; it is switched to eval mode.
        val_loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        None. The accuracy (in percent) is printed; if the loader yields no
        samples, a "n/a" message is printed instead of dividing by zero.

    Note:
        The task description states that only about 13% of samples are
        positive, so accuracy alone can look high for a model that rarely
        predicts the positive class.
    """
    model.eval()  # Set the model to evaluation mode

    # Send batches to the device the model actually lives on, rather than
    # relying on a module-level `device` variable defined in another cell.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0

    with torch.no_grad():  # Turn off gradients for evaluation
        for inputs, labels in val_loader:
            if model_device is not None:
                inputs, labels = inputs.to(model_device), labels.to(model_device)

            # Forward pass; the predicted class is the highest-scoring output
            outputs = model(inputs)
            predicted = outputs.argmax(dim=1)

            total += labels.size(0)  # Update total number of samples
            correct += (predicted == labels).sum().item()  # Count correct predictions

    if total == 0:
        # Nothing was evaluated: accuracy is undefined
        print('Validation Accuracy: n/a (no samples)')
        return

    accuracy = 100 * correct / total  # Calculate accuracy as a percentage
    print(f'Validation Accuracy: {accuracy:.2f}%')

# With training finished, report the model's accuracy on the validation split
evaluate_model(model=model, val_loader=val_loader)