In [1]:
## 01_data_prep_and_baseline.ipynb

# --- Setup and Imports ---
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets, models
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
import numpy as np

# --- Configuration ---
DATA_DIR = 'dataset'  # Base directory for data
IMAGE_SIZE = 224
BATCH_SIZE = 32
VAL_SPLIT_RATIO = 0.1 
NUM_CLASSES = 1 # Binary classification
LEARNING_RATE = 0.001
NUM_EPOCHS_HEAD = 5
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # Standard ImageNet mean/std
IMAGENET_STD = [0.229, 0.224, 0.225]

print(f"Using device: {DEVICE}")

# --- 1. Data Preprocessing and Loading (Goal 1.3) ---

# Define Image Transforms
train_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3), 
    transforms.Resize(256),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15), 
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Transforms for Validation/Test Sets (No Augmentation)
test_transforms = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),
    transforms.Resize(256),
    transforms.CenterCrop(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# Load Datasets
full_train_dataset = datasets.ImageFolder(
    root=f'{DATA_DIR}/train',
    transform=train_transforms # NOTE: Augmentation applied to val split here
)

test_dataset = datasets.ImageFolder(
    root=f'{DATA_DIR}/test',
    transform=test_transforms
)

# Split Training Data
train_size = int((1 - VAL_SPLIT_RATIO) * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4)

print(f"Train/Val/Test Sizes: {len(train_dataset)} / {len(val_dataset)} / {len(test_dataset)}")
print(f"Class Mapping: {full_train_dataset.class_to_idx}")


# --- 2. Model Initialization and Head Training (Goal 1.4) ---

def initialize_model(num_classes):
    # Load pre-trained MobileNetV2
    model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.IMAGENET1K_V1)

    # Freeze the base layers (Feature Extractor)
    for param in model.parameters():
        param.requires_grad = False
            
    # Replace the final fully connected layer (the "head")
    num_ftrs = model.classifier[1].in_features
    model.classifier[1] = nn.Sequential(
        nn.Linear(num_ftrs, num_classes),
        nn.Sigmoid() # Sigmoid for binary output
    )

    return model.to(DEVICE)

def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    
    print(f"Starting training for {num_epochs} epochs...")

    for epoch in range(num_epochs):
        # Training Phase
        model.train()
        running_loss = 0.0
        train_iter = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]", leave=False)
        
        for inputs, labels in train_iter:
            inputs = inputs.to(DEVICE)
            labels = labels.float().unsqueeze(1).to(DEVICE) 
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)
            train_iter.set_postfix({'loss': loss.item()})

        epoch_loss = running_loss / len(train_loader.dataset)
        
        # Validation Phase
        model.eval()
        val_running_loss = 0.0
        corrects = 0
        
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs = inputs.to(DEVICE)
                labels = labels.float().unsqueeze(1).to(DEVICE)
                outputs = model(inputs)
                val_running_loss += criterion(outputs, labels).item() * inputs.size(0)
                preds = (outputs > 0.5).int() 
                corrects += torch.sum(preds == labels.int().data)

        val_epoch_loss = val_running_loss / len(val_loader.dataset)
        val_acc = corrects.double() / len(val_loader.dataset)

        print(f"Epoch {epoch+1} Complete. Train Loss: {epoch_loss:.4f}, Val Loss: {val_epoch_loss:.4f}, Val Acc: {val_acc:.4f}")
        
    print("Baseline Head Training Complete.")
    return model

# Initialize Model, Loss, and Optimizer
baseline_model = initialize_model(NUM_CLASSES)
criterion = nn.BCELoss() # Binary Cross-Entropy
optimizer = optim.Adam(baseline_model.parameters(), lr=LEARNING_RATE) 

# Start Training the Head
final_model_head_trained = train_model(
    baseline_model, 
    train_loader, 
    val_loader, 
    criterion, 
    optimizer,
    NUM_EPOCHS_HEAD
)

# Save the model artifact
torch.save(final_model_head_trained.state_dict(), 'model_artifact_phase1_head_trained.pt')
print("Model head trained and saved.")

Using device: cpu
Train/Val/Test Sizes: 5969 / 664 / 715
Class Mapping: {'def_front': 0, 'ok_front': 1}
Starting training for 5 epochs...


                                                                                

Epoch 1 Complete. Train Loss: 0.3947, Val Loss: 0.2663, Val Acc: 0.9217


                                                                                

Epoch 2 Complete. Train Loss: 0.2572, Val Loss: 0.2376, Val Acc: 0.9142


                                                                                

Epoch 3 Complete. Train Loss: 0.2295, Val Loss: 0.1869, Val Acc: 0.9443


                                                                                 

Epoch 4 Complete. Train Loss: 0.2174, Val Loss: 0.1619, Val Acc: 0.9458


                                                                                    

Epoch 5 Complete. Train Loss: 0.1911, Val Loss: 0.1587, Val Acc: 0.9548
Baseline Head Training Complete.
Model head trained and saved.
