In [1]:
import os
import random
from collections import defaultdict
from collections.abc import Iterator

import cv2
import numpy as np
import torch

from tqdm import tqdm
from torch import optim
from torchvision import transforms
from torch.utils.data import DataLoader

from helpers.classifier_with_pretrained_features import Resnext50BasedClassifierForProposals
from helpers.datasets import custom_collate_fn, CrackDataset
from helpers.early_stopping import EarlyStopping

In [2]:
# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Per-channel mean/std applied after the image has been converted to a tensor.
# NOTE(review): these look like the usual ImageNet statistics — confirm they match
# the statistics the pretrained backbone expects.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Preprocessing pipeline: resize to 224x224, convert to a tensor, then normalise.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        normalize,
    ]
)

In [3]:
# Selective-search proposals that were already computed, keyed by image path.
# perform_selective_search fills an entry on the first call for a path and reads it on later calls.
proposal_cache = defaultdict(list)


def perform_selective_search(image: np.ndarray, image_path: str, batch_size: int = 70) -> Iterator[torch.Tensor]:
    """
    Yield selective-search region proposals for an image, one batch at a time.

    The first time an ``image_path`` is seen, fast selective search is run on ``image``,
    the ``2 * batch_size`` largest proposals (by area) are kept, shuffled, and stored in
    ``proposal_cache``. Later calls with the same ``image_path`` reuse the stored proposals:
    ``image`` is not looked at again and ``batch_size`` only controls how the stored
    proposals are split into batches.

    This is a generator, so no work (including the caching) happens until it is iterated.

    Args:
    - image: The input image as a numpy array (only used when the path is not cached yet).
    - image_path: The file path to the image (used as the cache key).
    - batch_size: The maximum number of proposals per yielded batch (default 70).

    Yields:
    - A float32 torch.Tensor on ``device`` of shape [k, 4] with rows (x1, y1, x2, y2),
      where k <= batch_size (the last batch may be smaller). Nothing is yielded when
      there are no proposals.
    """
    # Membership test (not indexing) so that the defaultdict does not create an empty entry.
    if image_path not in proposal_cache:
        proposal_cache[image_path] = _largest_shuffled_proposals(image, limit=2 * batch_size)

    proposals = proposal_cache[image_path]

    for start in range(0, len(proposals), batch_size):
        yield torch.tensor(proposals[start:start + batch_size], dtype=torch.float32, device=device)


def _largest_shuffled_proposals(image: np.ndarray, limit: int) -> list:
    """Run fast selective search and return up to ``limit`` largest boxes as (x1, y1, x2, y2), shuffled."""
    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()

    ss.setBaseImage(image)
    ss.switchToSelectiveSearchFast()

    # Each rectangle is (x, y, w, h); rank them by area, largest first.
    ranked = sorted(ss.process(), key=lambda rect: rect[2] * rect[3], reverse=True)

    # Convert the kept rectangles to corner format with plain Python ints.
    boxes = [(int(x), int(y), int(x + w), int(y + h)) for (x, y, w, h) in ranked[:limit]]

    # Shuffle so that batches are not ordered by area.
    random.shuffle(boxes)

    return boxes

In [4]:
def get_loaders() -> tuple[DataLoader, DataLoader]:
    """
    Build the training and validation data loaders.

    Both loaders use batches of 32, four worker processes and ``custom_collate_fn``.
    Only the training loader shuffles; the validation loader keeps the dataset order so
    that validation passes are repeatable.

    Returns:
    - (train_dataloader, valid_dataloader)
    """
    # NOTE(review): images are read from the "*_small" directories while the annotations
    # come from the full "train"/"valid" directories — confirm this pairing is intentional.
    train_dataset = CrackDataset(
        os.path.join("data", "train", "coco_annotations.json"),
        os.path.join("data", "train_small", "images")
    )
    valid_dataset = CrackDataset(
        os.path.join("data", "valid", "coco_annotations.json"),
        os.path.join("data", "valid_small", "images")
    )

    # Options shared by both loaders, kept in one place so they cannot drift apart.
    shared_options = dict(batch_size=32, num_workers=4, collate_fn=custom_collate_fn)

    train_dataloader = DataLoader(train_dataset, shuffle=True, **shared_options)
    # Shuffling has no benefit for evaluation and only makes the batch order non-deterministic.
    valid_dataloader = DataLoader(valid_dataset, shuffle=False, **shared_options)

    return train_dataloader, valid_dataloader

In [5]:
def get_loop_objects() -> tuple[Resnext50BasedClassifierForProposals, EarlyStopping, torch.nn.BCEWithLogitsLoss, optim.Adam]:
    """
    Create the objects the training loop works with.

    Returns:
    - (model, early_stopping, criterion, optimizer): the classifier moved to ``device``,
      an early-stopping tracker (patience 2, delta 0, verbose), a BCE-with-logits loss and
      an Adam optimizer (learning rate 1e-4) over the classifier's parameters.
    """
    classifier = Resnext50BasedClassifierForProposals()
    # Place the parameters on the target device before the optimizer is built around them.
    classifier.to(device)

    adam = optim.Adam(classifier.parameters(), lr=1e-4)
    bce_with_logits = torch.nn.BCEWithLogitsLoss()
    stopper = EarlyStopping(patience=2, verbose=True, delta=0)

    return classifier, stopper, bce_with_logits, adam

In [6]:
# Upper bound on the number of training epochs; run_training_loop may stop earlier via early stopping.
num_epochs = 25


def validate(
        model: Resnext50BasedClassifierForProposals,
        valid_loader: DataLoader,
        criterion: torch.nn.BCEWithLogitsLoss,
        history: dict[str, list[float]]
) -> tuple[float, float]:
    """
    Evaluate the model on the validation loader without computing gradients.

    Args:
    - model: The classifier to evaluate; it is switched to eval mode.
    - valid_loader: Iterable yielding (images, labels) batches.
    - criterion: Loss applied to the model's logits and the float labels.
    - history: Dict whose "valid_loss" list receives the loss of this pass.

    Returns:
    - (accuracy, loss): accuracy in percent and the per-sample mean loss.

    Raises:
    - ZeroDivisionError: if the loader yields no samples.
    """
    model.eval()

    loss_sum = 0.0
    correct_valid = 0
    total_valid = 0

    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device).float()
            # NOTE(review): run_training_loop reduces the model output with max(dim=1)[0]
            # while this uses squeeze(1); the two only agree when dim 1 has size 1 —
            # confirm the model's output shape.
            outputs = model(images).squeeze(1)
            batch_count = labels.numel()

            # Assumes the criterion averages over the batch (the BCEWithLogitsLoss default);
            # weighting by the batch size makes every sample count equally, including
            # those in a smaller final batch.
            loss_sum += criterion(outputs, labels).item() * batch_count

            # The outputs are logits, so probability > 0.5 corresponds to logit > 0.
            predicted = (outputs > 0).float()
            correct_valid += predicted.eq(labels).sum().item()
            total_valid += batch_count

    valid_loss = loss_sum / total_valid
    history["valid_loss"].append(valid_loss)

    return 100. * correct_valid / total_valid, valid_loss


def run_training_loop() -> tuple[dict, float]:
    """
    Train the proposal classifier, validating after every epoch and stopping early
    when the early-stopping tracker says so.

    The loaders come from ``get_loaders`` and the model, early-stopping tracker, loss and
    optimizer from ``get_loop_objects``. At most ``num_epochs`` epochs are run.

    Returns:
    - (history, valid_accuracy): ``history`` holds the per-epoch "train_loss" and
      "valid_loss" lists (per-sample mean training loss); ``valid_accuracy`` is the
      accuracy in percent reported by the last validation pass.

    Raises:
    - ZeroDivisionError: if the training loader yields no samples.
    """
    checkpoint_path = os.path.join("checkpoints", "resnext50_32x4d_classifier_with_proposals.pt")
    train_loader, valid_loader = get_loaders()
    model, early_stopping, criterion, optimizer = get_loop_objects()
    history = {
        "train_loss": [],
        "valid_loss": []
    }
    valid_accuracy = 0

    for epoch in range(num_epochs):
        model.train()
        loss_sum = 0.0
        correct_train = 0
        total_train = 0

        with tqdm(train_loader, unit="batch") as tepoch:
            tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")

            # NOTE(review): the recorded run of this notebook fails with
            # "ValueError: too many values to unpack (expected 2)" in the first epoch.
            # If that comes from this unpacking, the loader's collate function yields
            # more than (images, labels) — confirm its return value.
            for images, labels in tepoch:
                images = torch.as_tensor(images, dtype=torch.float32).to(device)
                # The loss needs float targets on the same device as the logits.
                labels = torch.as_tensor(labels).to(device).float()

                optimizer.zero_grad()

                outputs: torch.Tensor = model(images)
                # One logit per sample: the highest-scoring entry along dim 1.
                # NOTE(review): presumably dim 1 indexes proposals — adjust the reduction
                # if the proposal classification logic differs.
                logits = outputs.max(dim=1)[0]
                loss = criterion(logits, labels)

                loss.backward()
                optimizer.step()

                batch_count = labels.numel()
                # Assumes the criterion averages over the batch; weight by the batch size
                # so the epoch loss is a per-sample mean.
                loss_sum += loss.item() * batch_count
                # Logits: probability > 0.5 corresponds to logit > 0. Compare the reduced
                # logits so each sample is counted exactly once.
                predicted = (logits > 0).float()
                correct_train += (predicted == labels).sum().item()
                total_train += batch_count

                tepoch.set_postfix(loss=loss_sum / total_train, accuracy=100. * correct_train / total_train)

        valid_accuracy, valid_loss = validate(model, valid_loader, criterion, history)
        train_loss = loss_sum / total_train
        history["train_loss"].append(train_loss)

        print(f"Validation Loss: {valid_loss:.4f}, Validation Accuracy: {valid_accuracy:.2f}%")
        early_stopping(valid_loss, model, checkpoint_path)

        if early_stopping.early_stop:
            print("Early stopping triggered")
            break

    return history, valid_accuracy

In [7]:
# Run the full training loop and keep the loss history and the last validation accuracy.
# NOTE(review): the output recorded below shows this call failing during the first epoch with
# "ValueError: too many values to unpack (expected 2)" — confirm what the data loaders yield per batch.
history, valid_accuracy = run_training_loop()

Epoch 1/25:   0%|          | 0/3 [00:14<?, ?batch/s]


ValueError: too many values to unpack (expected 2)