In [7]:
# Switch between the two supported environments:
#   True  -> Google Colab, dataset read from a mounted Google Drive
#   False -> local machine, dataset read from the local filesystem
on_colab = False

if not on_colab:
    # Local copy of the dataset.
    path = "/home/lorenzo/Documenti/Machine Learning/Computer Vision/Final Project/data/Raod_Crack_256-20241015T162221Z-001/Raod_Crack_256"
else:
    # Mount Google Drive so the dataset folder is reachable from the notebook.
    from google.colab import drive
    drive.mount('/content/drive')
    path = "/content/drive/My Drive/Raod_Crack_256"

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
import torchvision.models as models
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os

class RoadCrackDetectionDataset(Dataset):
    """Road-damage images stored as ``<image_dir>/Cracks`` and ``<image_dir>/Pothole``.

    Each sample is a tuple ``(image, label, box)``:

    * ``image`` - the RGB image, passed through ``transform`` when one is given;
    * ``label`` - integer class id: 1 for ``Cracks``, 2 for ``Pothole``.
      Id 0 is intentionally unused because torchvision detection models
      reserve it for the background class;
    * ``box``   - float32 tensor ``[x_min, y_min, x_max, y_max]``.

    The boxes are hard-coded placeholders (one fixed box per class), not real
    annotations; they must be replaced with real data before the detector can
    learn anything meaningful. ``transform`` is applied to the image only, the
    box is returned exactly as stored.

    Args:
        image_dir: Directory containing the ``Cracks`` and ``Pothole`` folders.
        transform: Optional callable applied to each PIL image.

    Raises:
        FileNotFoundError: If one of the two class folders does not exist
            (raised by ``os.listdir``).
    """

    # Sub-folder name -> (class id, placeholder box [x_min, y_min, x_max, y_max]).
    # Class ids start at 1: 0 is the background class for torchvision detectors.
    CLASS_INFO = {
        'Cracks': (1, [30, 40, 200, 150]),
        'Pothole': (2, [50, 60, 220, 160]),
    }

    # File extensions accepted as images (compared case-insensitively).
    IMAGE_EXTENSIONS = (
        '.jpg', '.jpeg', '.png', '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp',
    )

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []  # 1: Crack, 2: Pothole (0 is reserved for background)
        self.boxes = []   # Bounding box: [x_min, y_min, x_max, y_max]

        for folder, (class_id, placeholder_box) in self.CLASS_INFO.items():
            label_dir = os.path.join(image_dir, folder)
            # Sort so the sample order does not depend on the filesystem.
            for img_file in sorted(os.listdir(label_dir)):
                # Skip stray non-image entries (e.g. .DS_Store, Thumbs.db).
                if not img_file.lower().endswith(self.IMAGE_EXTENSIONS):
                    continue
                self.image_paths.append(os.path.join(label_dir, img_file))
                self.labels.append(class_id)
                # Placeholder box for this class; copy so samples do not share a list.
                self.boxes.append(list(placeholder_box))

    def __len__(self):
        """Return the number of image files that were indexed."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label, box)``."""
        # Context manager closes the underlying file once the RGB copy exists.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        box = torch.tensor(self.boxes[idx], dtype=torch.float32)
        return image, self.labels[idx], box

In [3]:
import torchvision.models.detection

def get_object_detection_model(num_classes):
    """Build a Faster R-CNN (ResNet-50 FPN) detector with a fresh box predictor.

    Args:
        num_classes: Number of output classes of the new predictor. The count
            includes the background class (e.g. Cracks + Potholes + background = 3).

    Returns:
        The pre-trained detector with its box predictor replaced by a new
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    detection = torchvision.models.detection

    # Start from the detector pre-trained on COCO.
    detector = detection.fasterrcnn_resnet50_fpn(pretrained=True)

    # Swap the classification/regression head for one with the requested
    # number of classes, keeping the input feature size of the old head.
    roi_heads = detector.roi_heads
    feature_dim = roi_heads.box_predictor.cls_score.in_features
    roi_heads.box_predictor = detection.faster_rcnn.FastRCNNPredictor(feature_dim, num_classes)

    return detector

In [4]:
from tqdm import tqdm

def train_detection_model(model, train_loader, num_epochs=10, lr=0.005):
    """Fine-tune a torchvision-style detection model with SGD.

    Every batch yielded by ``train_loader`` must be a triple
    ``(images, labels, boxes)`` of equal-length sequences with one box per
    image. For each image a target dict ``{"boxes": (1, 4) tensor,
    "labels": (1,) int64 tensor}`` is built and passed to the model together
    with the images. Labels are passed through unchanged; torchvision
    detection models treat class id 0 as background, so foreground ids are
    expected to start at 1.

    Images and targets are moved to the device the model's parameters live
    on, so the function works whether or not the model was moved to a GPU
    before the call.

    Args:
        model: Detection model that returns a dict of losses in train mode.
        train_loader: Sized iterable of ``(images, labels, boxes)`` batches.
        num_epochs: Number of passes over ``train_loader``.
        lr: Initial SGD learning rate (multiplied by 0.1 every 3 epochs).

    Raises:
        ValueError: If ``train_loader`` contains no batches.
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        # Fail early instead of dividing by zero in the epoch summary.
        raise ValueError("train_loader is empty: nothing to train on")

    # Run on whatever device the model already lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Frozen parameters never receive gradients, so leave them out of the optimizer.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(trainable_params, lr=lr, momentum=0.9, weight_decay=0.0005)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

    model.train()

    for epoch in range(num_epochs):
        running_loss = 0.0
        progress_bar = tqdm(enumerate(train_loader), total=num_batches, desc=f"Epoch {epoch+1}/{num_epochs}")

        for i, (images, labels, boxes) in progress_bar:
            images = [image.to(device) for image in images]
            # One target dict per image, each holding a single box and its class id.
            targets = [
                {
                    "boxes": box.to(device).unsqueeze(0),
                    "labels": torch.as_tensor([int(label)], dtype=torch.int64, device=device),
                }
                for label, box in zip(labels, boxes)
            ]

            # Forward pass: in train mode the model returns a dict of loss terms.
            loss_dict = model(images, targets)
            losses = sum(loss_dict.values())

            # Optimization step.
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            running_loss += losses.item()
            avg_loss = running_loss / (i + 1)

            # Show the running mean loss on the progress bar.
            progress_bar.set_postfix(loss=avg_loss)

        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/num_batches}")
        lr_scheduler.step()

In [5]:
def show_predictions(model, test_loader):
    """Plot predicted bounding boxes on images from ``test_loader``.

    The model is switched to eval mode and run without gradient tracking on
    every batch. For each batch, the first image is displayed with all of its
    predicted boxes drawn in red (no score filtering is applied).

    Images are moved to the device of the model's parameters for inference,
    and everything is brought back to the CPU before plotting.

    Args:
        model: Detection model that, in eval mode, returns one dict per image
            containing a ``'boxes'`` tensor of ``[x_min, y_min, x_max, y_max]`` rows.
        test_loader: Iterable of ``(images, labels, boxes)`` batches; only
            ``images`` is used.
    """
    # Run inference on whatever device the model already lives on.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for images, labels, boxes in test_loader:
            images = list(images)
            if not images:
                continue

            predictions = model([image.to(device) for image in images])

            # NOTE(review): only the first image of each batch is shown. The
            # original loop over the batch ended with an unconditional `break`;
            # confirm whether stopping after the first *batch* was intended.
            img = images[0].permute(1, 2, 0).cpu().numpy()  # CHW -> HWC for matplotlib
            fig, ax = plt.subplots()
            ax.imshow(img)

            # Convert to plain floats so matplotlib never sees (CUDA) tensors.
            for x_min, y_min, x_max, y_max in predictions[0]['boxes'].cpu().tolist():
                ax.add_patch(plt.Rectangle(
                    (x_min, y_min), x_max - x_min, y_max - y_min,
                    fill=False, edgecolor='red', linewidth=2
                ))

            plt.show()

In [9]:
# Image preprocessing: resize every image to 256x256, then convert it to a tensor.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])


def _collate_as_tuples(batch):
    """Turn a list of (image, label, box) samples into (images, labels, boxes) tuples."""
    return tuple(zip(*batch))


# Datasets for the two splits.
train_dataset = RoadCrackDetectionDataset(path + '/Training', transform=transform)
test_dataset = RoadCrackDetectionDataset(path + '/Testing', transform=transform)

# Loaders: only the training split is shuffled; both keep samples as tuples
# instead of stacking them into a single tensor.
train_loader = DataLoader(
    train_dataset, batch_size=4, shuffle=True, collate_fn=_collate_as_tuples
)
test_loader = DataLoader(
    test_dataset, batch_size=4, shuffle=False, collate_fn=_collate_as_tuples
)

# Model with three output classes: Background, Cracks, Potholes.
model = get_object_detection_model(num_classes=3)

# Train for a single epoch.
train_detection_model(model, train_loader, num_epochs=1, lr=0.1)

# Visualize the predictions on the test split.
show_predictions(model, test_loader)

Epoch 1/1:   0%|          | 1/741 [00:23<4:44:31, 23.07s/it, loss=1.78]


KeyboardInterrupt: 