# **1-Install required packages**

In [None]:
!pip install torch torchvision torchsummary opencv-python matplotlib numpy scikit-learn albumentations kaggle -q

import os
from tqdm import tqdm
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import train_test_split
from torch.amp import GradScaler, autocast
from torchsummary import summary
import time

# **2-Set up Kaggle API for dataset download**

In [None]:
# Colab-only: ask the user to upload their Kaggle API token through the
# browser, then copy it into ~/.kaggle/ for the kaggle CLI calls that follow.
from google.colab import files
print("Upload kaggle.json for API access")
files.upload()  # Upload kaggle.json
!mkdir -p ~/.kaggle
# The copy assumes the uploaded file is named exactly kaggle.json and sits in
# the current working directory.
!cp kaggle.json ~/.kaggle/
# Owner read/write only, so the token is not readable by other users.
!chmod 600 ~/.kaggle/kaggle.json

# **3-Download Car Damage Detection YOLO Seg 8k dataset**

In [None]:
# Download the dataset archive into ./car_data, extract it there quietly (-q),
# and remove the archive once it has been unpacked.
print("Downloading Car Damage Detection YOLO Seg 8k dataset...")
!kaggle datasets download -d chiayinlee/car-damage-detection-yolo-seg-8k -p ./car_data
!unzip -q ./car_data/car-damage-detection-yolo-seg-8k.zip -d ./car_data
!rm ./car_data/car-damage-detection-yolo-seg-8k.zip

# **4-Inspect dataset structure**

In [None]:
# Recursively list what was extracted so the layout can be compared with the
# paths defined below.
print("Dataset structure:")
!ls -R ./car_data

# Define dataset paths
# Each split keeps its images and its YOLO label files in sibling
# images/ and labels/ directories.
TRAIN_IMAGE_ROOT = './car_data/train/images'
TRAIN_LABEL_ROOT = './car_data/train/labels'
VAL_IMAGE_ROOT = './car_data/valid/images'
VAL_LABEL_ROOT = './car_data/valid/labels'

# **5-Collect image paths**

In [None]:
def get_image_paths(root_dir):
    """Recursively collect image file paths under ``root_dir``.

    Args:
        root_dir: Directory to walk. A directory that does not exist simply
            yields an empty list, because ``os.walk`` produces nothing for it.

    Returns:
        Sorted list of file paths (directory joined with file name) whose
        extension is ``.jpg``, ``.jpeg`` or ``.png``. The extension check
        ignores case, so ``IMG_01.JPG`` is included as well.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    return sorted(
        os.path.join(subdir, name)
        for subdir, _, names in os.walk(root_dir)
        for name in names
        # Lower-case the name first so upper-case extensions are not skipped.
        if name.lower().endswith(image_extensions)
    )

# Collect the sorted image lists for both splits and report how many files
# were found (a count of 0 means nothing matched under that root).
train_image_paths = get_image_paths(TRAIN_IMAGE_ROOT)
val_image_paths = get_image_paths(VAL_IMAGE_ROOT)
print(f"Found {len(train_image_paths)} training images and {len(val_image_paths)} validation images")

# **6-Convert YOLO format to binary mask**

In [None]:
def yolo_to_mask(label_path, img_shape):
    """Convert YOLO segmentation annotations to binary mask.

    Every usable row of the label file is read as
    ``class_id x1 y1 x2 y2 ...`` with coordinates normalized to the image
    size. The class id is ignored: all polygons are merged into one mask.

    Args:
        label_path: Path of the label text file. ``None`` or a path that does
            not exist produces an all-zero mask.
        img_shape: Shape of the image; only ``img_shape[:2]`` (height, width)
            is used.

    Returns:
        ``np.uint8`` array of shape ``img_shape[:2]`` holding 1 inside the
        annotated polygons and 0 elsewhere.

    Raises:
        ValueError: If a row has an odd number of coordinates or a token that
            is not a number.
    """
    mask = np.zeros(img_shape[:2], dtype=np.uint8)
    if label_path is None or not os.path.exists(label_path):
        return mask
    height, width = img_shape[:2]
    with open(label_path, 'r') as f:
        for line in f:
            # Ignore class_id (combine all classes into one binary mask)
            coords = line.split()[1:]
            # A polygon needs at least three (x, y) pairs. Blank rows, bare
            # class ids and 4-value rows are skipped instead of being passed
            # to fillPoly as empty or degenerate shapes.
            if len(coords) < 6:
                continue
            points = np.array([float(x) for x in coords]).reshape(-1, 2)  # Normalized coordinates
            points = points * np.array([width, height])  # Denormalize to image size
            points = points.astype(np.int32)
            cv2.fillPoly(mask, [points], 1)  # Fill polygon with 1
    return mask

# **7-Match image and label files (handle rf.<hash> naming)**

In [None]:
def get_label_path(img_path, label_root):
    """Find corresponding label file, handling rf.<hash> naming.

    Lookup order:

    1. ``<image stem>.txt`` in ``label_root``, i.e. the label that shares the
       image's full name including any ``.rf.<hash>`` part. This needs no
       directory scan and keeps different ``.rf.<hash>`` variants of the same
       source image from picking up each other's labels.
    2. Otherwise the first ``.txt`` file (in sorted order) whose id equals the
       image's id, where the id is the part of the name before ``.rf.`` (or
       the name without ``.txt`` when a label has no ``.rf.`` marker).

    Ids are compared for equality, not by prefix, so ``Img6`` no longer
    matches ``Img64_jpg.rf.<hash>.txt``.

    Args:
        img_path: Path of the image file; only its base name is used.
        label_root: Directory that contains the label files.

    Returns:
        Path of the matching label file, or ``None`` if there is none.
    """
    img_name = os.path.basename(img_path)

    # Fast path: label with exactly the same stem as the image.
    exact_match = os.path.join(label_root, os.path.splitext(img_name)[0] + '.txt')
    if os.path.isfile(exact_match):
        return exact_match

    img_id = img_name.split('.rf.')[0]  # Extract base name (e.g., Img64_jpg)
    # Sorted so the result does not depend on the file system's listing order.
    for label_file in sorted(os.listdir(label_root)):
        if not label_file.endswith('.txt'):
            continue
        if '.rf.' in label_file:
            label_id = label_file.split('.rf.')[0]
        else:
            label_id = label_file[:-len('.txt')]
        if label_id == img_id:
            return os.path.join(label_root, label_file)
    return None  # Return None if no matching label found

# **8-Visualize sample images and masks**

In [None]:
def plot_sample(image_path, mask):
    """Display image with its segmentation mask.

    Args:
        image_path: Path of the image file, read with OpenCV.
        mask: Array shown in grayscale next to the image.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None;
        # fail here with the path instead of inside cvtColor.
        raise FileNotFoundError(f"Image not found: {image_path}")
    # The read above yields BGR channel order; convert it for matplotlib.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    _, ax = plt.subplots(1, 2, figsize=(10, 5))
    ax[0].imshow(img)
    ax[0].set_title('Original Image')
    ax[0].axis('off')
    ax[1].imshow(mask, cmap='gray')
    ax[1].set_title('Damage Mask')
    ax[1].axis('off')
    plt.show()

# Show first 3 samples
for img_path in train_image_paths[:3]:
    label_path = get_label_path(img_path, TRAIN_LABEL_ROOT)
    if label_path:
        # The image is read here only to obtain its shape for the mask;
        # plot_sample reads the file again for display.
        img = cv2.imread(img_path)
        mask = yolo_to_mask(label_path, img.shape)
        plot_sample(img_path, mask)
    else:
        # Images without a label file are reported rather than plotted.
        print(f"No label found for {img_path}")

# **9-Custom Dataset for Car Damage Detection**

In [None]:
class CarDamageDataset(Dataset):
    """Image / binary damage-mask pairs built from YOLO segmentation labels.

    Args:
        image_paths: Sequence of image file paths; item ``i`` of the dataset
            is built from ``image_paths[i]``.
        label_root: Directory searched (through ``get_label_path``) for the
            label file that belongs to each image.
        transform: Optional callable invoked as
            ``transform(image=..., mask=...)`` that returns a mapping with
            ``'image'`` and ``'mask'`` entries. Applied last.
        augment: When true, ``self.aug_transform`` is applied to the sample
            before ``transform``.
    """

    def __init__(self, image_paths, label_root, transform=None, augment=False):
        self.image_paths = image_paths
        self.label_root = label_root
        self.transform = transform
        self.augment = augment
        # img_path -> resolved label path (or None). get_label_path may list
        # the whole label directory, so each image is resolved once instead
        # of on every access in every epoch.
        self._label_cache = {}
        # NOTE(review): max_holes / max_height / max_width look like the
        # legacy CoarseDropout argument names - confirm they are still
        # accepted by the installed albumentations version.
        self.aug_transform = A.Compose([
            A.HorizontalFlip(p=0.5),
            A.RandomRotate90(p=0.3),
            A.RandomBrightnessContrast(p=0.2),
            A.GaussNoise(p=0.1),
            A.CoarseDropout(max_holes=8, max_height=16, max_width=16, p=0.3)
        ])

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.image_paths)

    def _resolve_label(self, img_path):
        """Return the memoised label path for ``img_path`` (``None`` if absent)."""
        if img_path not in self._label_cache:
            self._label_cache[img_path] = get_label_path(img_path, self.label_root)
        return self._label_cache[img_path]

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)``.

        The mask is float32 with the same height/width as the image and is
        all zeros when the image has no label file.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        img_path = self.image_paths[idx]
        label_path = self._resolve_label(img_path)

        # Load image
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if image is None:
            raise FileNotFoundError(f"Image not found: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Load or create empty mask
        mask = yolo_to_mask(label_path, image.shape) if label_path else np.zeros(image.shape[:2], dtype=np.uint8)
        mask = mask.astype(np.float32)

        # Apply augmentations (image and mask are transformed together)
        if self.augment:
            augmented = self.aug_transform(image=image, mask=mask)
            image, mask = augmented['image'], augmented['mask']

        # Apply transforms
        if self.transform:
            transformed = self.transform(image=image, mask=mask)
            image, mask = transformed['image'], transformed['mask']

        return image, mask

# **10-Define transforms**

In [None]:
# Shared preprocessing applied to every sample (after any augmentation):
# resize, normalize, convert to tensors.
base_transform = A.Compose([
    A.Resize(256, 256),  # Resize to fit Colab GPU memory
    # Mean/std of 0.5 per channel; the plotting code undoes this with "* 0.5 + 0.5".
    A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ToTensorV2()
])

# **11-Prepare dataset**

In [None]:
# Random augmentation is enabled for the training split only; validation
# samples go through base_transform alone.
train_dataset = CarDamageDataset(train_image_paths, TRAIN_LABEL_ROOT, transform=base_transform, augment=True)
val_dataset = CarDamageDataset(val_image_paths, VAL_LABEL_ROOT, transform=base_transform, augment=False)

# **12-Create dataloaders**

In [None]:
BATCH_SIZE = 4  # Optimized for Colab free tier
# Only the training loader shuffles; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, pin_memory=True)

def show_batch(dataloader):
    """Plot up to four images from the first batch of ``dataloader`` with their masks.

    Images go on the top row and the matching masks on the bottom row of a
    2x4 grid. Images are expected channel-first and normalized with mean/std
    0.5, which is undone for display.

    Args:
        dataloader: Iterable yielding ``(images, masks)`` batches.
    """
    images, masks = next(iter(dataloader))
    # Use the real batch length: the batch can be shorter than BATCH_SIZE
    # (small dataset) or the loader may have been built with another size.
    num_shown = min(4, len(images))
    plt.figure(figsize=(15, 5))
    for i in range(num_shown):
        plt.subplot(2, 4, i+1)
        plt.imshow(images[i].permute(1, 2, 0).numpy() * 0.5 + 0.5)  # Unnormalize
        plt.title('Image')
        plt.axis('off')
        plt.subplot(2, 4, i+5)
        plt.imshow(masks[i].squeeze().numpy(), cmap='gray')
        plt.title('Mask')
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Sanity check: display one training batch (augmentation enabled) with its masks.
print("Training batch samples:")
show_batch(train_loader)

# **13-U-Net Model Definition**

In [None]:
class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution -> BatchNorm -> ReLU stages.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by the second stage.
        mid_channels: Channels between the two stages; falls back to
            ``out_channels`` when omitted (or falsy).
    """

    def __init__(self, in_channels, out_channels, mid_channels=None):
        super().__init__()
        hidden = mid_channels if mid_channels else out_channels
        stages = []
        for c_in, c_out in ((in_channels, hidden), (hidden, out_channels)):
            # padding=1 keeps the spatial size; the conv bias is dropped
            # because BatchNorm follows immediately.
            stages.extend([
                nn.Conv2d(c_in, c_out, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(inplace=True),
            ])
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both stages to ``x``."""
        return self.double_conv(x)

class Down(nn.Module):
    """Encoder step: 2x2 max pooling followed by a ``DoubleConv``.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the ``DoubleConv``.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        pooling = nn.MaxPool2d(2)
        convs = DoubleConv(in_channels, out_channels)
        self.maxpool_conv = nn.Sequential(pooling, convs)

    def forward(self, x):
        """Pool ``x`` and run the convolution block on the result."""
        return self.maxpool_conv(x)

class Up(nn.Module):
    """Decoder step: upsample, align with the skip tensor, concatenate, convolve.

    Args:
        in_channels: Channels after concatenating the skip tensor with the
            upsampled tensor.
        out_channels: Channels produced by the ``DoubleConv``.
        bilinear: Use bilinear ``nn.Upsample`` when true, otherwise a
            stride-2 ``nn.ConvTranspose2d`` that also halves the channels.
    """

    def __init__(self, in_channels, out_channels, bilinear=True):
        super().__init__()
        half_channels = in_channels // 2
        if not bilinear:
            self.up = nn.ConvTranspose2d(in_channels, half_channels, kernel_size=2, stride=2)
            self.conv = DoubleConv(in_channels, out_channels)
        else:
            self.up = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
            self.conv = DoubleConv(in_channels, out_channels, half_channels)

    def forward(self, x1, x2):
        """Upsample ``x1``, pad it to ``x2``'s height/width and fuse both.

        Args:
            x1: Tensor coming from the deeper layer.
            x2: Skip-connection tensor from the encoder.
        """
        x1 = self.up(x1)
        # Any height/width gap to the skip tensor is split between the two
        # sides; the extra pixel of an odd gap goes to the right/bottom.
        gap_h = x2.size(2) - x1.size(2)
        gap_w = x2.size(3) - x1.size(3)
        pad_left = gap_w // 2
        pad_top = gap_h // 2
        x1 = F.pad(x1, [pad_left, gap_w - pad_left, pad_top, gap_h - pad_top])
        merged = torch.cat([x2, x1], dim=1)
        return self.conv(merged)

class OutConv(nn.Module):
    """Final 1x1 convolution mapping feature channels to output channels.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Number of output channels.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Project ``x``; no activation is applied."""
        projected = self.conv(x)
        return projected

class UNet(nn.Module):
    """U-Net with four down-sampling and four up-sampling stages.

    Args:
        n_channels: Channels of the input image.
        n_classes: Channels of the returned logit map.
        bilinear: Passed to every ``Up`` stage; when true the deeper stages
            use half the channel width.
    """

    def __init__(self, n_channels=3, n_classes=1, bilinear=True):
        super().__init__()
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.bilinear = bilinear

        if bilinear:
            factor = 2
        else:
            factor = 1

        # Encoder
        self.inc = DoubleConv(n_channels, 64)
        self.down1 = Down(64, 128)
        self.down2 = Down(128, 256)
        self.down3 = Down(256, 512)
        self.down4 = Down(512, 1024 // factor)

        # Decoder
        self.up1 = Up(1024, 512 // factor, bilinear)
        self.up2 = Up(512, 256 // factor, bilinear)
        self.up3 = Up(256, 128 // factor, bilinear)
        self.up4 = Up(128, 64, bilinear)

        # Output head
        self.outc = OutConv(64, n_classes)

    def forward(self, x):
        """Return raw logits for ``x`` (no sigmoid is applied here)."""
        skip1 = self.inc(x)
        skip2 = self.down1(skip1)
        skip3 = self.down2(skip2)
        skip4 = self.down3(skip3)
        bottleneck = self.down4(skip4)

        # Each decoder stage consumes the encoder output of the same depth.
        decoded = self.up1(bottleneck, skip4)
        decoded = self.up2(decoded, skip3)
        decoded = self.up3(decoded, skip2)
        decoded = self.up4(decoded, skip1)
        return self.outc(decoded)

# Initialize model
# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = UNet().to(device)
print(f"Model initialized on {device}")
# The parameter count is reported in millions.
print(f"Total parameters: {sum(p.numel() for p in model.parameters())/1e6:.2f}M")
# Summary for a 3x256x256 input, the size produced by base_transform's resize.
summary(model, input_size=(3, 256, 256))

# **14-Loss function combining Dice and BCEWithLogits**

In [None]:
class DiceBCELoss(nn.Module):
    """Sum of BCE-with-logits and a soft Dice loss for binary segmentation."""

    def __init__(self):
        super().__init__()

    def forward(self, inputs, targets, smooth=1):
        """Compute ``BCE + (1 - Dice)`` over all elements of the batch.

        Args:
            inputs: Raw logits (sigmoid is applied internally).
            targets: Ground-truth mask with the same shape as ``inputs`` and
                values 0/1. Floating-point targets are used as they are;
                bool or integer targets are cast to ``inputs``' dtype.
            smooth: Constant added to the Dice numerator and denominator so
                the ratio stays defined when both masks are empty.

        Returns:
            Scalar tensor holding the combined loss.
        """
        if not torch.is_floating_point(targets):
            # The BCE call needs floating-point targets.
            targets = targets.to(inputs.dtype)
        bce = F.binary_cross_entropy_with_logits(inputs, targets, reduction='mean')
        # reshape (unlike view) also accepts non-contiguous tensors.
        probs = torch.sigmoid(inputs).reshape(-1)
        flat_targets = targets.reshape(-1)
        intersection = (probs * flat_targets).sum()
        dice_loss = 1 - (2.*intersection + smooth)/(probs.sum() + flat_targets.sum() + smooth)
        return bce + dice_loss


# Training components
criterion = DiceBCELoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-5)
# 'min' mode: train_model steps this scheduler with the validation loss, and
# the learning rate is scaled by 0.5 when that loss stops improving.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2, factor=0.5)
# Gradient scaler used together with autocast in the training step.
scaler = GradScaler()

# **15-Training and validation functions**

In [None]:
def train_epoch(model, loader, optimizer, criterion, scaler):
    """Run one optimisation pass over ``loader``.

    Args:
        model: Network to train; switched to training mode here.
        loader: Iterable of ``(images, masks)`` batches.
        optimizer: Optimizer stepped through ``scaler``.
        criterion: Loss called as ``criterion(outputs, masks.unsqueeze(1))``.
        scaler: Gradient scaler used for the backward pass and the step.

    Returns:
        Mean of the per-batch loss values.
    """
    model.train()
    total_loss = 0.0
    progress = tqdm(loader, desc="Training")
    for images, masks in progress:
        images = images.to(device)
        masks = masks.to(device)
        optimizer.zero_grad()
        with autocast('cuda'):
            # Masks get a channel axis so they line up with the model output.
            loss = criterion(model(images), masks.unsqueeze(1))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
    return total_loss / len(loader)

def validate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Args:
        model: Network to evaluate; switched to eval mode here.
        loader: Iterable of ``(images, masks)`` batches.
        criterion: Loss called as ``criterion(outputs, masks.unsqueeze(1))``.

    Returns:
        Tuple ``(mean_loss, mean_iou)``. ``mean_loss`` is a Python float;
        ``mean_iou`` is a tensor holding the average of the per-batch IoU
        values (predictions thresholded at 0.5).
    """
    model.eval()
    loss_sum = 0.0
    iou_sum = 0.0
    eps = 1e-6  # keeps the ratio defined when prediction and target are both empty
    with torch.no_grad():
        for images, masks in tqdm(loader, desc="Validating"):
            images = images.to(device)
            masks = masks.to(device)
            with autocast('cuda'):
                outputs = model(images)
                loss = criterion(outputs, masks.unsqueeze(1))
            loss_sum += loss.item()

            predicted = torch.sigmoid(outputs) > 0.5
            actual = masks.unsqueeze(1).bool()
            overlap = (predicted & actual).float().sum()
            combined = (predicted | actual).float().sum()
            iou_sum += (overlap + eps) / (combined + eps)
    num_batches = len(loader)
    return loss_sum / num_batches, iou_sum / num_batches

# **16-Training loop**

In [None]:
def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, scaler, epochs=10):
    """Train for ``epochs`` epochs, validating after each one.

    After every epoch the scheduler is stepped with the validation loss, and
    whenever the validation IoU beats the best value so far the model's
    ``state_dict`` is written to ``best_model.pth``.

    Returns:
        Dict with the per-epoch lists ``'train_loss'``, ``'val_loss'`` and
        ``'val_iou'`` (all stored as Python floats).
    """
    history = {'train_loss': [], 'val_loss': [], 'val_iou': []}
    best_iou = 0.0
    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        start_time = time.time()

        train_loss = train_epoch(model, train_loader, optimizer, criterion, scaler)
        val_loss, val_iou = validate(model, val_loader, criterion)
        scheduler.step(val_loss)

        # val_iou arrives as a tensor and is moved to the CPU before being
        # stored as a plain float.
        epoch_metrics = (
            ('train_loss', train_loss),
            ('val_loss', val_loss),
            ('val_iou', val_iou.cpu()),
        )
        for key, value in epoch_metrics:
            history[key].append(float(value))

        improved = val_iou > best_iou
        if improved:
            best_iou = val_iou
            torch.save(model.state_dict(), 'best_model.pth')
            print(f"New best model saved with IoU: {float(val_iou):.4f}")

        print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val IoU: {float(val_iou):.4f}")
        print(f"Time: {time.time()-start_time:.2f}s")
    return history

# Start training
# Runs 10 epochs. train_model writes the best-scoring weights to
# best_model.pth and returns the per-epoch metrics plotted further down.
history = train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, scaler, epochs=10)

# **17-Plot training history**

In [None]:
# Two side-by-side panels: loss curves on the left, validation IoU on the right.
plt.figure(figsize=(12, 5))
# Left panel: training vs. validation loss per epoch.
plt.subplot(1, 2, 1)
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Val Loss')
plt.title('Training History')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Right panel: validation IoU per epoch.
plt.subplot(1, 2, 2)
plt.plot(history['val_iou'], label='Validation IoU')
plt.title('IoU Score Progress')
plt.xlabel('Epoch')
plt.ylabel('IoU')
plt.legend()
plt.tight_layout()
plt.show()

# **18-Visualize predictions**

In [None]:
def visualize_predictions(dataset, model, num_samples=3):
    """Plot input, ground truth and thresholded prediction for random samples.

    Args:
        dataset: Indexable collection returning ``(image, mask)`` tensors,
            with the image normalized using mean/std 0.5.
        model: Network producing logits; predictions are ``sigmoid > 0.5``.
        num_samples: Number of distinct samples to draw. Clamped to
            ``len(dataset)``; nothing is plotted when that leaves zero.

    The model is put into eval mode for the forward passes and its previous
    train/eval mode is restored afterwards.
    """
    num_samples = min(num_samples, len(dataset))
    if num_samples <= 0:
        return

    was_training = model.training
    # Eval mode so BatchNorm uses its running statistics; each forward pass
    # below sees a batch of one image.
    model.eval()
    try:
        plt.figure(figsize=(15, 5*num_samples))
        indices = np.random.choice(len(dataset), num_samples, replace=False)
        for i, idx in enumerate(indices):
            image, mask = dataset[idx]
            image = image.unsqueeze(0).to(device)
            with torch.no_grad():
                pred = torch.sigmoid(model(image)) > 0.5
            plt.subplot(num_samples, 3, i*3+1)
            plt.imshow(image[0].cpu().permute(1, 2, 0).numpy() * 0.5 + 0.5)
            plt.title('Input Image')
            plt.axis('off')
            plt.subplot(num_samples, 3, i*3+2)
            plt.imshow(mask.squeeze().cpu().numpy(), cmap='gray')
            plt.title('Ground Truth')
            plt.axis('off')
            plt.subplot(num_samples, 3, i*3+3)
            plt.imshow(pred.squeeze().cpu().numpy(), cmap='gray')
            plt.title('Prediction')
            plt.axis('off')
        plt.tight_layout()
        plt.show()
    finally:
        model.train(was_training)

# Qualitative check on three randomly chosen validation samples.
print("Validation samples predictions:")
visualize_predictions(val_dataset, model, num_samples=3)

# **19-Export model**

In [None]:
# Compile the network to TorchScript and save it as a standalone file.
# NOTE(review): the weights exported here are the ones currently held by
# `model` (state after the final epoch), not necessarily those written to
# best_model.pth - confirm which checkpoint is intended for export.
scripted_model = torch.jit.script(model)
scripted_model.save('car_defect_detection.pt')
print("Model exported successfully!")