# Vehicle Damage Detection System - Improved Version V6

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torchvision import models, transforms
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import os
from collections import Counter

# Configuration
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 32
NUM_EPOCHS = 50
MIN_SAMPLES_PER_CLASS = 20  # Increased from 15 to 20

# Label mappings (unchanged from previous version)
label_to_cls_piezas = {...}
label_to_cls_danos = {...}
label_to_cls_sugerencia = {...}

## Enhanced Dataset Class with Improved Class Filtering

In [2]:
class EnhancedVehicleDamageDataset(Dataset):
    def __init__(self, csv_path, img_dir, transform=None):
        self.data = pd.read_csv(csv_path, sep='|')
        self.img_dir = img_dir
        self.transform = transform
        
        # Filter rare classes and group some vehicle parts
        self._filter_and_group_classes()
        
    def _filter_and_group_classes(self):
        """Filter rare classes and group similar vehicle parts"""
        # Group rare vehicle parts into broader categories
        def group_parts(part_id):
            rare_parts = [4,5,7,8,9,19,20,21,22,23,24,25,27,28,29,30,31,32,33,34,35,36,37,51,52,54,59,60,61,62]
            return 99 if part_id in rare_parts else part_id
            
        self.data['Piezas del Vehículo'] = self.data['Piezas del Vehículo'].apply(group_parts)
        
        # Filter classes with insufficient samples
        for task in ['Tipos de Daño', 'Piezas del Vehículo', 'Sugerencia']:
            class_counts = self.data[task].value_counts()
            valid_classes = class_counts[class_counts >= MIN_SAMPLES_PER_CLASS].index
            self.data = self.data[self.data[task].isin(valid_classes)]
            
    def __len__(self):
        return len(self.data)
        
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.data.iloc[idx, 0])
        image = Image.open(img_path).convert('RGB')
        
        labels = {
            'damage': torch.tensor(self.data.iloc[idx, 1] - 1, dtype=torch.long),
            'part': torch.tensor(self.data.iloc[idx, 2] - 1, dtype=torch.long),
            'suggestion': torch.tensor(self.data.iloc[idx, 3] - 1, dtype=torch.long)
        }
        
        if self.transform:
            image = self.transform(image)
            
        return image, labels

## Simplified Model Architecture

In [3]:
class SimplifiedDamageClassifier(nn.Module):
    def __init__(self, num_damage_types, num_parts, num_suggestions):
        super().__init__()
        
        # Simpler backbone
        self.backbone = models.resnet18(pretrained=True)
        in_features = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        
        # Shared layers with more regularization
        self.shared = nn.Sequential(
            nn.Linear(in_features, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.5)
        )
        
        # Task-specific heads with intermediate layers
        self.damage_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, num_damage_types)
        )
        self.part_head = nn.Sequential(
            nn.Linear(256, 128),
            nn.ReLU(), 
            nn.Linear(128, num_parts)
        )
        self.suggestion_head = nn.Linear(256, num_suggestions)
        
    def forward(self, x):
        features = self.backbone(x)
        shared = self.shared(features)
        
        return {
            'damage': self.damage_head(shared),
            'part': self.part_head(shared),
            'suggestion': self.suggestion_head(shared)
        }

## Enhanced Training Loop with Early Stopping

In [4]:
def train_enhanced_model(model, train_loader, val_loader, num_epochs):
    # Class-weighted loss functions
    damage_weights = get_class_weights(train_dataset, 'damage')
    part_weights = get_class_weights(train_dataset, 'part')
    suggestion_weights = get_class_weights(train_dataset, 'suggestion')
    
    criterion = {
        'damage': nn.CrossEntropyLoss(weight=damage_weights),
        'part': nn.CrossEntropyLoss(weight=part_weights),
        'suggestion': nn.CrossEntropyLoss(weight=suggestion_weights)
    }
    
    # Optimizer with weight decay
    optimizer = optim.AdamW(model.parameters(), lr=1e-4, weight_decay=1e-4)
    
    # Learning rate scheduler with warmup
    scheduler = optim.lr_scheduler.OneCycleLR(
        optimizer,
        max_lr=1e-3,
        steps_per_epoch=len(train_loader),
        epochs=num_epochs
    )
    
    # Early stopping
    best_val_loss = float('inf')
    patience = 5
    patience_counter = 0
    
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        
        for inputs, labels in train_loader:
            inputs = inputs.to(DEVICE)
            labels = {k: v.to(DEVICE) for k, v in labels.items()}
            
            optimizer.zero_grad()
            outputs = model(inputs)
            
            # Weighted multi-task loss
            loss = 0.4 * criterion['damage'](outputs['damage'], labels['damage']) + \
                   0.4 * criterion['part'](outputs['part'], labels['part']) + \
                   0.2 * criterion['suggestion'](outputs['suggestion'], labels['suggestion'])
            
            loss.backward()
            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            running_loss += loss.item()
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        val_metrics = evaluate_enhanced_model(model, val_loader)
        
        # Print epoch statistics
        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Loss: {running_loss/len(train_loader):.4f}')
        for task in val_metrics:
            print(f'{task} Accuracy: {val_metrics[task]["accuracy"]:.4f}')
            print(f'{task} F1-Score: {val_metrics[task]["f1"]:.4f}')
        
        # Early stopping check
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save(model.state_dict(), 'best_model.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch+1}')
                break
    
    return model

## Enhanced Evaluation with Comprehensive Metrics

In [5]:
def evaluate_enhanced_model(model, loader):
    """Enhanced evaluation with comprehensive metrics"""
    model.eval()
    metrics = {}
    
    with torch.no_grad():
        for task in ['damage', 'part', 'suggestion']:
            all_preds = []
            all_labels = []
            
            for inputs, labels in loader:
                inputs = inputs.to(DEVICE)
                outputs = model(inputs)
                
                _, preds = torch.max(outputs[task], 1)
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels[task].cpu().numpy())
            
            # Calculate multiple metrics
            metrics[task] = {
                'accuracy': accuracy_score(all_labels, all_preds),
                'precision': precision_score(all_labels, all_preds, average='weighted', zero_division=0),
                'recall': recall_score(all_labels, all_preds, average='weighted', zero_division=0),
                'f1': f1_score(all_labels, all_preds, average='weighted', zero_division=0)
            }
            
            # Generate detailed classification report
            print(f'\n=== {task.upper()} Evaluation ===')
            print(classification_report(
                all_labels, 
                all_preds, 
                target_names=[label_to_cls_danos[i+1] if task=='damage' else 
                             label_to_cls_piezas[i+1] if task=='part' else
                             label_to_cls_sugerencia[i+1] for i in np.unique(all_labels)],
                zero_division=0
            ))
            
            # Plot confusion matrix
            cm = confusion_matrix(all_labels, all_preds)
            plt.figure(figsize=(10,8))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title(f'Confusion Matrix - {task}')
            plt.ylabel('True Label')
            plt.xlabel('Predicted Label')
            plt.show()
    
    return metrics

## Main Execution

In [6]:
# Data transforms with enhanced augmentation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.4, 1.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.2),
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
        transforms.RandomRotation(45),
        transforms.RandomPerspective(distortion_scale=0.3, p=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

# Create datasets
train_dataset = EnhancedVehicleDamageDataset(
    'data/fotos_siniestros/datasets/train.csv',
    'data/fotos_siniestros/',
    data_transforms['train']
)

val_dataset = EnhancedVehicleDamageDataset(
    'data/fotos_siniestros/datasets/val.csv',
    'data/fotos_siniestros/',
    data_transforms['val']
)

# Create data loaders with balanced sampling
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    sampler=WeightedRandomSampler(
        weights=get_sample_weights(train_dataset),
        num_samples=len(train_dataset),
        replacement=True
    ),
    num_workers=4
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4
)

# Initialize model
model = SimplifiedDamageClassifier(
    num_damage_types=len(label_to_cls_danos),
    num_parts=len(label_to_cls_piezas),
    num_suggestions=len(label_to_cls_sugerencia)
).to(DEVICE)

# Train model
trained_model = train_enhanced_model(model, train_loader, val_loader, NUM_EPOCHS)

# Save model
torch.save(trained_model.state_dict(), 'enhanced_damage_classifier_by_blackboxAI.pth')

NameError: name 'get_sample_weights' is not defined