In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import torch
import torchvision.transforms as transforms
from torchvision import models
from PIL import Image
!pip install thop



In [2]:
import time
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader, random_split
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.preprocessing import label_binarize
import os
import copy
import json
import gc  # For memory management

# Helper function for formatting large numbers
def format_units(num):
    """Format a number with a metric suffix (K, M, G, T, P).

    The value is repeatedly divided by 1000 until it drops below 1000 or the
    largest known suffix ('P') is reached, then rendered with two decimals.

    Args:
        num: Integer or float to format (may be negative or zero).

    Returns:
        A string such as ``"1.50 K"``. Values below 1000 get an empty suffix,
        so the result ends with a trailing space (e.g. ``"0.00 "``).
    """
    units = ['', 'K', 'M', 'G', 'T', 'P']
    magnitude = 0
    # Clamp at the last unit: without the bound, values >= 1e18 index past the
    # list and infinite values never leave the loop.
    while abs(num) >= 1000 and magnitude < len(units) - 1:
        magnitude += 1
        num /= 1000.0
    return f"{num:.2f} {units[magnitude]}"

# Reproducibility: the dataset split below is already seeded, but the global
# RNGs (used for train-loader shuffling and for initialising the new classifier
# heads) were not. Seed them once, up front, so reruns are comparable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device Configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Clean up CUDA memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Parameters
batch_size = 16
standard_img_size = 224  # Standard size for most models (DenseNet, ResNeXt, Wide ResNet)
num_epochs = 50
learning_rate = 1e-4
split_ratio = [0.7, 0.15, 0.15]  # 70% training, 15% validation, 15% test
# Guard against a mistyped ratio silently shrinking or inflating the test split
assert abs(sum(split_ratio) - 1.0) < 1e-8, "split_ratio must sum to 1"

# Dataset Directory
dataset_dir = "/kaggle/input/drone-usat/DIAT-uSAT_dataset"  # Directory for DIAT-uSAT dataset

# Data Transformations
# Resize to the shared input resolution and normalise with the per-channel
# mean/std values given below.
transform = transforms.Compose([
    transforms.Resize((standard_img_size, standard_img_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Function to create data loaders with proper dataset splits and consistent indices
def create_data_loaders(full_dataset):
    """Split a dataset into train/validation/test subsets and wrap each in a DataLoader.

    Subset sizes come from the module-level ``split_ratio``; the test subset
    receives whatever remains after the train and validation sizes are
    truncated to integers. The split uses a generator seeded with 42, so the
    same indices are produced on every call.

    Args:
        full_dataset: Dataset to split; must support ``len()``.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader,
        train_dataset, val_dataset, test_dataset)``. Only the training loader
        shuffles; all loaders use the module-level ``batch_size``.
    """
    n_train = int(split_ratio[0] * len(full_dataset))
    n_val = int(split_ratio[1] * len(full_dataset))
    # Remainder goes to the test split so the three sizes always add up
    n_test = len(full_dataset) - n_train - n_val

    # Fixed seed keeps the partition identical across runs
    split_rng = torch.Generator().manual_seed(42)
    train_dataset, val_dataset, test_dataset = random_split(
        full_dataset, [n_train, n_val, n_test], generator=split_rng
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    # Evaluation loaders keep a fixed order
    val_loader, test_loader = (
        DataLoader(subset, batch_size=batch_size, shuffle=False)
        for subset in (val_dataset, test_dataset)
    )

    return train_loader, val_loader, test_loader, train_dataset, val_dataset, test_dataset

# Load Full Dataset
try:
    full_dataset = datasets.ImageFolder(root=dataset_dir, transform=transform)
    print(f"Successfully loaded dataset with {len(full_dataset)} images")
except Exception as e:
    # Point at the most likely cause, then let the original error propagate
    print(f"Error loading dataset: {str(e)}")
    print("Please verify the dataset path and format")
    raise

# Create data loaders
(train_loader, val_loader, test_loader,
 train_dataset, val_dataset, test_dataset) = create_data_loaders(full_dataset)

# Print dataset information
print(f"Total number of samples: {len(full_dataset)}")
class_to_idx = full_dataset.class_to_idx
print("Class to index mapping:", class_to_idx)
for class_name, idx in class_to_idx.items():
    # Count the (path, label) entries whose label matches this class index
    class_samples = sum(1 for _, target in full_dataset.samples if target == idx)
    print(f"Class {class_name}: {class_samples} samples")

# Class names and their count
class_names = full_dataset.classes
num_classes = len(class_names)
print(f"Number of classes: {num_classes}")
print(f"Class Names: {class_names}")

# Custom model summary function that works with any architecture
def get_model_summary(model, input_size=(1, 3, 224, 224)):
    """
    Build a plain-text summary of a PyTorch model without running a forward pass.

    The summary lists the model's device, total and trainable parameter counts,
    one line per Conv2d / Linear / BatchNorm2d / MaxPool2d / AdaptiveAvgPool2d
    module with its parameter count, and the model's class name.

    Args:
        model: PyTorch model
        input_size: Input tensor size (batch_size, channels, height, width).
            Accepted for backward compatibility; the summary is derived from
            the module tree only, so no tensor of this size is allocated.

    Returns:
        model_info: String with model information
    """
    # A model without parameters has no device to inspect; report CPU instead
    # of letting next() raise StopIteration.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")
    model_info = f"Model device: {model_device}\n"

    # Count parameters
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    model_info += f"Total Parameters: {total_params:,}\n"
    model_info += f"Trainable Parameters: {trainable_params:,}\n\n"

    # Layer listing is best-effort: a failure is reported in the summary text
    try:
        model_info += "Key Layers:\n"
        model_info += "-" * 80 + "\n"
        model_info += f"{'Layer Type':<25} {'Parameters':<15}\n"
        model_info += "-" * 80 + "\n"

        summarized_types = (nn.Conv2d, nn.Linear, nn.BatchNorm2d, nn.MaxPool2d,
                            nn.AdaptiveAvgPool2d)
        selected = []
        # "<name>." prefixes of modules already listed, so anything nested
        # inside a listed module is not reported a second time
        selected_prefixes = []
        for name, module in model.named_modules():
            if not isinstance(module, summarized_types):
                continue
            # str.startswith with an empty tuple is False, so the first match passes
            if name.startswith(tuple(selected_prefixes)):
                continue
            selected.append((name, module))
            if name:
                selected_prefixes.append(name + ".")

        # Display layer info
        for name, module in selected:
            params = sum(p.numel() for p in module.parameters())
            module_type = module.__class__.__name__
            model_info += f"{module_type:<25} {params:,}\n"

    except Exception as e:
        model_info += f"Could not generate detailed layer information due to: {str(e)}\n"

    # Only the class name is appended; the full module repr would be very long
    model_info += "\nModel Architecture Type:\n"
    model_info += str(model.__class__.__name__)

    return model_info
        
# Define a function to train and evaluate a model
def train_and_evaluate_model(model_name, model, train_loader, val_loader, test_loader, test_dataset, full_dataset):
    """
    Fine-tune a model, evaluate it on the test split and persist plots and metrics.

    Training uses Adam with cross-entropy loss for at most ``num_epochs``
    epochs. The learning rate is reduced when the validation loss plateaus, and
    training stops early after 5 consecutive epochs without a new best
    validation accuracy. The weights with the best validation accuracy are
    restored before the test evaluation.

    Artifacts written to ``results/<model_name>/``: best weights (``.pth``),
    training-history plot, confusion matrix, ROC curves (only when
    ``num_classes > 2``) and a metrics JSON file.

    Relies on module-level configuration: ``device``, ``num_epochs``,
    ``learning_rate``, ``standard_img_size``, ``num_classes``, ``class_names``
    and the helpers ``get_model_summary`` and ``format_units``.

    Args:
        model_name: Display name; also used for the results sub-directory, the
            artifact file names and the lookup of the characteristics list.
        model: The ``nn.Module`` to train; it is moved to ``device``.
        train_loader: Loader iterated once per epoch for optimisation.
        val_loader: Loader used for per-epoch validation loss and accuracy.
        test_loader: Loader used for the final evaluation.
        test_dataset: Unused; kept so existing call sites keep working.
        full_dataset: Unused; kept so existing call sites keep working.

    Returns:
        The model on ``device``, in eval mode, carrying the best recorded
        weights (or its final weights if no checkpoint was ever recorded).
    """
    print(f"\n{'='*50}")
    print(f"Training and Evaluating {model_name}")
    print(f"{'='*50}")

    # Create a results directory for this model
    results_dir = os.path.join("results", model_name)
    os.makedirs(results_dir, exist_ok=True)
    # Common prefix of every artifact file written below
    file_stem = model_name.lower().replace('-', '_')

    # Move model to device
    model = model.to(device)

    # Print model summary using our custom function
    print(f"\n{model_name} Summary:")
    print(get_model_summary(model, input_size=(1, 3, standard_img_size, standard_img_size)))

    # Loss and Optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

    # Training Loop
    best_val_acc = 0.0
    history = {'train_loss': [], 'val_loss': [], 'val_acc': []}
    early_stop_counter = 0
    early_stop_patience = 5
    best_model_weights = None

    print(f"\nStarting training {model_name}...")
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0

        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            # Zero the parameter gradients
            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimize
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            # Free up memory
            del images, labels, outputs
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        avg_train_loss = running_loss / len(train_loader)
        history['train_loss'].append(avg_train_loss)

        # Validation phase
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

                # Free up memory
                del images, labels, outputs, predicted
                if torch.cuda.is_available():
                    torch.cuda.empty_cache()

        avg_val_loss = val_loss / len(val_loader)
        val_accuracy = correct / total
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(val_accuracy)

        # The scheduler tracks validation loss, while checkpointing and early
        # stopping below track validation accuracy
        scheduler.step(avg_val_loss)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')

        # Save the best model
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            best_model_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), os.path.join(results_dir, f"{file_stem}_best.pth"))
            print(f"Model saved with validation accuracy: {val_accuracy:.4f}")
            early_stop_counter = 0
        else:
            early_stop_counter += 1

        # Early stopping
        if early_stop_counter >= early_stop_patience:
            print(f"Early stopping triggered after {epoch+1} epochs")
            break

        # Clean up memory
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    # Plot training history
    plt.figure(figsize=(12, 4))
    plt.subplot(1, 2, 1)
    plt.plot(history['train_loss'], label='Train Loss')
    plt.plot(history['val_loss'], label='Validation Loss')
    plt.legend()
    plt.title(f'{model_name} - Loss Over Epochs')

    plt.subplot(1, 2, 2)
    plt.plot(history['val_acc'], label='Validation Accuracy')
    plt.legend()
    plt.title(f'{model_name} - Accuracy Over Epochs')
    plt.savefig(os.path.join(results_dir, f'{file_stem}_training_history.png'))
    plt.close()

    # Load the best model for evaluation. No checkpoint exists when validation
    # accuracy never rose above 0.0 (or no epoch ran); load_state_dict(None)
    # would crash, so fall back to the weights currently in the model.
    if best_model_weights is not None:
        model.load_state_dict(best_model_weights)
    else:
        print("Warning: no best checkpoint was recorded; evaluating the current weights")
    model.eval()

    # Testing the Model
    print(f"\nEvaluating {model_name} on test set...")
    y_true = []
    y_pred = []
    y_scores = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
            # Softmax probabilities feed the ROC / AUC computation below
            y_scores.extend(torch.nn.functional.softmax(outputs, dim=1).cpu().numpy())

            # Free up memory
            del images, labels, outputs, preds
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    # Classification Report
    cls_report = classification_report(y_true, y_pred, target_names=class_names, output_dict=True)
    print(f"\n{model_name} Classification Report:")
    print(classification_report(y_true, y_pred, target_names=class_names))

    # Confusion Matrix
    conf_matrix = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(10, 8))
    sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.title(f"{model_name} Confusion Matrix")
    plt.savefig(os.path.join(results_dir, f"{file_stem}_confusion_matrix.png"))
    plt.close()

    # ROC Curve (for multi-class classification)
    roc_auc = None
    if num_classes > 2:
        try:
            y_true_bin = label_binarize(y_true, classes=np.arange(num_classes))
            y_scores_array = np.array(y_scores)

            # Calculate ROC AUC
            roc_auc = roc_auc_score(y_true_bin, y_scores_array, multi_class="ovr")
            print(f"\n{model_name} Multi-class ROC AUC Score: {roc_auc:.4f}")

            # Plot one one-vs-rest ROC curve per class
            plt.figure(figsize=(10, 8))
            for i in range(num_classes):
                fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_scores_array[:, i])
                auc_score = roc_auc_score(y_true_bin[:, i], y_scores_array[:, i])
                plt.plot(fpr, tpr, label=f"Class {class_names[i]} (AUC = {auc_score:.2f})")

            plt.plot([0, 1], [0, 1], "k--")
            plt.xlabel("False Positive Rate")
            plt.ylabel("True Positive Rate")
            plt.title(f"{model_name} ROC Curve")
            plt.legend()
            plt.savefig(os.path.join(results_dir, f"{file_stem}_roc_curve.png"))
            plt.close()
        except Exception as e:
            print(f"Error generating ROC curve: {str(e)}")

    # Inference Time Calculation
    sample_input = torch.randn(1, 3, standard_img_size, standard_img_size).to(device)

    # Warm-up runs
    with torch.no_grad():
        for _ in range(10):
            _ = model(sample_input)

    # Actual timing runs. CUDA kernels are launched asynchronously, so the
    # clock is only meaningful if the device is idle when it starts and has
    # finished all queued work when it stops.
    num_samples = 100
    if sample_input.is_cuda:
        torch.cuda.synchronize()
    start_time = time.perf_counter()
    with torch.no_grad():
        for _ in range(num_samples):
            _ = model(sample_input)
    if sample_input.is_cuda:
        torch.cuda.synchronize()
    inference_time = (time.perf_counter() - start_time) / num_samples

    # Number of Parameters
    num_params = sum(p.numel() for p in model.parameters())

    # FLOPs & MACs Calculation
    try:
        from thop import profile
        # thop.profile returns (MACs, parameter count) - not (FLOPs, MACs).
        macs, _ = profile(model, inputs=(sample_input,), verbose=False)
        # Conventional approximation: one multiply-accumulate = two FLOPs
        flops = 2 * macs
    except Exception as e:
        print(f"Error calculating FLOPs and MACs: {str(e)}")
        flops, macs = 0, 0

    # Convert inference time to milliseconds
    inference_time_ms = inference_time * 1000

    print(f"\n{model_name} Total Number of Parameters: {num_params:,}")
    print(f"{model_name} Average Inference Time per Sample: {inference_time_ms:.3f} ms")
    print(f"{model_name} FLOPs: {flops:,} ({format_units(flops)})")
    print(f"{model_name} MACs: {macs:,} ({format_units(macs)})\n")

    # Per-class accuracy: diagonal (correct) over row sums (true count per class)
    class_accuracy = conf_matrix.diagonal() / conf_matrix.sum(axis=1)
    class_acc_dict = {}
    for i, acc in enumerate(class_accuracy):
        print(f"✅ {model_name} Accuracy for class '{class_names[i]}': {acc:.2%}")
        class_acc_dict[class_names[i]] = float(acc)

    # Calculate and display test accuracy with 3 decimal places
    test_correct = sum(1 for i, j in zip(y_true, y_pred) if i == j)
    test_total = len(y_true)
    test_accuracy = test_correct / test_total
    print(f"\n✅ {model_name} Test Set Accuracy: {test_accuracy:.3f}")

    # Calculate and display model size in MB
    # Each parameter is typically stored as a 32-bit float (4 bytes)
    model_size_bytes = num_params * 4
    model_size_mb = model_size_bytes / (1024 * 1024)
    print(f"📊 {model_name} Model Size: {model_size_mb:.2f} MB")

    # Model characteristics based on model name
    characteristics = []
    if model_name == "DenseNet-121":
        characteristics = [
            "Dense connectivity pattern with direct connections from any layer to all subsequent layers",
            "Excellent feature reuse through dense connections",
            "Requires fewer parameters due to feature reuse",
            "Good performance with reduced overfitting",
            "Efficient gradient flow during training"
        ]
    elif model_name == "ResNeXt-50":
        characteristics = [
            "Extension of ResNet that aggregates residual transformations",
            "Uses split-transform-merge strategy with grouped convolutions",
            "Better performance than ResNet with similar complexity",
            "Higher accuracy-to-computation ratio than many models",
            "Cardinality dimension provides a more effective way to adjust model capacity"
        ]
    elif model_name == "Wide ResNet-50":
        characteristics = [
            "Modification of ResNet with increased width (channel count) and reduced depth",
            "Wider networks often achieve better performance than deeper ones",
            "More efficient to train than very deep networks",
            "Better feature extraction capability with wider channels",
            "Strong classification performance with reasonable computational cost"
        ]

    # Print characteristics
    print(f"\nKey characteristics of {model_name}:")
    for char in characteristics:
        print(f"- {char}")

    # Save all metrics to a JSON file
    metrics = {
        "model_name": model_name,
        "test_accuracy": float(test_accuracy),
        "inference_time_ms": float(inference_time_ms),
        "model_size_mb": float(model_size_mb),
        "parameters": int(num_params),
        "flops": int(flops),
        "macs": int(macs),
        "roc_auc_score": float(roc_auc) if roc_auc is not None else None,
        "per_class_accuracy": class_acc_dict,
        "classification_report": cls_report,
        "characteristics": characteristics
    }

    metrics_path = os.path.join(results_dir, f"{file_stem}_metrics.json")
    with open(metrics_path, "w") as f:
        json.dump(metrics, f, indent=4)

    print(f"\nMetrics saved to {metrics_path}")

    # Clean up memory
    del best_model_weights
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return model

# Train the three architectures one after another, releasing each model's
# memory before the next one is built. Any failure is printed with its
# traceback instead of propagating.
try:
    # Create the results directory
    if not os.path.exists("results"):
        os.makedirs("results")

    # 1. DenseNet-121
    print(f"\n\n{'=' * 80}")
    print("TRAINING DENSENET-121")
    print("=" * 80)
    densenet = models.densenet121(weights=models.DenseNet121_Weights.IMAGENET1K_V1)
    # Replace the classifier head with one sized for this dataset's classes
    densenet.classifier = nn.Linear(densenet.classifier.in_features, num_classes)
    train_and_evaluate_model(
        model_name='DenseNet-121',
        model=densenet,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        test_dataset=test_dataset,
        full_dataset=full_dataset,
    )

    # Clear memory before next model
    del densenet
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # 2. ResNeXt-50
    print(f"\n\n{'=' * 80}")
    print("TRAINING RESNEXT-50")
    print("=" * 80)
    resnext = models.resnext50_32x4d(weights=models.ResNeXt50_32X4D_Weights.IMAGENET1K_V1)
    resnext.fc = nn.Linear(resnext.fc.in_features, num_classes)
    train_and_evaluate_model(
        model_name='ResNeXt-50',
        model=resnext,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        test_dataset=test_dataset,
        full_dataset=full_dataset,
    )

    # Clear memory before next model
    del resnext
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # 3. Wide ResNet-50
    print(f"\n\n{'=' * 80}")
    print("TRAINING WIDE RESNET-50")
    print("=" * 80)
    wide_resnet = models.wide_resnet50_2(weights=models.Wide_ResNet50_2_Weights.IMAGENET1K_V1)
    wide_resnet.fc = nn.Linear(wide_resnet.fc.in_features, num_classes)
    train_and_evaluate_model(
        model_name='Wide ResNet-50',
        model=wide_resnet,
        train_loader=train_loader,
        val_loader=val_loader,
        test_loader=test_loader,
        test_dataset=test_dataset,
        full_dataset=full_dataset,
    )

    print("\n\nAll models have been trained and evaluated!")
    print("Results have been saved to individual folders in the 'results' directory")

except Exception as e:
    print(f"An error occurred during execution: {str(e)}")
    import traceback
    traceback.print_exc()

Using device: cuda
Successfully loaded dataset with 4849 images
Total number of samples: 4849
Class to index mapping: {'3_long_blade_rotor': 0, '3_short_blade_rotor_1': 1, '3_short_blade_rotor_2': 2, 'Bird': 3, 'Bird+mini-helicopter_1': 4, 'Bird+mini-helicopter_2': 5, 'RC plane_1': 6, 'RC plane_2': 7, 'drone_1': 8, 'drone_2': 9}
Class 3_long_blade_rotor: 799 samples
Class 3_short_blade_rotor_1: 400 samples
Class 3_short_blade_rotor_2: 400 samples
Class Bird: 800 samples
Class Bird+mini-helicopter_1: 415 samples
Class Bird+mini-helicopter_2: 400 samples
Class RC plane_1: 400 samples
Class RC plane_2: 400 samples
Class drone_1: 400 samples
Class drone_2: 435 samples
Number of classes: 10
Class Names: ['3_long_blade_rotor', '3_short_blade_rotor_1', '3_short_blade_rotor_2', 'Bird', 'Bird+mini-helicopter_1', 'Bird+mini-helicopter_2', 'RC plane_1', 'RC plane_2', 'drone_1', 'drone_2']


TRAINING DENSENET-121

Training and Evaluating DenseNet-121

DenseNet-121 Summary:
Model device: cuda:0
Tot

Downloading: "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth" to /root/.cache/torch/hub/checkpoints/resnext50_32x4d-7cdf4587.pth
100%|██████████| 95.8M/95.8M [00:00<00:00, 207MB/s]



Training and Evaluating ResNeXt-50

ResNeXt-50 Summary:
Model device: cuda:0
Total Parameters: 23,000,394
Trainable Parameters: 23,000,394

Key Layers:
--------------------------------------------------------------------------------
Layer Type                Parameters     
--------------------------------------------------------------------------------
Conv2d                    9,408
BatchNorm2d               128
MaxPool2d                 0
Conv2d                    8,192
BatchNorm2d               256
Conv2d                    4,608
BatchNorm2d               256
Conv2d                    32,768
BatchNorm2d               512
Conv2d                    16,384
BatchNorm2d               512
Conv2d                    32,768
BatchNorm2d               256
Conv2d                    4,608
BatchNorm2d               256
Conv2d                    32,768
BatchNorm2d               512
Conv2d                    32,768
BatchNorm2d               256
Conv2d                    4,608
BatchNorm2d         

Downloading: "https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth" to /root/.cache/torch/hub/checkpoints/wide_resnet50_2-95faca4d.pth
100%|██████████| 132M/132M [00:08<00:00, 16.9MB/s] 



Training and Evaluating Wide ResNet-50

Wide ResNet-50 Summary:
Model device: cuda:0
Total Parameters: 66,854,730
Trainable Parameters: 66,854,730

Key Layers:
--------------------------------------------------------------------------------
Layer Type                Parameters     
--------------------------------------------------------------------------------
Conv2d                    9,408
BatchNorm2d               128
MaxPool2d                 0
Conv2d                    8,192
BatchNorm2d               256
Conv2d                    147,456
BatchNorm2d               256
Conv2d                    32,768
BatchNorm2d               512
Conv2d                    16,384
BatchNorm2d               512
Conv2d                    32,768
BatchNorm2d               256
Conv2d                    147,456
BatchNorm2d               256
Conv2d                    32,768
BatchNorm2d               512
Conv2d                    32,768
BatchNorm2d               256
Conv2d                    147,456
BatchN