In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
        # print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install torch torchvision pillow numpy 



In [3]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("alirezachahardoli/vehicle-detection")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/vehicle-detection


In [4]:
!ls /kaggle/input/vehicle-detection/Vehicle_Detection/

test  train  valid  vehicles.yaml


In [5]:
import os

# Path to your dataset directory
dataset_path = "/kaggle/input/vehicle-detection/Vehicle_Detection"

# List files and directories in the dataset path
for split in ["train", "valid", "test"]:
    split_path = os.path.join(dataset_path, split, "images")
    print(f"{split.upper()} - Number of images: {len(os.listdir(split_path))}")


TRAIN - Number of images: 12000
VALID - Number of images: 2692
TEST - Number of images: 1141


In [6]:
import yaml

# Path to the YAML file
yaml_path = "/kaggle/input/vehicle-detection/Vehicle_Detection/vehicles.yaml"

# Read the YAML file
with open(yaml_path, 'r') as file:
    config = yaml.safe_load(file)

# Display the contents
print("Contents of vehicles.yaml:")
print(config)


Contents of vehicles.yaml:
{'path': '/Users/alireza/Desktop/', 'train': '/Users/alireza/Desktop/', 'val': '/Users/alireza/Desktop/', 'names': {0: 'Bus', 1: 'Truck', 2: 'Motorcycle', 3: 'Car'}}


In [7]:
import yaml

# Define the updated configuration
updated_config = {
    'path': '/kaggle/input/vehicle-detection/Vehicle_Detection',  # Base path to the dataset
    'train': 'train/images',  # Path to training images relative to the base path
    'val': 'valid/images',  # Path to validation images relative to the base path
    'test': 'test/images',  # Path to test images relative to the base path
    'names': {
        0: 'Bus',
        1: 'Truck',
        2: 'Motorcycle',
        3: 'Car'
    }
}

# Path to save the updated YAML file
output_yaml_path = '/kaggle/working/vehicles.yaml'

# Write the updated YAML file
with open(output_yaml_path, 'w') as file:
    yaml.dump(updated_config, file)

print(f"Updated vehicles.yaml saved to {output_yaml_path}")


Updated vehicles.yaml saved to /kaggle/working/vehicles.yaml


In [8]:
# Verify the updated YAML file
with open(output_yaml_path, 'r') as file:
    config = yaml.safe_load(file)

print("Updated YAML contents:")
print(config)


Updated YAML contents:
{'names': {0: 'Bus', 1: 'Truck', 2: 'Motorcycle', 3: 'Car'}, 'path': '/kaggle/input/vehicle-detection/Vehicle_Detection', 'test': 'test/images', 'train': 'train/images', 'val': 'valid/images'}


In [9]:
def verify_dataset(dataset_path):
    splits = ['train', 'valid', 'test']
    for split in splits:
        images_dir = os.path.join(dataset_path, split, 'images')
        labels_dir = os.path.join(dataset_path, split, 'labels')
        
        images = os.listdir(images_dir)
        labels = os.listdir(labels_dir)
        
        print(f"{split.upper()} DATASET:")
        print(f"  Images: {len(images)}")
        print(f"  Labels: {len(labels)}")
        print(f"  Mismatch: {len(images) - len(labels)}\n")

# Run the verification
dataset_path = "/kaggle/input/vehicle-detection/Vehicle_Detection"
verify_dataset(dataset_path)


TRAIN DATASET:
  Images: 12000
  Labels: 12000
  Mismatch: 0

VALID DATASET:
  Images: 2692
  Labels: 2692
  Mismatch: 0

TEST DATASET:
  Images: 1141
  Labels: 1141
  Mismatch: 0



In [10]:
from PIL import Image
import torch

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, image_dir, label_dir, transforms=None):
        self.image_dir = image_dir
        self.label_dir = label_dir
        self.image_files = sorted([f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))])
        self.label_files = sorted([f for f in os.listdir(label_dir) if f.endswith('.txt')])
        self.transforms = transforms

    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        # Load image
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        img = Image.open(img_path).convert("RGB")
        img_width, img_height = img.size  # Get image dimensions
        
        # Load labels
        label_path = os.path.join(self.label_dir, self.label_files[idx])
        boxes = []
        labels = []
        with open(label_path, "r") as f:
            for line in f.readlines():
                data = list(map(float, line.strip().split()))
                labels.append(int(data[0]))  # Class label
                cx, cy, w, h = data[1:]  # Center coordinates and dimensions
                # Convert [cx, cy, w, h] to [xmin, ymin, xmax, ymax]
                xmin = (cx - w / 2) * img_width
                ymin = (cy - h / 2) * img_height
                xmax = (cx + w / 2) * img_width
                ymax = (cy + h / 2) * img_height
                boxes.append([xmin, ymin, xmax, ymax])



        # Convert to tensors
        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

                # Skip samples without bounding boxes
        if boxes.size(0) == 0:
            return None
        
        # Create target dictionary
        target = {"boxes": boxes, "labels": labels}

        # Apply transformations
        if self.transforms:
            img = self.transforms(img)
        
        return img, target



In [11]:
from torch.utils.data import DataLoader

# Define transformations
import torchvision.transforms as T

transform = T.Compose([
    T.ToTensor(),  # Convert image to PyTorch tensor
])

# Create datasets
dataset = CustomDataset(
    image_dir="/kaggle/input/vehicle-detection/Vehicle_Detection/train/images",
    label_dir="/kaggle/input/vehicle-detection/Vehicle_Detection/train/labels",
    transforms=transform
)

# Split dataset into training and validation sets
indices = torch.randperm(len(dataset)).tolist()
train_dataset = torch.utils.data.Subset(dataset, indices[:-50])
valid_dataset = torch.utils.data.Subset(dataset, indices[-50:])

# Define custom collate function
def collate_fn(batch):
    batch = [b for b in batch if b is not None]
    return tuple(zip(*batch))

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn)
valid_loader = DataLoader(valid_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn)

print(f"Train DataLoader has {len(train_loader)} batches")
print(f"Valid DataLoader has {len(valid_loader)} batches")


Train DataLoader has 747 batches
Valid DataLoader has 4 batches


In [12]:
for images, targets in train_loader:
    print(images[0].shape)  # Should be a tensor with shape (C, H, W)
    print(targets[0]["boxes"])  # Bounding boxes tensor
    print(targets[0]["labels"])  # Labels tensor
    break


torch.Size([3, 640, 640])
tensor([[ 85.,  73., 539., 508.]])
tensor([3])


In [13]:
for images, targets in train_loader:
    print("Image size:", images[0].shape)
    print("Bounding boxes:", targets[0]["boxes"])
    print("Labels:", targets[0]["labels"])
    break  # Remove this to check more samples


Image size: torch.Size([3, 416, 416])
Bounding boxes: tensor([[ 29.2500,  28.0000, 381.7500, 398.0000]])
Labels: tensor([1])


In [14]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Load pre-trained Faster R-CNN model
model = fasterrcnn_resnet50_fpn(pretrained=True)

# Number of classes (your dataset classes + background)
num_classes = 4  # For example, 3 classes (vehicles) + background

# Get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# Replace the head of the model with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)


Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth
100%|██████████| 160M/160M [00:00<00:00, 197MB/s]  


In [15]:
# if torch.cuda.device_count() > 1:
#     print(f"Using {torch.cuda.device_count()} GPUs!")
#     model = torch.nn.DataParallel(model)  # Wrap the model for multi-GPU support

In [16]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(device)

cuda


In [17]:
import torch

# Check if CUDA is available
print("CUDA available:", torch.cuda.is_available())

# Check the number of GPUs
print("Number of GPUs:", torch.cuda.device_count())

# Print GPU names
for i in range(torch.cuda.device_count()):
    print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

CUDA available: True
Number of GPUs: 2
GPU 0: Tesla T4
GPU 1: Tesla T4


In [18]:
# Optimizer
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [19]:
num_epochs = 1

for epoch in range(num_epochs):
    print(f"Starting epoch {epoch + 1}/{num_epochs}...")  # Debug

    model.train()
    train_loss = 0.0

    for batch_idx, (images, targets) in enumerate(train_loader):
        print(f"Processing batch {batch_idx + 1}/{len(train_loader)}...")  # Debug
        
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        optimizer.zero_grad()

        # Forward pass
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        # Backward pass
        losses.backward()
        optimizer.step()

        train_loss += losses.item()

        # Print loss for the current batch
        if (batch_idx + 1) % 10 == 0:  # Adjust frequency as needed
            print(f"Batch {batch_idx + 1}/{len(train_loader)}, Loss: {losses.item():.4f}")
            torch.cuda.empty_cache()

    # Update learning rate
    lr_scheduler.step()

    print(f"Epoch {epoch + 1}/{num_epochs} completed, Average Loss: {train_loss / len(train_loader):.4f}")



Starting epoch 1/1...
Processing batch 1/747...
Processing batch 2/747...
Processing batch 3/747...
Processing batch 4/747...
Processing batch 5/747...
Processing batch 6/747...
Processing batch 7/747...
Processing batch 8/747...
Processing batch 9/747...
Processing batch 10/747...
Batch 10/747, Loss: 0.2894
Processing batch 11/747...
Processing batch 12/747...
Processing batch 13/747...
Processing batch 14/747...
Processing batch 15/747...
Processing batch 16/747...
Processing batch 17/747...
Processing batch 18/747...
Processing batch 19/747...
Processing batch 20/747...
Batch 20/747, Loss: 0.1654
Processing batch 21/747...
Processing batch 22/747...
Processing batch 23/747...
Processing batch 24/747...
Processing batch 25/747...
Processing batch 26/747...
Processing batch 27/747...
Processing batch 28/747...
Processing batch 29/747...
Processing batch 30/747...
Batch 30/747, Loss: 0.2157
Processing batch 31/747...
Processing batch 32/747...
Processing batch 33/747...
Processing batc

In [20]:
torch.cuda.empty_cache()

In [28]:
# Save the model's state dictionary
model_path = "/kaggle/working/fasterrcnn_model.pth"
torch.save(model.state_dict(), model_path)
print(f"Model saved to {model_path}")


Model saved to /kaggle/working/fasterrcnn_model.pth


In [29]:
from IPython.display import FileLink

# Generate a download link for the saved model
FileLink(model_path)


# Evaluation

In [30]:
import torch
import numpy as np
from sklearn.metrics import precision_recall_curve, auc
import matplotlib.pyplot as plt


In [41]:
model = fasterrcnn_resnet50_fpn(weights=None)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.load_state_dict(torch.load('/kaggle/working/fasterrcnn_model.pth'))
model = model.to(device)
model.eval()
print(device)

  model.load_state_dict(torch.load('/kaggle/working/fasterrcnn_model.pth'))


cuda


In [31]:
from torchvision.ops import box_iou

def compute_iou(pred_boxes, gt_boxes):
    """
    Computes IoU between predicted and ground-truth boxes.
    Args:
        pred_boxes (Tensor): Predicted bounding boxes [N, 4]
        gt_boxes (Tensor): Ground-truth bounding boxes [M, 4]
    Returns:
        iou (Tensor): IoU matrix [N, M]
    """
    return box_iou(pred_boxes, gt_boxes)


In [36]:
def evaluate_model(model, data_loader, device, iou_threshold=0.5):
    """
    Evaluates the model using mAP and recall metrics.
    Args:
        model: Trained Faster R-CNN model.
        data_loader: DataLoader for validation/test dataset.
        device: Device (CPU or GPU).
        iou_threshold: IoU threshold for considering a positive match.
    Returns:
        mean_average_precision (float): mAP for the dataset.
        recall (float): Recall metric for the dataset.
    """
    model.eval()
    all_precisions, all_recalls = [], []
    total_true_positives, total_false_positives, total_false_negatives = 0, 0, 0

    with torch.no_grad():
        for images, targets in data_loader:
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            # Forward pass
            outputs = model(images)

            for target, output in zip(targets, outputs):
                gt_boxes = target["boxes"]
                gt_labels = target["labels"]
                pred_boxes = output["boxes"]
                pred_scores = output["scores"]
                pred_labels = output["labels"]

                # Match predicted boxes to ground-truth boxes
                ious = compute_iou(pred_boxes, gt_boxes)
                true_positives = (ious > iou_threshold).sum().item()
                false_positives = pred_boxes.size(0) - true_positives
                false_negatives = gt_boxes.size(0) - true_positives

                total_true_positives += true_positives
                total_false_positives += false_positives
                total_false_negatives += false_negatives

                # Compute precision and recall for the batch
                precision = true_positives / (true_positives + false_positives + 1e-6)
                recall = true_positives / (true_positives + false_negatives + 1e-6)

                all_precisions.append(precision)
                all_recalls.append(recall)

    # Overall metrics
    mean_average_precision = np.mean(all_precisions)
    recall = np.mean(all_recalls)

    print(f"mAP: {mean_average_precision:.4f}, Recall: {recall:.4f}")
    return mean_average_precision, recall


In [34]:
def plot_precision_recall_curve(all_labels, all_scores, class_names):
    """
    Plots precision-recall curves for all classes.
    Args:
        all_labels: List of ground truth labels.
        all_scores: List of predicted scores for each class.
        class_names: List of class names.
    """
    for i, class_name in enumerate(class_names):
        y_true = [int(l == i) for l in all_labels]
        y_scores = [s[i] for s in all_scores]
        precision, recall, _ = precision_recall_curve(y_true, y_scores)
        pr_auc = auc(recall, precision)
        plt.plot(recall, precision, label=f"{class_name} (AUC={pr_auc:.2f})")

    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision-Recall Curve")
    plt.legend()
    plt.show()


In [35]:
# Move model to device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Evaluate on validation/test set
mAP, recall = evaluate_model(model, valid_loader, device)

# Precision-recall curve (optional)
# Assuming you collect `all_labels` and `all_scores` during evaluation
plot_precision_recall_curve(all_labels, all_scores, ["background", "Bus", "Truck", "Motorcycle", "Car"])


mAP: 0.7398, Recall: 1.2000


NameError: name 'all_labels' is not defined