In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/muid-iitr-train-val-test-split/labels.csv
/kaggle/input/muid-iitr-train-val-test-split/split_data/data.yaml
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/hp_32.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/en_43.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/hp_68.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/hp_171.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/lp_52.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/mn_5.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/hp_203.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/hp_41.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/hp_143.txt
/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels/ln_30.txt
/kaggle/input/muid-iitr-train-val-te

In [2]:
# Upgrade PyTorch and Torchvision if needed
!pip install --upgrade torch torchvision

# Install PIL (Pillow) and NumPy if not already present
!pip install --upgrade pillow numpy

# If you need a specific version of Torchvision that supports the model you're using, specify it like this:
# !pip install --upgrade torch==2.0.0 torchvision==0.15.0

Collecting torch
  Downloading torch-2.7.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting torchvision
  Downloading torchvision-0.22.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.6.77 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.6.77 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.6.80 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.5.1.17 (from torch)
  Downloading nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas

In [3]:
import os
import numpy as np
import torch
import torch.utils.data
import torchvision
from torchvision import transforms
from PIL import Image

# ---------------------------
# Helper Functions
# ---------------------------
def read_yolo_labels(label_path, img_width, img_height):
    """
    Reads a YOLO-format label file and converts boxes from normalized (x_center, y_center, w, h)
    to absolute coordinates [xmin, ymin, xmax, ymax] (in pixels).
    Returns: boxes (list of lists) and labels (list of int).
    """
    boxes = []
    labels = []
    if not os.path.exists(label_path):
        return boxes, labels
    with open(label_path, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 5:
                continue
            cls = int(parts[0])
            x_center, y_center, w, h = map(float, parts[1:5])
            # Convert normalized coordinates to absolute
            x_center *= img_width
            y_center *= img_height
            w *= img_width
            h *= img_height
            xmin = x_center - w / 2
            ymin = y_center - h / 2
            xmax = x_center + w / 2
            ymax = y_center + h / 2
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(cls + 1)  # add 1 if 0 is reserved for background
    return boxes, labels

def compute_iou_np(box1, box2):
    """
    Compute IoU between two boxes in [xmin, ymin, xmax, ymax] format.
    """
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])
    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height
    box1_area = (box1[2]-box1[0]) * (box1[3]-box1[1])
    box2_area = (box2[2]-box2[0]) * (box2[3]-box2[1])
    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area > 0 else 0

# ---------------------------
# Custom Dataset for Faster R-CNN
# ---------------------------
class YoloDataset(torch.utils.data.Dataset):
    def __init__(self, images_dir, labels_dir, transforms=None):
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.transforms = transforms
        self.image_files = sorted([f for f in os.listdir(images_dir) if f.lower().endswith(('.jpg', '.png', '.jpeg'))])
    
    def __getitem__(self, idx):
        # Load image
        img_filename = self.image_files[idx]
        img_path = os.path.join(self.images_dir, img_filename)
        img = Image.open(img_path).convert("RGB")
        img_width, img_height = img.size
        
        # Load label file with the same base name
        base = os.path.splitext(img_filename)[0]
        label_path = os.path.join(self.labels_dir, base + ".txt")
        boxes, labels = read_yolo_labels(label_path, img_width, img_height)
        
        # If no boxes were found, create empty tensors with the correct shape
        if len(boxes) == 0:
            boxes = torch.empty((0, 4), dtype=torch.float32)
            labels = torch.empty((0,), dtype=torch.int64)
        else:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
        
        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = torch.tensor([idx])
        
        if self.transforms is not None:
            img = self.transforms(img)
        
        return img, target

    def __len__(self):
        return len(self.image_files)

def collate_fn(batch):
    return tuple(zip(*batch))

# ---------------------------
# Set Up Dataset and DataLoader
# ---------------------------
train_images_dir = "/kaggle/input/muid-iitr-train-val-test-split/split_data/train/images"
train_labels_dir = "/kaggle/input/muid-iitr-train-val-test-split/split_data/train/labels"
val_images_dir = "/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/images"
val_labels_dir = "/kaggle/input/muid-iitr-train-val-test-split/split_data/validation/labels"

# Define transforms: here we simply convert images to tensor.
data_transforms = transforms.Compose([
    transforms.ToTensor()
])

train_dataset = YoloDataset(train_images_dir, train_labels_dir, transforms=data_transforms)
val_dataset = YoloDataset(val_images_dir, val_labels_dir, transforms=data_transforms)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=collate_fn, num_workers=2)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=4, shuffle=False, collate_fn=collate_fn, num_workers=2)

# ---------------------------
# Set Up Faster R-CNN Model
# ---------------------------
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Number of classes (include background as class 0)
num_classes = 2  # For example, 1 object class ("phone") + background

# Load a pretrained Faster R-CNN model
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# Replace the classifier head with a new one for our number of classes.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
model.to(device)

for name, param in model.backbone.body.named_parameters():
    param.requires_grad = False

# ---------------------------
# Define Optimizer and Learning Rate Scheduler
# ---------------------------
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

# ---------------------------
# Training Loop with Validation, Best Model Saving, and Validation Accuracy
# ---------------------------
num_epochs = 15  # Change as needed
best_val_loss = float('inf')

for epoch in range(num_epochs):
    # ---- Training Phase ----
    model.train()
    running_loss = 0.0
    for images, targets in train_loader:
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        running_loss += losses.item()
        
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    
    lr_scheduler.step()
    train_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_loss:.4f}")
    
    # ---- Validation Phase (Compute Loss) ----
    # We use model.train() with no grad to force the model to return losses.
    model.train()
    val_running_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            val_running_loss += losses.item()
    
    val_loss = val_running_loss / len(val_loader)
    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Loss: {val_loss:.4f}")
    
    # ---- Compute Validation "Accuracy" ----
    # We switch to evaluation mode (which returns predictions) and compute a simple metric:
    # For each validation image, we count the fraction of ground truth boxes that have a matching predicted box (IoU >= 0.5).
    model.eval()
    all_acc = []
    with torch.no_grad():
        for images, targets in val_loader:
            images = [img.to(device) for img in images]
            predictions = model(images)  # This returns predictions in eval mode
            for i in range(len(predictions)):
                gt_boxes = targets[i]['boxes'].cpu().numpy()
                pred_boxes = predictions[i]['boxes'].cpu().numpy()
                # Skip images with no ground truth boxes
                if len(gt_boxes) == 0:
                    continue
                count = 0
                for gt in gt_boxes:
                    matched = False
                    for pb in pred_boxes:
                        if compute_iou_np(gt, pb) >= 0.5:
                            matched = True
                            break
                    if matched:
                        count += 1
                acc = count / len(gt_boxes)
                all_acc.append(acc)
    val_acc = np.mean(all_acc) if all_acc else 0.0
    print(f"Epoch [{epoch+1}/{num_epochs}], Validation Accuracy (IoU>=0.5): {val_acc:.4f}")
    
    # ---- Save Best Model Based on Validation Loss ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), "best.pt")
        print(f"Saved best model at epoch {epoch+1} with validation loss: {best_val_loss:.4f}")

print("Training complete.")

# ---------------------------
# Helper IoU Function for Validation Accuracy
# ---------------------------
def compute_iou_np(box1, box2):
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])
    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area
    return inter_area / union_area if union_area > 0 else 0



Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


100%|██████████| 160M/160M [00:00<00:00, 205MB/s]


Epoch [1/15], Training Loss: 0.1653
Epoch [1/15], Validation Loss: 0.1312
Epoch [1/15], Validation Accuracy (IoU>=0.5): 0.9506
Saved best model at epoch 1 with validation loss: 0.1312
Epoch [2/15], Training Loss: 0.1311
Epoch [2/15], Validation Loss: 0.1186
Epoch [2/15], Validation Accuracy (IoU>=0.5): 0.9321
Saved best model at epoch 2 with validation loss: 0.1186
Epoch [3/15], Training Loss: 0.1217
Epoch [3/15], Validation Loss: 0.1150
Epoch [3/15], Validation Accuracy (IoU>=0.5): 0.9383
Saved best model at epoch 3 with validation loss: 0.1150
Epoch [4/15], Training Loss: 0.1127
Epoch [4/15], Validation Loss: 0.1111
Epoch [4/15], Validation Accuracy (IoU>=0.5): 0.9136
Saved best model at epoch 4 with validation loss: 0.1111
Epoch [5/15], Training Loss: 0.1105
Epoch [5/15], Validation Loss: 0.1122
Epoch [5/15], Validation Accuracy (IoU>=0.5): 0.9136
Epoch [6/15], Training Loss: 0.1095
Epoch [6/15], Validation Loss: 0.1106
Epoch [6/15], Validation Accuracy (IoU>=0.5): 0.9136
Saved best

In [4]:
!pip install torch torchvision opencv-python-headless pandas numpy scipy

#!/usr/bin/env python3
import os
import glob
import shutil
import cv2
import pandas as pd
import numpy as np
import torch
import torchvision
from torchvision.transforms import functional as F
from scipy.optimize import linear_sum_assignment  # For optimal matching

# ---------------------------
# Helper Functions
# ---------------------------

def rename_test_images_and_labels_in_new_folder(src_images_dir, src_labels_dir, dst_images_dir, dst_labels_dir):
    """
    Copies all jpg images from src_images_dir to dst_images_dir and renames them sequentially (1.jpg, 2.jpg, etc).
    Also copies the corresponding label files from src_labels_dir to dst_labels_dir, renaming them accordingly.
    """
    os.makedirs(dst_images_dir, exist_ok=True)
    os.makedirs(dst_labels_dir, exist_ok=True)
    
    image_files = sorted(glob.glob(os.path.join(src_images_dir, '*.jpg')))
    
    for i, image_path in enumerate(image_files, start=1):
        new_image_filename = f"{i}.jpg"
        new_label_filename = f"{i}.txt"
        
        new_image_path = os.path.join(dst_images_dir, new_image_filename)
        new_label_path = os.path.join(dst_labels_dir, new_label_filename)
        
        shutil.copy(image_path, new_image_path)
        
        old_base = os.path.splitext(os.path.basename(image_path))[0]
        old_label_path = os.path.join(src_labels_dir, f"{old_base}.txt")
        
        if os.path.exists(old_label_path):
            shutil.copy(old_label_path, new_label_path)
    
    print(f"Copied and renamed {len(image_files)} images from '{src_images_dir}' to '{dst_images_dir}', "
          f"and corresponding labels from '{src_labels_dir}' to '{dst_labels_dir}'.")

def read_ground_truth_labels(label_file):
    """
    Reads a YOLO ground-truth label file (no confidence) and returns a list of boxes in
    normalized [x_min, y_min, x_max, y_max] format.
    Each line: class_id center_x center_y width height
    """
    if not os.path.exists(label_file):
        return []
    
    boxes = []
    with open(label_file, "r") as f:
        for line in f:
            parts = list(map(float, line.split()))
            if len(parts) != 5:
                continue
            _, cx, cy, w, h = parts  # YOLO format (normalized)
            # Convert center/width/height to x_min, y_min, x_max, y_max
            x_min = cx - (w / 2)
            y_min = cy - (h / 2)
            x_max = cx + (w / 2)
            y_max = cy + (h / 2)
            boxes.append([x_min, y_min, x_max, y_max])
    return boxes

def read_prediction_labels(label_file):
    """
    Reads a prediction label file (with confidence) and returns a list of boxes in
    normalized [x_min, y_min, x_max, y_max, confidence] format.
    Each line: class_id center_x center_y width height confidence
    """
    if not os.path.exists(label_file):
        return []
    
    boxes = []
    with open(label_file, "r") as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) < 6:
                continue
            cls_id = float(parts[0])
            cx = float(parts[1])
            cy = float(parts[2])
            w = float(parts[3])
            h = float(parts[4])
            conf = float(parts[5])
            # Convert to x_min, y_min, x_max, y_max
            x_min = cx - (w / 2)
            y_min = cy - (h / 2)
            x_max = cx + (w / 2)
            y_max = cy + (h / 2)
            boxes.append([x_min, y_min, x_max, y_max, conf])
    return boxes

def compute_iou(box1, box2):
    """
    Computes Intersection over Union (IoU) between two boxes in [x1, y1, x2, y2] format.
    """
    xi1 = max(box1[0], box2[0])
    yi1 = max(box1[1], box2[1])
    xi2 = min(box1[2], box2[2])
    yi2 = min(box1[3], box2[3])
    inter_width = max(0, xi2 - xi1)
    inter_height = max(0, yi2 - yi1)
    inter_area = inter_width * inter_height

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
    union_area = box1_area + box2_area - inter_area

    return inter_area / union_area if union_area > 0 else 0

def process_failure_cases(test_images_dir, ground_truth_dir, predictions_labels_dir, annotated_base_dir, iou_threshold):
    """
    For each test image, compares predicted and ground truth boxes.
    A failure is flagged if:
      1. The number of predicted boxes != ground truth boxes (label mismatch).
      2. Any corresponding pair's IoU (from the best matching) is below the threshold (IoU failure).

    In failure cases:
      - Annotated images are saved in subfolders of `annotated_base_dir`:
         * iou_threshold/      (if the best-matched IoU is below threshold)
         * label_mismatch/     (if the number of boxes mismatches)
      - A CSV is generated (failure_cases.csv) listing each failed image,
        its IoU values, counts, threshold, and in which folder(s) it was stored.
    """
    iou_failure_subfolder = os.path.join(annotated_base_dir, "iou_threshold")
    mismatch_failure_subfolder = os.path.join(annotated_base_dir, "label_mismatch")
    os.makedirs(iou_failure_subfolder, exist_ok=True)
    os.makedirs(mismatch_failure_subfolder, exist_ok=True)
    
    failure_data = []
    image_files = sorted(glob.glob(os.path.join(test_images_dir, '*.jpg')))
    print(f"Processing {len(image_files)} images in {test_images_dir}.")
    
    for image_file in image_files:
        base = os.path.splitext(os.path.basename(image_file))[0]
        gt_file = os.path.join(ground_truth_dir, base + '.txt')
        pred_file = os.path.join(predictions_labels_dir, base + '.txt')
        
        gt_boxes = read_ground_truth_labels(gt_file)
        pred_boxes = read_prediction_labels(pred_file)
        
        label_mismatch = (len(gt_boxes) != len(pred_boxes))
        low_iou = False
        iou_values = []
        
        if gt_boxes and pred_boxes:
            cost_matrix = np.zeros((len(gt_boxes), len(pred_boxes)))
            for i, gt in enumerate(gt_boxes):
                for j, pred in enumerate(pred_boxes):
                    cost_matrix[i, j] = -compute_iou(gt, pred[:4])
            row_ind, col_ind = linear_sum_assignment(cost_matrix)
            for i, j in zip(row_ind, col_ind):
                iou_val = compute_iou(gt_boxes[i], pred_boxes[j][:4])
                iou_values.append(iou_val)
                if iou_val < iou_threshold:
                    low_iou = True
        
        if label_mismatch or low_iou:
            img = cv2.imread(image_file)
            if img is None:
                print(f"Could not load image: {image_file}")
                continue

            height, width = img.shape[:2]
            for box_gt in gt_boxes:
                x_min, y_min, x_max, y_max = box_gt
                pt1 = (int(x_min * width), int(y_min * height))
                pt2 = (int(x_max * width), int(y_max * height))
                cv2.rectangle(img, pt1, pt2, (0, 255, 0), 2)
                cv2.putText(img, "GT", (pt1[0], max(pt1[1]-5, 0)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
            for box_pred in pred_boxes:
                x_min, y_min, x_max, y_max, conf = box_pred
                pt1 = (int(x_min * width), int(y_min * height))
                pt2 = (int(x_max * width), int(y_max * height))
                cv2.rectangle(img, pt1, pt2, (0, 0, 255), 2)
                cv2.putText(img, f"{conf:.2f}", (pt1[0], max(pt1[1]-5, 0)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            
            failure_reasons = []
            if label_mismatch:
                failure_reasons.append("label_mismatch")
                mismatch_path = os.path.join(mismatch_failure_subfolder, os.path.basename(image_file))
                cv2.imwrite(mismatch_path, img)
            if low_iou:
                failure_reasons.append("iou_threshold")
                iou_path = os.path.join(iou_failure_subfolder, os.path.basename(image_file))
                cv2.imwrite(iou_path, img)
            
            failure_data.append({
                'image': os.path.basename(image_file),
                'iou_values': iou_values,
                'ground_truth_count': len(gt_boxes),
                'predicted_count': len(pred_boxes),
                'iou_threshold': iou_threshold,
                'failure_folders': ",".join(failure_reasons)
            })
    
    df = pd.DataFrame(failure_data)
    csv_filename = 'failure_cases.csv'
    df.to_csv(csv_filename, index=False)
    print(f"Failure cases processed. CSV saved as {csv_filename}.")

def predict_test_images(model_path, source_dir, predictions_output_dir, conf_threshold=0.5, num_classes=2):
    """
    Runs Faster R-CNN prediction on all images in source_dir using the checkpoint at model_path.
    Saves prediction .txt files in YOLO format (normalized [class center_x center_y width height confidence])
    and annotated images with bounding boxes.
    
    The Faster R-CNN outputs boxes in absolute pixel coordinates; these are converted to normalized coordinates.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Instantiate the model with the correct number of classes.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
    checkpoint = torch.load(model_path, map_location=device)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()
    
    # Prepare output directories.
    images_output = os.path.join(predictions_output_dir, "images")
    labels_output = os.path.join(predictions_output_dir, "labels")
    os.makedirs(images_output, exist_ok=True)
    os.makedirs(labels_output, exist_ok=True)
    
    image_paths = sorted(glob.glob(os.path.join(source_dir, '*.jpg')))
    
    with torch.no_grad():
        for image_path in image_paths:
            # Read image and convert to RGB.
            orig_img = cv2.imread(image_path)
            if orig_img is None:
                print(f"Could not load image: {image_path}")
                continue
            img_rgb = cv2.cvtColor(orig_img, cv2.COLOR_BGR2RGB)
            pil_img = torchvision.transforms.functional.to_pil_image(img_rgb)
            img_tensor = F.to_tensor(pil_img).to(device)
            
            # Inference.
            outputs = model([img_tensor])
            output = outputs[0]
            
            # Get image dimensions.
            height, width = orig_img.shape[:2]
            predictions = []
            
            # Process each detected box.
            for box, score, label in zip(output['boxes'], output['scores'], output['labels']):
                if score < conf_threshold:
                    continue
                # Convert box from [x1, y1, x2, y2] (absolute) to YOLO format normalized:
                x1, y1, x2, y2 = box.cpu().numpy()
                box_width = x2 - x1
                box_height = y2 - y1
                center_x = x1 + box_width / 2
                center_y = y1 + box_height / 2
                # Normalize
                center_x_norm = center_x / width
                center_y_norm = center_y / height
                box_width_norm = box_width / width
                box_height_norm = box_height / height
                predictions.append([float(label), center_x_norm, center_y_norm, box_width_norm, box_height_norm, float(score)])
                # Draw the predicted box.
                pt1 = (int(x1), int(y1))
                pt2 = (int(x2), int(y2))
                cv2.rectangle(orig_img, pt1, pt2, (0, 0, 255), 2)
                cv2.putText(orig_img, f"{score:.2f}", (pt1[0], max(pt1[1]-5, 0)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
            
            # Save annotated image.
            base_name = os.path.basename(image_path)
            cv2.imwrite(os.path.join(images_output, base_name), orig_img)
            
            # Save predictions in txt file (YOLO format: class center_x center_y width height confidence).
            txt_filename = os.path.splitext(base_name)[0] + ".txt"
            with open(os.path.join(labels_output, txt_filename), "w") as f:
                for pred in predictions:
                    f.write(" ".join(map(str, pred)) + "\n")
    
    print(f"Predicted annotated images saved in {images_output} and prediction labels in {labels_output}")
    return {"images": images_output, "labels": labels_output}

# ---------------------------
# Main Execution
# ---------------------------

if __name__ == "__main__":
    # Update these paths as needed.
    test_images_dir = '/kaggle/working/split_data/test/images'          # Original test images
    ground_truth_dir = '/kaggle/working/split_data/test/labels'         # Original ground truth labels
    predictions_base_dir = '/kaggle/working/predicted'                  # Base folder for prediction outputs
    annotated_failures_dir = '/kaggle/working/annotated_failure'        # Base folder for annotated failure images
    test_new_images_dir = '/kaggle/working/test_new/images'             # Folder for renamed test images
    test_new_labels_dir = '/kaggle/working/test_new/labels'             # Folder for renamed label files
    
    # Set IoU threshold.
    iou_threshold = 0.5

    # Step 1: Rename/copy test images and labels.
    rename_test_images_and_labels_in_new_folder(
        test_images_dir,
        ground_truth_dir,
        test_new_images_dir,
        test_new_labels_dir
    )
    
    # Step 2: Run predictions on the new test images using the Faster R-CNN model.
    model_path = '/kaggle/working/best.pt'  # Path to your Faster R-CNN checkpoint.
    pred_dict = predict_test_images(model_path, test_new_images_dir, predictions_base_dir, conf_threshold=0.5, num_classes=2)
    predicted_images_dir = pred_dict["images"]
    predicted_labels_dir = pred_dict["labels"]
    
    # Step 3: Process predictions vs ground truth, annotate failure cases, and output CSV.
    process_failure_cases(
        test_new_images_dir,
        test_new_labels_dir,
        predicted_labels_dir,
        annotated_failures_dir,
        iou_threshold
    )

Copied and renamed 0 images from '/kaggle/working/split_data/test/images' to '/kaggle/working/test_new/images', and corresponding labels from '/kaggle/working/split_data/test/labels' to '/kaggle/working/test_new/labels'.




Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth


100%|██████████| 97.8M/97.8M [00:00<00:00, 223MB/s]


Predicted annotated images saved in /kaggle/working/predicted/images and prediction labels in /kaggle/working/predicted/labels
Processing 0 images in /kaggle/working/test_new/images.
Failure cases processed. CSV saved as failure_cases.csv.


In [5]:
!cd /kaggle/working && zip -r working.zip . -x "working.zip"

from IPython.display import FileLink
FileLink('/kaggle/working/working.zip')

  adding: predicted/ (stored 0%)
  adding: predicted/images/ (stored 0%)
  adding: predicted/labels/ (stored 0%)
  adding: annotated_failure/ (stored 0%)
  adding: annotated_failure/label_mismatch/ (stored 0%)
  adding: annotated_failure/iou_threshold/ (stored 0%)
  adding: test_new/ (stored 0%)
  adding: test_new/images/ (stored 0%)
  adding: test_new/labels/ (stored 0%)
  adding: best.pt (deflated 7%)
  adding: failure_cases.csv (stored 0%)
  adding: __notebook__.ipynb (deflated 95%)
