In [1]:
<center style="border-radius:10px;
padding: 3rem 2rem;
border: 3px solid #F54257;
">
<h1 style="color:#F54257; 
font-size:3.0rem;
margin:0;
">KITTI Object Detection</h1>
<h2 style="color:#F54257; 
font-size:2.0rem;
margin-top:1rem;
margin-bottom:2.5rem;
">yolov11m | Ultralytics</h2>
<a href="https://kaggle.com/shreydan" style="color: white;
background-color: #F54257;
border-radius: 25px;
padding: 1rem 1.5rem;
text-decoration: none;
">@shreydan</a>
</center>

SyntaxError: unterminated string literal (detected at line 1) (3055808070.py, line 1)

# Imports
<div style="width:100%;height:0;border-bottom: 3px solid #F03A4F;margin-bottom: 1rem;"></div>

In [None]:
!pip install ultralytics -q

In [None]:
%env WANDB_DISABLED=True

In [None]:
from ultralytics import YOLO
# !pip install --force-reinstall numpy scipy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
import json
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
import shutil
from PIL import Image

In [None]:
base_dir = Path('/kaggle/input/kitti-dataset')
img_path = base_dir / 'data_object_image_2' / 'training' / 'image_2'
label_path = Path('/kaggle/input/kitti-dataset-yolo-format/labels')
with open('/kaggle/input/kitti-dataset-yolo-format/classes.json','r') as f:
    classes = json.load(f)

classes

# Preparing Dataset
<div style="width:100%;height:0;border-bottom: 3px solid #F27B40;margin-bottom: 1rem;"></div>

In [None]:
ims = sorted(list(img_path.glob('*')))
labels = sorted(list(label_path.glob('*')))
pairs = list(zip(ims,labels))
pairs[:2]

# Preparing File Structure
<div style="width:100%;height:0;border-bottom: 3px solid #F4B343;margin-bottom: 1rem;"></div>

```
/kaggle/working
    |
    -train
    |   |
    |   -000000.png
    |   -000000.txt
    |   ...
    |
    -val
      |
      -000001.png
      -000001.txt
      ...
```

In [None]:
train, test = train_test_split(pairs,test_size=0.1,shuffle=True)
len(train), len(test)

In [None]:
train_path = Path('train').resolve()
train_path.mkdir(exist_ok=True)
valid_path = Path('valid').resolve()
valid_path.mkdir(exist_ok=True)

In [None]:
for t_img, t_lb in tqdm(train):
    im_path = train_path / t_img.name
    lb_path = train_path / t_lb.name
    shutil.copy(t_img,im_path)
    shutil.copy(t_lb,lb_path)

In [None]:
for t_img, t_lb in tqdm(test):
    im_path = valid_path / t_img.name
    lb_path = valid_path / t_lb.name
    shutil.copy(t_img,im_path)
    shutil.copy(t_lb,lb_path)

# YAML file for the data
<div style="width:100%;height:0;border-bottom: 3px solid #F4CE45;margin-bottom: 1rem;"></div>

In [None]:
!cat kitti.yaml

In [None]:
yaml_file = 'names:\n'
yaml_file += '\n'.join(f'- {c}' for c in classes)
yaml_file += f'\nnc: {len(classes)}'
yaml_file += f'\ntrain: {str(train_path)}\nval: {str(valid_path)}'
with open('kitti.yaml','w') as f:
    f.write(yaml_file)

# Model
<div style="width:100%;height:0;border-bottom: 3px solid #F27B40;margin-bottom: 1rem;"></div>

In [None]:
import os
print(os.listdir('.'))

In [None]:
from ultralytics import YOLO

model = YOLO('/kaggle/input/yolov11m-pt/yolo11m.pt')  # Load YOLOv8 Medium


# Training
<div style="width:100%;height:0;border-bottom: 3px solid #F5E947;margin-bottom: 1rem;"></div>

In [None]:
# train_results = model.train(
#     data='/kaggle/working/kitti.yaml', 
#     epochs=50,
#     patience=3,
#     mixup=0.1,
#     project='yolov11m-kitti',
#     device=0
# )

train_results = model.train(
    data='/kaggle/working/kitti.yaml',
    epochs=50,
    patience=5,
    batch=16,
    imgsz=640,
    mixup=0.1,
    lr0=0.01,
    optimizer='SGD',
    close_mosaic=10,
    project='yolov11m-kitti',
    device=0
)


# Validation
<div style="width:100%;height:0;border-bottom: 3px solid #E7F549;margin-bottom: 1rem;"></div>

In [None]:
valid_results = model.val()

# Results
<div style="width:100%;height:0;border-bottom: 3px solid #F25F3E;margin-bottom: 1rem;"></div>

In [None]:
plt.figure(figsize=(10,20))
plt.imshow(Image.open('/kaggle/working/yolov11m-kitti/train/results.png'))
plt.axis('off')
plt.show()

In [None]:
plt.figure(figsize=(10,20))
plt.imshow(Image.open('/kaggle/working/yolov11m-kitti/val/confusion_matrix.png'))
plt.axis('off')
plt.show()

# Predictions
<div style="width:100%;height:0;border-bottom: 3px solid #CEF64B;margin-bottom: 1rem;"></div>

In [None]:
preds = model.predict([test[idx][0] for idx in np.random.randint(0,len(test),(20,))],save=True)

In [None]:
preds = list(Path('yolov11m-kitti/predict').glob('*'))

In [None]:
def plot_images(images):
    num_images = len(images)
    rows = num_images
    cols = 1
    fig, axes = plt.subplots(rows, cols, figsize=(15, 80))
    for ax in axes.flat:
        ax.axis('off')
    for i, img_path in enumerate(images):
        img = Image.open(img_path)
        axes[i].imshow(img)
    plt.tight_layout()
    plt.show()
plot_images(preds)

### cleanup

In [None]:
!cp /kaggle/working/yolov11m-kitti/train/weights/best.pt /kaggle/working/

In [None]:
!zip -r results.zip yolov11m-kitti

In [None]:
!rm -rf yolov11m-kitti wandb train.cache valid.cache yolov11m.pt train valid

In [None]:
print("Hello")

## Dataset kaam start Collecting 10 missclassified images per class


In [None]:
from pathlib import Path
import json

base_dir = Path('/kaggle/input/kitti-dataset')
img_path = base_dir / 'data_object_image_3' / 'testing' / 'image_3'
label_path = Path('/kaggle/input/kitti-dataset-yolo-format/labels')

with open('/kaggle/input/kitti-dataset-yolo-format/classes.json','r') as f:
    classes = json.load(f)

ims = sorted(list(img_path.glob('*')))
labels = sorted(list(label_path.glob('*')))
pairs = list(zip(ims, labels))


### check loaded dataset

In [None]:
print(ims[:5])  # Print the first 5 image file paths
print(labels[:5])  # Print the first 5 label file paths


In [None]:
print(pairs[:5])  # Print the first 5 paired image-label paths


In [None]:
print(classes)


### import the model

In [None]:
!pip install ultralytics


In [None]:
from ultralytics import YOLO
model_path = '/kaggle/input/yolo11_trained/pytorch/default/1/yolo11m.pt'
model = YOLO(model_path, task='detect')


### test

In [None]:
import random
import cv2
from matplotlib import pyplot as plt

# Randomly pick an image from the test dataset
image_path = random.choice(ims)

# Read and convert the image to RGB
image = cv2.imread(str(image_path))
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# Perform inference
results = model(image_path)  # Inference on the selected image
print("Hi")
# Extract predictions from results
predictions = results[0].boxes  # Access the boxes (predictions)

# Print predictions (bounding boxes, class ids, etc.)
print(predictions)

# Optionally, visualize the results
plt.figure(figsize=(10, 6))
plt.imshow(image_rgb)
plt.show()


### visual bounding box

In [None]:
# Access the predicted bounding boxes, class IDs, and confidence scores
boxes = results[0].boxes.xyxy  # bounding box coordinates in xyxy format
class_ids = results[0].boxes.cls  # class IDs of detected objects
confidences = results[0].boxes.conf  # confidence scores of detections

# Print the results
print("Bounding Boxes:", boxes)
print("Class IDs:", class_ids)
print("Confidence Scores:", confidences)


In [None]:
classes = {'Car': 0, 'Pedestrian': 1, 'Van': 2, 'Cyclist': 3, 'Truck': 4, 'Misc': 5, 'Tram': 6, 'Person_sitting': 7}
print(classes)

### directoy creation for misclassiifed images

In [None]:
index_to_label = {
    '0': 'Car',
    '1': 'Pedestrian',
    '2': 'Van',
    '3': 'Cyclist',
    '4': 'Truck',
    '5': 'Misc',
    '6': 'Tram',
    '7': 'Person_sitting'
}
#class_mapping= {v: k for k, v in class_mapping.items()}

In [None]:
#print("Class Mapping: ", class_mapping)
print("index_to_label: ", index_to_label)

## Agin created the misclassified dataset

In [None]:
import shutil
from pathlib import Path

# Path to the misclassified directory
misclassified_dir = Path('/kaggle/working/misclassified')

# Check if the directory exists
if misclassified_dir.exists():
    # Remove the entire misclassified directory and its contents
    shutil.rmtree(misclassified_dir)
    print("Old misclassified dataset has been cleaned.")

# Recreate the misclassified directory
misclassified_dir.mkdir(parents=True, exist_ok=True)
print("Misclassified dataset folder has been recreated.")


In [None]:
from pathlib import Path

# Path to the misclassified directory
misclassified_dir = Path('/kaggle/working/misclassified')

# Create 'images' and 'labels' directories inside 'misclassified'
images_dir = misclassified_dir / 'images'
labels_dir = misclassified_dir / 'labels'

# Create the directories if they don't exist
images_dir.mkdir(parents=True, exist_ok=True)
labels_dir.mkdir(parents=True, exist_ok=True)

print("Subdirectories 'images' and 'labels' have been created inside 'misclassified'.")


In [None]:
# Path to the 'labels' directory inside 'misclassified'
actual_dir = labels_dir / 'actual'
predicted_dir = labels_dir / 'predicted'

# Create the 'actual' and 'predicted' directories if they don't exist
actual_dir.mkdir(parents=True, exist_ok=True)
predicted_dir.mkdir(parents=True, exist_ok=True)

print("Subdirectories 'actual' and 'predicted' have been created inside 'labels'.")


# checking and adding values in misclassified dataset - DEMO Prediction

In [None]:
from ultralytics import YOLO

# Load the trained YOLOv11 model (update path if needed)
model_path = '/kaggle/input/yolo11_trained/pytorch/default/1/yolo11m.pt'  # or wherever your best model is saved
model = YOLO(model_path)

print("✅ Model loaded successfully!")


In [None]:
import random
from pathlib import Path
import matplotlib.pyplot as plt
import cv2

# Define path to your validation images
val_images_path = Path('/kaggle/input/kitti-dataset/data_object_image_3/testing/image_3')  # change if needed

# Get list of image files
val_image_files = list(val_images_path.glob('*.jpg')) + list(val_images_path.glob('*.png'))

# Pick 5 random images
sample_images = random.sample(val_image_files, 5)

# Run inference and display predictions
for image_path in sample_images:
    # Run YOLO prediction
    results = model(image_path)

    # Plot image with prediction
    image = cv2.imread(str(image_path))
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Plot
    plt.figure(figsize=(8, 6))
    plt.imshow(image_rgb)
    plt.axis('off')
    plt.title(f"Predictions for: {image_path.name}")
    results[0].plot(show=True)
    plt.show()


### comparing the values 

In [None]:
import random
from pathlib import Path
import matplotlib.pyplot as plt
import cv2

# Assuming index_to_label is already defined
# Example: index_to_label = {0: 'Car', 1: 'Pedestrian', ...}

# Paths
val_images_path = Path('/kaggle/input/kitti-dataset/data_object_image_3/testing/image_3')
val_labels_path = Path('/kaggle/input/kitti-dataset-yolo-format/labels')

# Get list of images
val_image_files = list(val_images_path.glob('*.jpg')) + list(val_images_path.glob('*.png'))
sample_images = random.sample(val_image_files, 5)

# Process each image
for image_path in sample_images:
    label_path = val_labels_path / (image_path.stem + '.txt')

    print(f"\n🔍 Image: {image_path.name}")

    # Load actual labels from label file
    actual_classes = []
    if label_path.exists():
        with open(label_path, 'r') as f:
            for line in f.readlines():
                class_index = int(line.split()[0])
                actual_classes.append(index_to_label.get(class_index, f"Unknown-{class_index}"))
    else:
        actual_classes.append("Label file missing")

    # Run prediction
    results = model(image_path)
    predicted_classes = []
    if results and results[0].boxes is not None:
        predicted_indices = results[0].boxes.cls.int().tolist()
        predicted_classes = [index_to_label.get(i, f"Unknown-{i}") for i in predicted_indices]
    else:
        predicted_classes.append("No predictions")

    print(f"✅ Actual Labels   : {actual_classes}")
    print(f"🔮 Predicted Labels: {predicted_classes}")

## partial/ complete / multiple Accuracies

In [None]:
import random
from pathlib import Path
import matplotlib.pyplot as plt
import cv2

# Assuming index_to_label is already defined
# Example: index_to_label = {0: 'Car', 1: 'Pedestrian', ...}

# Paths
val_images_path = Path('/kaggle/input/kitti-dataset/data_object_image_3/testing/image_3')
val_labels_path = Path('/kaggle/input/kitti-dataset-yolo-format/labels')

# Get list of images
val_image_files = list(val_images_path.glob('*.jpg')) + list(val_images_path.glob('*.png'))
sample_images = random.sample(val_image_files, 5)

# Process each image
for image_path in sample_images:
    label_path = val_labels_path / (image_path.stem + '.txt')

    print(f"\n🔍 Image: {image_path.name}")

    # Load actual labels from label file
    actual_classes = []
    if label_path.exists():
        with open(label_path, 'r') as f:
            for line in f.readlines():
                class_index = int(line.split()[0])
                actual_classes.append(index_to_label.get(class_index, f"Unknown-{class_index}"))
    else:
        actual_classes.append("Label file missing")

    # Run prediction
    results = model(image_path)
    predicted_classes = []
    if results and results[0].boxes is not None:
        predicted_indices = results[0].boxes.cls.int().tolist()
        predicted_classes = [index_to_label.get(i, f"Unknown-{i}") for i in predicted_indices]
    else:
        predicted_classes.append("No predictions")

    print(f"✅ Actual Labels   : {actual_classes}")
    print(f"🔮 Predicted Labels: {predicted_classes}")

    # Check for partial accuracy (if at least one predicted label matches)
    correct_predictions = []
    incorrect_predictions = []

    for predicted in predicted_classes:
        if predicted in actual_classes:
            correct_predictions.append(predicted)
        else:
            incorrect_predictions.append(predicted)

    print(f"✅ Correct Predictions: {correct_predictions}")
    print(f"❌ Incorrect Predictions: {incorrect_predictions}")

    # Check for overall accuracy (if all predicted labels match)
    if len(predicted_classes) == len(actual_classes) and all(p in actual_classes for p in predicted_classes):
        print("✅ Overall Accuracy: Correct (all labels match)")
    else:
        print("❌ Overall Accuracy: Incorrect (not all labels match)")

    # Check for multiple matching labels
    matched_count = 0
    for predicted in predicted_classes:
        if predicted in actual_classes:
            matched_count += 1

    print(f"🔮 Multiple Matching Labels: {matched_count} out of {len(predicted_classes)} matched labels.")
    
    print("-" * 50)


### filling the misclassification dataset - with 10 img each

In [None]:
import random
from pathlib import Path
import shutil

# Define paths
val_images_path = Path('/kaggle/input/kitti-dataset/data_object_image_3/testing/image_3')
val_labels_path = Path('/kaggle/input/kitti-dataset-yolo-format/labels')

# Output folders
base_dir = Path('/kaggle/working/misclassified')
img_save_dir = base_dir/ 'images'
actual_label_dir = base_dir / 'labels' / 'actual'
pred_label_dir = base_dir / 'labels' / 'predicted'

# Create folders
img_save_dir.mkdir(parents=True, exist_ok=True)
actual_label_dir.mkdir(parents=True, exist_ok=True)
pred_label_dir.mkdir(parents=True, exist_ok=True)

# Index mapping
index_to_label = {
    0: 'Car',
    1: 'Pedestrian',
    2: 'Van',
    3: 'Cyclist',
    4: 'Truck',
    5: 'Misc',
    6: 'Tram',
    7: 'Person_sitting'
}

# Load 250 images
val_image_files = sorted(list(val_images_path.glob('*.png')) + list(val_images_path.glob('*.jpg')))[:250]

misclassified_count = 0
misclassified_limit = 100
image_counter = 1

for image_path in val_image_files:
    if misclassified_count >= misclassified_limit:
        break

    label_path = val_labels_path / (image_path.stem + '.txt')

    # Load actual classes
    actual_classes = []
    if label_path.exists():
        with open(label_path, 'r') as f:
            for line in f.readlines():
                class_index = int(line.split()[0])
                actual_classes.append(index_to_label.get(class_index, f"Unknown-{class_index}"))
    else:
        actual_classes.append("Label file missing")

    # Run prediction
    results = model(image_path)
    predicted_classes = []
    if results and results[0].boxes is not None:
        predicted_indices = results[0].boxes.cls.int().tolist()
        predicted_classes = [index_to_label.get(i, f"Unknown-{i}") for i in predicted_indices]
    else:
        predicted_classes.append("No predictions")

    # Compare sets for misclassification
    if set(actual_classes) != set(predicted_classes):
        misclassified_count += 1
        save_name = f"misclassified_image_{image_counter}.png"
        shutil.copy(image_path, img_save_dir / save_name)

        # Save actual labels
        with open(actual_label_dir / f"misclassified_image_{image_counter}.txt", 'w') as f:
            f.write(", ".join(actual_classes))

        # Save predicted labels
        with open(pred_label_dir / f"misclassified_image_{image_counter}.txt", 'w') as f:
            f.write(", ".join(predicted_classes))

        image_counter += 1

print(f"✅ Saved {misclassified_count} misclassified images.")


In [None]:
from pathlib import Path

# Path to your misclassified image folder
img_path = Path('/kaggle/working/misclassified/images')

# Count .png and .jpg images
image_files = list(img_path.glob('misclassified_image_*.png')) + list(img_path.glob('misclassified_image_*.jpg'))
print(f"📸 Total misclassified images found: {len(image_files)}")


In [None]:
import shutil

# Zip the entire folder
shutil.make_archive("misclassified_dataset", 'zip', "misclassified")


# Misclassification dataset complete

In [None]:
import random
from pathlib import Path

# Paths
image_dir = Path('./misclassified/images')
actual_label_dir = Path('./misclassified/labels/actual')
predicted_label_dir = Path('./misclassified/labels/predicted')

# Get list of all misclassified image files
all_images = list(image_dir.glob('misclassified_image_*.png'))

# Choose 5 random images
sample_images = random.sample(all_images, 5)

# Display comparisons
for image_path in sample_images:
    image_name = image_path.stem  # e.g., "misclassified_image_12"

    actual_label_path = actual_label_dir / f"{image_name}.txt"
    predicted_label_path = predicted_label_dir / f"{image_name}.txt"

    # Read labels
    actual_labels = actual_label_path.read_text().strip().splitlines() if actual_label_path.exists() else ["Missing"]
    predicted_labels = predicted_label_path.read_text().strip().splitlines() if predicted_label_path.exists() else ["Missing"]

    print(f"🖼️ Image: {image_path.name}")
    print(f"✅ Actual Labels   : {actual_labels}")
    print(f"🔮 Predicted Labels: {predicted_labels}")
    print("-" * 50)


In [None]:
import random
from pathlib import Path
import matplotlib.pyplot as plt
import cv2

# Paths
image_dir = Path('./misclassified/images')
actual_label_dir = Path('./misclassified/labels/actual')
predicted_label_dir = Path('./misclassified/labels/predicted')

# Get list of all misclassified images
all_images = list(image_dir.glob('misclassified_image_*.png'))

# Pick 5 random samples
sample_images = random.sample(all_images, 5)

# Plot each image with actual & predicted labels
plt.figure(figsize=(15, 10))

for idx, image_path in enumerate(sample_images):
    image_name = image_path.stem  # misclassified_image_{i}
    
    # Read actual and predicted labels
    actual_label_path = actual_label_dir / f"{image_name}.txt"
    predicted_label_path = predicted_label_dir / f"{image_name}.txt"

    actual_labels = actual_label_path.read_text().strip().splitlines() if actual_label_path.exists() else ["Missing"]
    predicted_labels = predicted_label_path.read_text().strip().splitlines() if predicted_label_path.exists() else ["Missing"]

    # Load and convert image (OpenCV loads as BGR)
    img = cv2.imread(str(image_path))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Plot
    plt.subplot(2, 3, idx + 1)
    plt.imshow(img)
    plt.axis('off')
    plt.title(f"{image_name}\nActual: {actual_labels}\nPred: {predicted_labels}", fontsize=10)

plt.tight_layout()
plt.show()


In [None]:
from pathlib import Path

actual_dir = Path('./misclassified/labels/actual')
predicted_dir = Path('./misclassified/labels/predicted')

complete = 0
partial = 0
multiple = 0

total = 0

for actual_file in actual_dir.glob('*.txt'):
    name = actual_file.stem
    predicted_file = predicted_dir / f"{name}.txt"

    if not predicted_file.exists():
        continue

    actual_labels = actual_file.read_text().strip().splitlines()
    predicted_labels = predicted_file.read_text().strip().splitlines()

    total += 1

    if set(actual_labels) == set(predicted_labels):
        complete += 1
    elif any(label in actual_labels for label in predicted_labels):
        partial += 1
    else:
        multiple += 1

# 🔢 Print Results
print(f"Total Misclassified Images Analyzed: {total}")
print(f"✅ Complete Matches     : {complete}")
print(f"🟡 Partial Matches      : {partial}")
print(f"❌ Multiple Mismatches  : {multiple}")

# Optional: Accuracy %
print("\n📊 Accuracy Breakdown:")
print(f"Complete Accuracy : {100 * complete / total:.2f}%")
print(f"Partial Accuracy  : {100 * partial / total:.2f}%")
print(f"Multiple Accuracy : {100 * multiple / total:.2f}%")


# Now We'l Start Grad-CAM

# Image Classifier 

In [None]:
!pip install ultralytics


In [None]:
from ultralytics import YOLO
# Load the YOLOv11 model
model = YOLO("/kaggle/input/yolov11m-pt/yolo11m.pt")
# Predict on a sample image
image_path =   "/kaggle/input/kitti-dataset/data_object_image_2/testing/image_2/000002.png" 
# image_path = "/kaggle/input/kitti-dataset/data_object_image_3/testing/image_3/000004.png"
results = model.predict(source=image_path, conf=0.25, save=False)
# Display predictions inline (optional)
results[0].show()

# Acessing Results


In [None]:
# Access the first result
result = results[0]

# Accessing the bounding boxes (contains bounding box coordinates in x1, y1, x2, y2 format)
print("**& Boxes (x1, y1, x2, y2 format):", result.boxes)

# Accessing the class IDs (numeric)
#print("**& Class IDs (numeric):", result.cls)

# Accessing confidence scores
#print("**& Confidence Scores:", result.conf)

# Accessing class names (mapping numeric class IDs to human-readable class names)
print("**& Class Names:", result.names)

# Accessing the original image
print("**& Original Image:", result.orig_img)

# Accessing the original image shape
print("**& Original Image Shape:", result.orig_shape)

# Speed metrics (time taken for preprocessing, inference, and postprocessing)
print("**& Speed metrics:", result.speed)


### Acessing Names from results - CLASSS ID


In [None]:
# Access the first result
result = results[0]

# Accessing the class names dictionary
class_names = result.names

# Example: If you want to print the class name corresponding to a class ID
for class_id in result.boxes.cls:
    class_name = class_names[int(class_id)]  # Convert class_id to a string using the 'names' dictionary
    print(f"Class ID: {class_id} corresponds to Class Name: {class_name}")


### Acessing prob from results - CONFIDENCE

In [None]:
# Access the first result
result = results[0]

# Access confidence scores
confidence_scores = result.boxes.conf

# Access class IDs
class_ids = result.boxes.cls

# Display the confidence score and class ID for each detection
for i, (score, cls) in enumerate(zip(confidence_scores, class_ids)):
    print(f"Detection {i} - Confidence Score: {score:.4f}, Class ID: {cls.item()}")


### which car to which probability

In [None]:
result = results[0]

# Get everything
boxes = result.boxes
confidences = boxes.conf
class_ids = boxes.cls
coords = boxes.xyxy  # or boxes.xywh if you prefer center format

# Map class IDs to class names
class_names = result.names

# Print detailed info
for i, (score, cls, coord) in enumerate(zip(confidences, class_ids, coords)):
    class_name = class_names[int(cls)]
    x1, y1, x2, y2 = coord.tolist()
    print(f"Detection {i}:")
    print(f"  Class       : {class_name} (ID {int(cls)})")
    print(f"  Confidence  : {score:.4f}")
    print(f"  BBox [xyxy] : ({x1:.1f}, {y1:.1f}, {x2:.1f}, {y2:.1f})\n")


In [None]:
from PIL import Image, ImageDraw, ImageFont
from IPython.display import display

# Load original image
img = Image.open(result.path).convert("RGB")
draw = ImageDraw.Draw(img)

# Try to use a nicer font, fall back to default
try:
    font = ImageFont.truetype("arial.ttf", size=14)
except:
    font = ImageFont.load_default()

# Draw each detection with bounding box, class name, score, and detection index
for i, (score, cls, coord) in enumerate(zip(confidences, class_ids, coords)):
    x1, y1, x2, y2 = coord.tolist()
    class_name = class_names[int(cls)]
    label = f"[{i}] {class_name} {score:.2f}"

    draw.rectangle([x1, y1, x2, y2], outline="red", width=2)
    draw.text((x1, max(0, y1 - 15)), label, fill="yellow", font=font)

# Display inline in notebook
display(img)


In [None]:
!pip install -q grad-cam
from torchvision.transforms import ToTensor
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

In [None]:
!pip install ultralytics -q
from ultralytics import YOLO

In [None]:
# Print the class labels to verify the correct class ID for 'Car'
print("Class labels:", result.names)


## Model 1.0

In [None]:
# i = 7
import torch
import torch.nn.functional as F
from torchvision import transforms
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
# while i<17:
# Load YOLOv11 model
# print(i)
model_path = "/kaggle/input/yolo11_trained/pytorch/default/1/yolo11m.pt"
model = YOLO(model_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Load and preprocess image
image_path = "/kaggle/input/kitti-dataset/data_object_image_2/testing/image_2/000007.png"
original_image = Image.open(image_path).convert("RGB")

preprocess = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor()
])
input_tensor = preprocess(original_image).unsqueeze(0).to(device)
input_tensor.requires_grad_()

# Get class names and find 'Car' index
class_names = model.names
car_class_index = [k for k, v in class_names.items() if v.lower() == "car"]
if not car_class_index:
    raise ValueError("Car class not found.")
car_class_index = car_class_index[0]

# Hook Grad-CAM
activations = None
gradients = None

def forward_hook(module, input, output):
    global activations
    activations = output

def backward_hook(module, grad_input, grad_output):
    global gradients
    gradients = grad_output[0]
# Use full backward hook (for modern PyTorch)




target_layer = model.model.model[16] # Should be a deep convolutional layer
#i+=1
target_layer.register_forward_hook(forward_hook)
target_layer.register_full_backward_hook(backward_hook)

# Forward pass through raw model
output = model.model(input_tensor)[0]  # shape: (1, N, 5 + num_classes)
output = output[0]  # Remove batch dim

# Apply sigmoid to confidence and softmax to class scores
obj_conf = output[:, 4]
class_probs = output[:, 5:]
# objectness × class score, which gives high precision Grad-CAM
car_scores = obj_conf * class_probs[:, car_class_index]

# Get top car prediction index
if car_scores.max() == 0:
    raise ValueError("No car detected in the raw output.")

top_idx = torch.argmax(car_scores)
score = car_scores[top_idx]
# score = car_scores.mean()####################################################


# Backward from the top car score
model.model.zero_grad()
score.backward(retain_graph=True)

# Compute Grad-CAM
weights = gradients.mean(dim=(2, 3), keepdim=True)
cam = (weights * activations).sum(dim=1, keepdim=True)
cam = F.relu(cam)

# Normalize and resize CAM
cam = cam.squeeze().detach().cpu().numpy()
cam = cv2.resize(cam, (640, 640))
cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)

# Create overlay
original_np = np.array(original_image.resize((640, 640))) / 255.0
heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap) / 255
overlay = heatmap + original_np
overlay = overlay / np.max(overlay)

# Display

plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.title("Original")
plt.imshow(original_np)
plt.axis("off")

plt.subplot(1, 3, 2)
plt.title("Grad-CAM Heatmap")
plt.imshow(cam, cmap="jet")
plt.axis("off")


plt.subplot(1, 3, 3)
plt.title("Overlay")
plt.imshow(overlay)
plt.axis("off")

plt.tight_layout()
plt.show()
results[0].show()

## Version 2.0

In [None]:
# i = 7
import torch
import torch.nn.functional as F
from torchvision import transforms
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Load YOLOv11 model
model_path = "/kaggle/input/yolov11m-pt/yolo11m.pt"
model = YOLO(model_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Load and preprocess image
image_path = "/kaggle/input/kitti-dataset/data_object_image_2/testing/image_2/000002.png"
original_image = Image.open(image_path).convert("RGB")

preprocess = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor()
])
input_tensor = preprocess(original_image).unsqueeze(0).to(device)
input_tensor.requires_grad_()

# Get class names and find 'Car' index
class_names = model.names
car_class_index = [k for k, v in class_names.items() if v.lower() == "car"]
if not car_class_index:
    raise ValueError("Car class not found.")
car_class_index = car_class_index[0]

# Hook Grad-CAM
activations = None
gradients = None

def forward_hook(module, input, output):
    global activations
    activations = output

def backward_hook(module, grad_input, grad_output):
    global gradients
    gradients = grad_output[0]

# Register hooks
target_layer = model.model.model[16]  # Should be a deep conv layer
target_layer.register_forward_hook(forward_hook)
target_layer.register_full_backward_hook(backward_hook)

# Forward pass through raw model
output = model.model(input_tensor)[0]  # shape: (1, N, 5 + num_classes)
output = output[0]  # Remove batch dim

# Compute car class scores
obj_conf = output[:, 4]
class_probs = output[:, 5:]
car_scores = obj_conf * class_probs[:, car_class_index]

# Select top car
if car_scores.max() == 0:
    raise ValueError("No car detected in raw output.")
    
top_idx = torch.argmax(car_scores)
score = car_scores[top_idx]
# Get top-k car detections
# top_k = 5  # You can tune this number
# topk_scores, topk_indices = torch.topk(car_scores, k=top_k)

# # Filter zero-score predictions
# non_zero = topk_scores > 0
# if non_zero.sum() == 0:
#     raise ValueError("No high-scoring car predictions.")
# filtered_scores = topk_scores[non_zero]

# # Take mean score for Grad-CAM
# score = filtered_scores.mean()



# Backward pass for Grad-CAM
model.model.zero_grad()
score.backward(retain_graph=True)

# Grad-CAM computation
weights = gradients.mean(dim=(2, 3), keepdim=True)
cam = (weights * activations).sum(dim=1, keepdim=True)
cam = F.relu(cam)

# Normalize CAM using Clipping Percentiles
cam = cam.squeeze().detach().cpu().numpy()
cam = cv2.resize(cam, (640, 640))

# Compute 1st and 99th percentiles
p1, p99 = np.percentile(cam, 1), np.percentile(cam, 99)

# Clip and rescale to [0, 1]
cam = np.clip(cam, p1, p99)
cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)

# Create heatmap overlay
original_np = np.array(original_image.resize((640, 640))) / 255.0
heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap) / 255
overlay = heatmap + original_np
overlay = overlay / np.max(overlay)

# Display
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.title("Original")
plt.imshow(original_np)
plt.axis("off")

plt.subplot(1, 3, 2)
plt.title("Grad-CAM Heatmap")
plt.imshow(cam, cmap="jet")
plt.axis("off")

plt.subplot(1, 3, 3)
plt.title("Overlay")
plt.imshow(overlay)
plt.axis("off")

plt.tight_layout()
plt.show()

# Show detection result
results[0].show()


In [None]:
# i = 7
import torch
import torch.nn.functional as F
from torchvision import transforms
from ultralytics import YOLO
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2

# Load YOLOv11 model
model_path = "/kaggle/input/yolov11m-pt/yolo11m.pt"
model = YOLO(model_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Load and preprocess image
image_path = "/kaggle/input/kitti-dataset/data_object_image_2/testing/image_2/000002.png"
original_image = Image.open(image_path).convert("RGB")

preprocess = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor()
])
input_tensor = preprocess(original_image).unsqueeze(0).to(device)
input_tensor.requires_grad_()

# Get class names and find 'Car' index
class_names = model.names
car_class_index = [k for k, v in class_names.items() if v.lower() == "car"]
if not car_class_index:
    raise ValueError("Car class not found.")
car_class_index = car_class_index[0]

# Hook Grad-CAM
activations = None
gradients = None

def forward_hook(module, input, output):
    global activations
    activations = output

def backward_hook(module, grad_input, grad_output):
    global gradients
    gradients = grad_output[0]

# Register hooks
target_layer = model.model.model[16]  # Should be a deep conv layer
target_layer.register_forward_hook(forward_hook)
target_layer.register_full_backward_hook(backward_hook)

# Forward pass through raw model
output = model.model(input_tensor)[0]  # shape: (1, N, 5 + num_classes)
output = output[0]  # Remove batch dim

# Compute car class scores
obj_conf = output[:, 4]
class_probs = output[:, 5:]
car_scores = obj_conf * class_probs[:, car_class_index]

# Select top car
if car_scores.max() == 0:
    raise ValueError("No car detected in raw output.")
    
# top_idx = torch.argmax(car_scores)
# score = car_scores[top_idx]
# Get top-k car detections
top_k = 5  # You can tune this number
topk_scores, topk_indices = torch.topk(car_scores, k=top_k)

# Filter zero-score predictions
non_zero = topk_scores > 0
if non_zero.sum() == 0:
    raise ValueError("No high-scoring car predictions.")
filtered_scores = topk_scores[non_zero]

# Take mean score for Grad-CAM
score = filtered_scores.mean()



# Backward pass for Grad-CAM
model.model.zero_grad()
score.backward(retain_graph=True)

# Grad-CAM computation
weights = gradients.mean(dim=(2, 3), keepdim=True)
cam = (weights * activations).sum(dim=1, keepdim=True)
cam = F.relu(cam)

# Normalize CAM using Clipping Percentiles
cam = cam.squeeze().detach().cpu().numpy()
cam = cv2.resize(cam, (640, 640))

# Compute 1st and 99th percentiles
p1, p99 = np.percentile(cam, 1), np.percentile(cam, 99)

# Clip and rescale to [0, 1]
cam = np.clip(cam, p1, p99)
cam = (cam - cam.min()) / (cam.max() - cam.min() + 1e-6)

# Create heatmap overlay
original_np = np.array(original_image.resize((640, 640))) / 255.0
heatmap = cv2.applyColorMap(np.uint8(255 * cam), cv2.COLORMAP_JET)
heatmap = np.float32(heatmap) / 255
overlay = heatmap + original_np
overlay = overlay / np.max(overlay)

# Display
plt.figure(figsize=(15, 5))
plt.subplot(1, 3, 1)
plt.title("Original")
plt.imshow(original_np)
plt.axis("off")

plt.subplot(1, 3, 2)
plt.title("Grad-CAM Heatmap")
plt.imshow(cam, cmap="jet")
plt.axis("off")

plt.subplot(1, 3, 3)
plt.title("Overlay")
plt.imshow(overlay)
plt.axis("off")

plt.tight_layout()
plt.show()

# Show detection result
results[0].show()

In [None]:
print(model.names)


# Integrated Gradients

In [None]:
!pip install captum

In [None]:
# ✅ Step 1: Setup – Imports & Model Loading
import torchvision.transforms as T
import numpy as np
import matplotlib.pyplot as plt
from captum.attr import IntegratedGradients

model.eval()

In [None]:
# ✅ Step 2: Prepare a Test Image
from PIL import Image

# Load and preprocess the image
image_path = "/kaggle/input/kitti-dataset/data_object_image_2/testing/image_2/000002.png"
raw_image = Image.open(image_path).convert("RGB")

transform = T.Compose([
    T.Resize((640, 640)),
    T.ToTensor(),
])

input_image = transform(raw_image).unsqueeze(0)  # Shape: [1, 3, H, W]
input_image.requires_grad = True

In [None]:
input_image.size()

In [None]:
predictions = model(input_image)

In [None]:
def forward_for_car_class(input_tensor, target_box_idx):
    """
    Returns the confidence score for class 'Car' (index 2) for a specific detection box.
    """
    with torch.no_grad():
        predictions = model(input_tensor)[0]  # Shape: [num_boxes, 5 + num_classes]

    car_class_index = 2  # Focus only on 'Car'
    return predictions[target_box_idx,  5+car_class_index].unsqueeze(0)  # Shape: [1]


In [None]:
import matplotlib.pyplot as plt
import torch

# Example tensor: shape [3, H, W] or [1, 3, H, W]
image_tensor = input_image.squeeze(0)  # if shape is [1, 3, H, W]

# Convert from [C, H, W] to [H, W, C] and detach from graph
image_np = image_tensor.permute(1, 2, 0).detach().cpu().numpy()

# Optional: if image values are in [0,1], display directly. If in [0,255], cast to uint8
plt.imshow(image_np)
plt.axis('off')
plt.show()

In [None]:
# Set your target class and detection box index manually for now
target_box_index = 0  # e.g., first detection
target_class_index = 2  # e.g., 'car'

# Initialize Integrated Gradients
# ig = IntegratedGradients(lambda x: forward_for_target_class(x, target_box_index, target_class_index))
ig = IntegratedGradients(lambda x: forward_for_car_class(x, target_box_index))
# input_image.size()
# Choose a baseline (black image)
# baseline = torch.zeros_like(input_image)
baseline = torch.zeros_like(input_image)
# print(baseline)
# plt.imshow(input_image)
# baseline.size()
# print("Hello")
# Compute attributions
# attributions = ig.attribute(input_image, baseline, target=0, n_steps=50)
attributions = ig.attribute(input_image, baseline, target=0, n_steps=50)

In [None]:
input_image.size()