In [1]:
import torch
from torchvision.models.detection import retinanet_resnet50_fpn_v2
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
def get_model(num_classes):
    model = retinanet_resnet50_fpn_v2(pretrained=True)
    in_feat = model.head.classification_head.conv[0][0].in_channels
    num_anchors = model.head.classification_head.num_anchors
    model.head.classification_head = RetinaNetClassificationHead(
        in_feat, num_anchors, num_classes
    )
    return model

model = get_model(num_classes=5)
model.load_state_dict(torch.load("retinanet_rsod.pth"))
model.eval()





RuntimeError: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.

In [None]:
from PIL import Image
import torchvision.transforms as T
img = Image.open("/home/saku/2010776109_RUSHO/RSOD_YOLO-20250708T170752Z-1-001/RSOD_YOLO/train/images/aircraft_34.jpg").convert("RGB")
transform = T.Compose([
    T.Resize((640, 640)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
])

img_tensor = transform(img).unsqueeze(0) 
model.to('cuda')
model.eval()
with torch.no_grad():
    preds = model(img_tensor.to('cuda'))

print(preds)


In [None]:
import os
import torch
import torchvision.transforms as T
from PIL import Image

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

image_folder = "/home/saku/2010776109_RUSHO/RSOD_YOLO-20250708T170752Z-1-001/RSOD_YOLO/test/test"

# All overpass images
all_overpass_images = [os.path.join(image_folder, f) for f in os.listdir(image_folder)
                       if f.lower().endswith(('.jpg', '.jpeg', '.png')) and 'overpass' in f.lower()]

transform = T.Compose([
    T.ToTensor()
])

class_names = ["__background__", "aircraft", "oiltank", "overpass", "playground"]
score_thresh = 0.6
overpass_label = class_names.index('overpass')

model.eval()

deleted = 0
kept = 0

for image_path in all_overpass_images:
    img = Image.open(image_path).convert("RGB")
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(img_tensor)[0]

    boxes = output['boxes'].cpu().numpy()
    scores = output['scores'].cpu().numpy()
    labels = output['labels'].cpu().numpy()

    detected_overpass = any(
        (label == overpass_label and score >= score_thresh)
        for label, score in zip(labels, scores)
    )

    if not detected_overpass:
        print(f"Deleting: {image_path}")
        os.remove(image_path)
        deleted += 1
    else:
        kept += 1

print(f"\n✅ Done. Deleted: {deleted} images. Kept: {kept} images.")


In [None]:
import os
import random
import torch
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

image_folder = "/home/saku/2010776109_RUSHO/RSOD_YOLO-20250708T170752Z-1-001/RSOD_YOLO/test/test"

all_aircraft_images = [os.path.join(image_folder, f) for f in os.listdir(image_folder) 
                       if f.lower().endswith(('.jpg', '.jpeg', '.png')) and 'aircraft' in f.lower()]

image_paths = random.sample(all_aircraft_images, 5)

transform = T.Compose([
    T.ToTensor()
])

class_names = ["__background__", "aircraft", "oiltank", "overpass", "playground"]
score_thresh = 0.5

model.eval()

fig, axes = plt.subplots(5, 2, figsize=(12, 25))  # 5 rows, 2 columns

for i, image_path in enumerate(image_paths):
    img = Image.open(image_path).convert("RGB")
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(img_tensor)[0]

    boxes = output['boxes'].cpu().numpy()
    scores = output['scores'].cpu().numpy()
    labels = output['labels'].cpu().numpy()

    # Show original image (left column)
    ax_orig = axes[i, 0]
    ax_orig.imshow(img)
    ax_orig.set_title(f"Original Image {i+1}")
    ax_orig.axis('off')

    # Show prediction image (right column)
    ax_pred = axes[i, 1]
    ax_pred.imshow(img)

    for box, score, label in zip(boxes, scores, labels):
        if score < score_thresh:
            continue
        x1, y1, x2, y2 = box
        width, height = x2 - x1, y2 - y1

        rect = patches.Rectangle((x1, y1), width, height, linewidth=2, edgecolor='r', facecolor='none')
        ax_pred.add_patch(rect)
        ax_pred.text(x1, y1 - 5, f"{class_names[label]} ({score:.2f})", color='red',
                     fontsize=10, backgroundcolor='white')

    ax_pred.set_title(f"Prediction {i+1}")
    ax_pred.axis('off')

plt.tight_layout()
plt.show()


In [None]:
import os
import random
import torch
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

image_folder = "/home/saku/2010776109_RUSHO/RSOD_YOLO-20250708T170752Z-1-001/RSOD_YOLO/test/test"

all_aircraft_images = [os.path.join(image_folder, f) for f in os.listdir(image_folder) 
                       if f.lower().endswith(('.jpg', '.jpeg', '.png')) and 'oiltank' in f.lower()]

image_paths = random.sample(all_aircraft_images, 5)

transform = T.Compose([
    T.ToTensor()
])

class_names = ["__background__", "aircraft", "oiltank", "overpass", "playground"]
score_thresh = 0.5

model.eval()

fig, axes = plt.subplots(5, 2, figsize=(12, 25))  # 5 rows, 2 columns

for i, image_path in enumerate(image_paths):
    img = Image.open(image_path).convert("RGB")
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(img_tensor)[0]

    boxes = output['boxes'].cpu().numpy()
    scores = output['scores'].cpu().numpy()
    labels = output['labels'].cpu().numpy()

    # Show original image (left column)
    ax_orig = axes[i, 0]
    ax_orig.imshow(img)
    ax_orig.set_title(f"Original Image {i+1}")
    ax_orig.axis('off')

    # Show prediction image (right column)
    ax_pred = axes[i, 1]
    ax_pred.imshow(img)

    for box, score, label in zip(boxes, scores, labels):
        if score < score_thresh:
            continue
        x1, y1, x2, y2 = box
        width, height = x2 - x1, y2 - y1

        rect = patches.Rectangle((x1, y1), width, height, linewidth=2, edgecolor='r', facecolor='none')
        ax_pred.add_patch(rect)
        ax_pred.text(x1, y1 - 5, f"{class_names[label]} ({score:.2f})", color='red',
                     fontsize=10, backgroundcolor='white')

    ax_pred.set_title(f"Prediction {i+1}")
    ax_pred.axis('off')

plt.tight_layout()
plt.show()


In [None]:
import os
import random
import torch
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

image_folder = "/home/saku/2010776109_RUSHO/RSOD_YOLO-20250708T170752Z-1-001/RSOD_YOLO/test/test"

# All 'overpass' images filenames
all_overpass_images = [os.path.join(image_folder, f) for f in os.listdir(image_folder) 
                      if f.lower().endswith(('.jpg', '.jpeg', '.png')) and 'overpass' in f.lower()]

transform = T.Compose([
    T.ToTensor()
])

class_names = ["__background__", "aircraft", "oiltank", "overpass", "playground"]
score_thresh = 0.7
overpass_label = class_names.index('overpass')  # usually 3

model.eval()

selected_images = []
selected_outputs = []

# To have a pool to sample from (20 or all if fewer)
pool_size = min(20, len(all_overpass_images))
random_pool = random.sample(all_overpass_images, pool_size)

for image_path in random_pool:
    img = Image.open(image_path).convert("RGB")
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(img_tensor)[0]

    boxes = output['boxes'].cpu().numpy()
    scores = output['scores'].cpu().numpy()
    labels = output['labels'].cpu().numpy()

    # Check if any detected box is 'overpass' with score >= threshold
    detected_overpass = any((label == overpass_label and score >= score_thresh) 
                            for label, score in zip(labels, scores))

    if detected_overpass:
        selected_images.append(img)
        selected_outputs.append(output)
    
    if len(selected_images) == 5:
        break

if len(selected_images) == 0:
    print("No overpass detections found with the given threshold.")
else:
    fig, axes = plt.subplots(len(selected_images), 2, figsize=(12, 5 * len(selected_images)))

    for i, (img, output) in enumerate(zip(selected_images, selected_outputs)):
        boxes = output['boxes'].cpu().numpy()
        scores = output['scores'].cpu().numpy()
        labels = output['labels'].cpu().numpy()

        # Show original image (left column)
        ax_orig = axes[i, 0] if len(selected_images) > 1 else axes[0]
        ax_orig.imshow(img)
        ax_orig.set_title(f"Original Image {i+1}")
        ax_orig.axis('off')

        # Show prediction image (right column)
        ax_pred = axes[i, 1] if len(selected_images) > 1 else axes[1]
        ax_pred.imshow(img)

        for box, score, label in zip(boxes, scores, labels):
            if score < score_thresh:
                continue
            if label != overpass_label:
                continue
            x1, y1, x2, y2 = box
            width, height = x2 - x1, y2 - y1

            rect = patches.Rectangle((x1, y1), width, height, linewidth=2, edgecolor='r', facecolor='none')
            ax_pred.add_patch(rect)
            ax_pred.text(x1, y1 - 5, f"{class_names[label]} ({score:.2f})", color='red',
                         fontsize=10, backgroundcolor='white')

        ax_pred.set_title(f"Prediction {i+1}")
        ax_pred.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
import os
import random
import torch
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

image_folder = "/home/saku/2010776109_RUSHO/RSOD_YOLO-20250708T170752Z-1-001/RSOD_YOLO/test/test"

# All 'overpass' images filenames
all_overpass_images = [os.path.join(image_folder, f) for f in os.listdir(image_folder) 
                      if f.lower().endswith(('.jpg', '.jpeg', '.png')) and 'playground' in f.lower()]

transform = T.Compose([
    T.ToTensor()
])

class_names = ["__background__", "aircraft", "oiltank", "overpass", "playground"]
score_thresh = 0.5
overpass_label = class_names.index('playground')  # usually 4

model.eval()

selected_images = []
selected_outputs = []

# To have a pool to sample from (20 or all if fewer)
pool_size = min(20, len(all_overpass_images))
random_pool = random.sample(all_overpass_images, pool_size)

for image_path in random_pool:
    img = Image.open(image_path).convert("RGB")
    img_tensor = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        output = model(img_tensor)[0]

    boxes = output['boxes'].cpu().numpy()
    scores = output['scores'].cpu().numpy()
    labels = output['labels'].cpu().numpy()

    # Check if any detected box is 'overpass' with score >= threshold
    detected_overpass = any((label == overpass_label and score >= score_thresh) 
                            for label, score in zip(labels, scores))

    if detected_overpass:
        selected_images.append(img)
        selected_outputs.append(output)
    
    if len(selected_images) == 5:
        break

if len(selected_images) == 0:
    print("No overpass detections found with the given threshold.")
else:
    fig, axes = plt.subplots(len(selected_images), 2, figsize=(12, 5 * len(selected_images)))

    for i, (img, output) in enumerate(zip(selected_images, selected_outputs)):
        boxes = output['boxes'].cpu().numpy()
        scores = output['scores'].cpu().numpy()
        labels = output['labels'].cpu().numpy()

        # Show original image (left column)
        ax_orig = axes[i, 0] if len(selected_images) > 1 else axes[0]
        ax_orig.imshow(img)
        ax_orig.set_title(f"Original Image {i+1}")
        ax_orig.axis('off')

        # Show prediction image (right column)
        ax_pred = axes[i, 1] if len(selected_images) > 1 else axes[1]
        ax_pred.imshow(img)

        for box, score, label in zip(boxes, scores, labels):
            if score < score_thresh:
                continue
            if label != overpass_label:
                continue
            x1, y1, x2, y2 = box
            width, height = x2 - x1, y2 - y1

            rect = patches.Rectangle((x1, y1), width, height, linewidth=2, edgecolor='r', facecolor='none')
            ax_pred.add_patch(rect)
            ax_pred.text(x1, y1 - 5, f"{class_names[label]} ({score:.2f})", color='red',
                         fontsize=10, backgroundcolor='white')

        ax_pred.set_title(f"Prediction {i+1}")
        ax_pred.axis('off')

    plt.tight_layout()
    plt.show()


In [None]:
import torch
import torchvision.transforms as T
from PIL import Image
import cv2
import os

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

transform = T.Compose([
    T.ToTensor()
])

class_names = ["__background__", "aircraft", "oiltank", "overpass", "playground"]
score_thresh = 0.5

model.eval()

video_path = "/home/saku/2010776109_RUSHO/retinanet_resnet50_fpn/Dubai Airport from Above.mp4"
cap = cv2.VideoCapture(video_path)

fps = int(cap.get(cv2.CAP_PROP_FPS))
width  = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

output_path = "retinanet_output.mp4"
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

frame_count = 0
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    pil_img = Image.fromarray(rgb_frame)

    img_tensor = transform(pil_img).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(img_tensor)[0]

    boxes = output['boxes'].cpu().numpy()
    scores = output['scores'].cpu().numpy()
    labels = output['labels'].cpu().numpy()

    for box, score, label in zip(boxes, scores, labels):
        if score < score_thresh:
            continue
        x1, y1, x2, y2 = box.astype(int)
        label_name = class_names[label]
        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
        cv2.putText(frame, f"{label_name} {score:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
    out.write(frame)
    frame_count += 1

    if frame_count % 30 == 0:
        print(f"Processed {frame_count} frames...")

cap.release()
out.release()
print(f"✅ Video saved to {output_path}")
