In [None]:
# Bootstrap: expose the project's source folder and configure CUDA debugging.
import os
import sys

# Make modules under ../src importable from this notebook.
sys.path.append("../src")

# Ask CUDA to launch kernels synchronously so errors surface at the failing call.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [None]:
import sys
# list.append() takes exactly one item, so the two source folders are added
# one by one; entries already on sys.path are skipped so re-running the cell
# does not accumulate duplicates.
sys.path.extend([p for p in ("../src", "../../src") if p not in sys.path])
import os
# Ask CUDA to launch kernels synchronously so errors surface at the failing call.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import json
from functools import partial
from pathlib import Path
from PIL import Image
import numpy as np

import cv2
import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as T
import torchvision.transforms as transforms
from torch.utils.data import ConcatDataset
from torch.utils.data import Dataset, random_split, DataLoader
from torchvision.utils import draw_bounding_boxes

from contextlib import redirect_stdout
from pathlib import Path
# Report whether PyTorch can see a CUDA device in this environment.
print(torch.cuda.is_available())

In [None]:
from model_inference import compare_two_models, visualize_detections, display_text_block, visualize_and_save, infer_image, infer_fp16_image, visualize_fp16
from compare_videos import play_top_bottom, play_side_by_side

In [None]:
# MODELS - https://github.com/pytorch/vision/tree/main/torchvision/models/detection
from torchvision.models.detection import (
    fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights,
    fasterrcnn_mobilenet_v3_large_fpn, FasterRCNN_MobileNet_V3_Large_FPN_Weights,
    fasterrcnn_mobilenet_v3_large_320_fpn, FasterRCNN_MobileNet_V3_Large_320_FPN_Weights,
    ssd300_vgg16, SSD300_VGG16_Weights,
    ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights,
    retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights,
    fcos_resnet50_fpn, FCOS_ResNet50_FPN_Weights
)
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.retinanet import RetinaNetClassificationHead
from torchvision.models.detection.fcos import FCOSHead

In [None]:
# Shared settings used by the model-loading and comparison cells.

# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Score threshold handed to compare_two_models().
score_thr = 0.5

# Class id -> display name (your label map).
label_map = {1: "object"}

# Background + 1 foreground class (adjust if needed).
num_classes = 2

In [None]:
# Load the "v1" Faster R-CNN (ResNet-50 FPN v2) detector from its checkpoint.
v1_model_path = Path("../models/v1/v1_rcnn_resnet50.pth")
# Echo the checkpoint path, wrapped in ANSI escape codes so it prints in red.
print(f"\033[91m{v1_model_path}\033[0m")

# Build the architecture starting from the COCO_V1 weights.
v1_model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1)
# get number of input features for the classifier
in_features = v1_model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one sized for `num_classes`
v1_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Read the checkpoint onto the CPU first, then copy it into the model.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only use it with checkpoints from a trusted source.
v1_model.load_state_dict(torch.load(v1_model_path, weights_only=False,  map_location=torch.device('cpu')))
# Move to the selected device and switch to inference mode; as the cell's last
# expression this also displays the module.
v1_model.to(device).eval()

In [None]:
# Load the "v2" Faster R-CNN (ResNet-50 FPN v2) detector from its checkpoint.
v2_model_path = Path("../models/v2/v2_rcnn_resnet50.pth")
# Echo the checkpoint path, wrapped in ANSI escape codes so it prints in red.
print(f"\033[91m{v2_model_path}\033[0m")

# Build the architecture starting from the COCO_V1 weights.
v2_model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1)
# get number of input features for the classifier
in_features = v2_model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one sized for `num_classes`
v2_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Read the checkpoint onto the CPU first, then copy it into the model.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only use it with checkpoints from a trusted source.
v2_model.load_state_dict(torch.load(v2_model_path, weights_only=False,  map_location=torch.device('cpu')))
# Move to the selected device and switch to inference mode; as the cell's last
# expression this also displays the module.
v2_model.to(device).eval()

In [None]:
# Load the Faster R-CNN (MobileNetV3-Large FPN) detector stored under
# ../models/distilled.
kd_model_path = Path("../models/distilled/kd_resize_rot_frcnn_mobilenet_epoch8.pth")
# Echo the checkpoint path, wrapped in ANSI escape codes so it prints in red.
print(f"\033[91m{kd_model_path}\033[0m")

# Build the architecture starting from the COCO_V1 weights, then swap the box
# predictor for one sized for `num_classes`.
kd_model = fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1)
in_feat = kd_model.roi_heads.box_predictor.cls_score.in_features
kd_model.roi_heads.box_predictor = FastRCNNPredictor(in_feat, num_classes)

# Read the checkpoint onto the CPU first, then copy it into the model.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only use it with checkpoints from a trusted source.
kd_model.load_state_dict(torch.load(kd_model_path, weights_only=False,  map_location=torch.device('cpu')))
# Move to the selected device and switch to inference mode; as the cell's last
# expression this also displays the module.
kd_model.to(device).eval()

### v1 vs v2 model

In [None]:
# -----------------------
# Run on all test images
# -----------------------
# Compare the v1 and v2 detectors on every image file in the test folder and
# print the path returned by compare_two_models() for each one.
test_dir = "../data/test_images"
# os.listdir() returns entries in arbitrary order; sorting keeps the
# processing order (and the printed log) identical from run to run.
for fname in sorted(os.listdir(test_dir)):
    # Skip anything that does not carry a known image extension (case-insensitive).
    if not fname.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")):
        continue
    img_path = os.path.join(test_dir, fname)
    out_path = compare_two_models(img_path, v1_model, "v1_model", v2_model, "v2_model", label_map, score_thr)
    print(f"Saved: {out_path}")

### v2 vs KD model

In [None]:
# -----------------------
# Run on all test images
# -----------------------
# Compare the v2 and KD detectors on every image file in the test folder and
# print the path returned by compare_two_models() for each one.
test_dir = "../data/test_images"
# os.listdir() returns entries in arbitrary order; sorting keeps the
# processing order (and the printed log) identical from run to run.
for fname in sorted(os.listdir(test_dir)):
    # Skip anything that does not carry a known image extension (case-insensitive).
    if not fname.lower().endswith((".jpg", ".jpeg", ".png", ".bmp", ".tif", ".tiff")):
        continue
    img_path = os.path.join(test_dir, fname)
    out_path = compare_two_models(img_path, v2_model, "v2_model", kd_model, "kd_model", label_map, score_thr)
    print(f"Saved: {out_path}")

# Quantized Int8 Model
scripted model

In [None]:
# Settings for the int8 (TorchScript) inference section.

# Device: quantized models run on CPU
device = torch.device("cpu")

# Detection filtering knobs handed to infer_image().
score_thresh = 0.5  # tweak as needed
max_dets = None     # e.g., 50

# Where the scripted model lives and where the test images are read from.
save_dir = Path("../models/frcnn_mobilenet_quantized")
scripted_path = save_dir.joinpath("model_scripted_quant.pt")  # produced by torch.jit.save(...)
test_images_dir = Path("../data/test_images")  # <-- change to your test folder

# Output folder for the visualizations; created up front if it is missing.
out_dir = Path("../outputs/quant_infer")
out_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# 1) Pick the CPU int8 backend.
# PyTorch documents 'fbgemm' for x86 servers and 'qnnpack' for ARM/mobile.
# NOTE(review): the engine should match the backend the model was quantized
# for; confirm against the export script.
torch.backends.quantized.engine = "qnnpack"  # or "fbgemm"

# 2) Load on CPU (quantized models must run on CPU). The location is spelled
# out instead of reusing the mutable global `device`, so the cell still does
# the right thing if `device` was re-pointed at CUDA by another cell.
quant_model = torch.jit.load(scripted_path, map_location=torch.device("cpu"))
quant_model.eval()

print("Loaded quantized TorchScript model on CPU with engine:", torch.backends.quantized.engine)

# Preprocessing: Faster R-CNN expects a FloatTensor [0,1], CxHxW
to_tensor = T.ToTensor()

# Optional: class map (index -> name). Edit to your dataset.
# 0 is background by convention; your training 'num_classes' included it.
id2name = {
    1: "object",  # <-- replace with your class names
    # 2: "another_class",
}

In [None]:
# Run on a folder of test images: detect with the quantized model and save one
# visualization per image into `out_dir`.
img_paths = sorted([p for p in test_images_dir.glob("*") if p.suffix.lower() in {".jpg", ".jpeg", ".png"}])
print(f"Found {len(img_paths)} test images.")

for p in img_paths:
    try:
        # Image.open() keeps the file handle open until the image is closed;
        # convert() returns an independent in-memory copy, so the handle can
        # be released right away.
        with Image.open(p) as raw:
            im = raw.convert("RGB")
        det = infer_image(quant_model, im, score_thresh=score_thresh, max_dets=max_dets)
        out_path = out_dir / p.name
        # Pass the class-name map defined for this section, as the FP16
        # image loop already does.
        visualize_and_save(im, det, out_path, id2name=id2name)
    except Exception as e:
        # Best effort: report the failing image and continue with the rest.
        print(f"Failed on {p}: {e}")

print(f"Done. Visualized detections saved to: {out_dir.resolve()}")

# Quantized float16 
not scripted model

In [None]:
# Settings for the FP16 (state-dict, not scripted) inference section.

# Where the FP16 checkpoint lives and where the test images are read from.
save_dir = Path("../models/frcnn_mobilenet_fp16")  # <- where you saved your FP16 model
# scripted_path = save_dir / "model_scripted_fp16.pt"
model_path = save_dir.joinpath("model_fp16_state_dict.pth")
test_images_dir = Path("../data/test_images")  # <-- change to your test folder

# Output folder for the visualizations; created up front if it is missing.
out_dir = Path("../outputs/fp16_infer")
out_dir.mkdir(exist_ok=True, parents=True)

# Device: prefer CUDA for FP16; CPU stays float32
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"[FP16] Using device: {device}")

# Detection filtering knobs handed to infer_fp16_image().
score_thresh = 0.5  # tweak as needed
max_dets = None     # e.g., 50

In [None]:
# Build a Faster R-CNN (MobileNetV3-Large FPN) starting from the COCO_V1
# weights, swap the box predictor for one sized for `num_classes`, and load
# the checkpoint at `model_path`.
model = fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1)
in_feat = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feat, num_classes)
# The checkpoint is read onto the CPU first, then copied into the model.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only use it with checkpoints from a trusted source.
# NOTE(review): load_state_dict copies values into the existing parameters, so
# the module keeps the dtype it was constructed with; presumably
# infer_fp16_image handles any half-precision casting - confirm.
model.load_state_dict(torch.load(model_path, weights_only=False,  map_location=torch.device('cpu')))
# Move to the selected device and switch to inference mode.
model.to(device).eval()

print(f"Loaded model: {model_path.name}")

# Preprocessing: Faster R-CNN expects a FloatTensor [0,1], CxHxW
# NOTE(review): `to_tensor` is not referenced by any visible cell; the
# inference helpers presumably do their own conversion - confirm.
to_tensor = T.ToTensor()

# Optional: class map (index -> name). Edit to your dataset.
id2name = {
    1: "object",  # <-- replace/add your class names
}

In [None]:
# Run on a folder of test images: detect with the FP16 model and save one
# visualization per image into `out_dir`.
img_paths = sorted([p for p in test_images_dir.glob("*") if p.suffix.lower() in {".jpg", ".jpeg", ".png"}])
print(f"Found {len(img_paths)} test images.")

for p in img_paths:
    try:
        # Image.open() keeps the file handle open until the image is closed;
        # convert() returns an independent in-memory copy, so the handle can
        # be released right away.
        with Image.open(p) as raw:
            im = raw.convert("RGB")
        det = infer_fp16_image(model, im, score_thresh=score_thresh, max_dets=max_dets)
        out_path = out_dir / p.name
        visualize_and_save(im, det, out_path, id2name=id2name)
    except Exception as e:
        # Best effort: report the failing image and continue with the rest.
        print(f"Failed on {p}: {e}")

print(f"Done. Visualized detections saved to: {out_dir.resolve()}")

# VIDEO Inference

In [None]:
# Settings for the video-inference section.

# Which clip to process and how to tag the result.
video_name, model_type = "100-crop", "KD"
output_video_name = f"output_{video_name}_{model_type}model"

# Which checkpoint to load (alternative folder: "frcnn_mobilenet_inter").
model_dir, model_name = "distilled", "kd_resize_rot_frcnn_mobilenet_epoch8.pth"
model_path = Path(f"../models/{model_dir}/{model_name}")

# 1 class + background; adjust to your training setup
num_classes = 2
# Class id -> display name (your label map).
class_map = {1: "object"}

In [None]:
#### mobilenet_v3_large_fpn #####
# Build a Faster R-CNN (MobileNetV3-Large FPN) starting from the COCO_V1
# weights, swap the box predictor for one sized for `num_classes`, and load
# the checkpoint at `model_path`.
# Echo the checkpoint path, wrapped in ANSI escape codes so it prints in red.
print(f"\033[91m{model_path}\033[0m")
model = fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1)
in_feat = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feat, num_classes)
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only use it with checkpoints from a trusted source.
state = torch.load(model_path, weights_only=False, map_location="cpu")  # load to CPU first (safer)
# strict=True: every key in the checkpoint must match the model exactly.
model.load_state_dict(state, strict=True)
# Move to the selected device and switch to inference mode; as the cell's last
# expression this also displays the module.
model.to(device).eval()

# Alternative (disabled): load a ResNet-50 FPN v2 checkpoint instead.
# ##### resnet50_fpn_v2 backbone #####
# print(f"\033[91m{model_path}\033[0m")
# model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.COCO_V1)
# # get number of input features for the classifier
# in_features = model.roi_heads.box_predictor.cls_score.in_features
# # replace the pre-trained head with a new one
# model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# model.load_state_dict(torch.load(model_path, weights_only=False,  map_location=torch.device('cpu')))
# model.to(device).eval()

In [None]:
# ---- video I/O ----
# Open the input clip and create a writer with the same frame rate and size.
video_path = f"../videos/{video_name}.avi"
cap = cv2.VideoCapture(video_path)
assert cap.isOpened(), f"Failed to open {video_path}"

# Some containers report 0 FPS; fall back to 30 in that case.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

output_video_path = f"../videos/{output_video_name}.avi"
fourcc = cv2.VideoWriter_fourcc(*"XVID")
video_writer_out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
# A writer that failed to open (missing codec, bad path) silently drops every
# frame, so fail loudly here instead of producing an empty output file.
if not video_writer_out.isOpened():
    cap.release()
    raise IOError(f"Failed to open video writer for {output_video_path}")

# Per-frame FPS samples, filled and trimmed by the main loop.
fps_list = []

In [None]:
# ---- main loop ----
# Read frames until the capture is exhausted, run detection + visualization on
# each one, overlay a running FPS figure and append the frame to the output.

# Defined before the loop so the summary below works even when no frame is read.
running_fps = 0.0
try:
    while True:
        start = time.time()
        success, frame_bgr = cap.read()
        if not success:
            break

        # OpenCV -> PIL (RGB)
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        img_pil = Image.fromarray(frame_rgb)

        # run detection + visualization (returns PIL)
        frame_vis_pil = visualize_detections(img_pil, model, class_map)

        # PIL -> OpenCV (BGR)
        frame_vis = cv2.cvtColor(np.array(frame_vis_pil), cv2.COLOR_RGB2BGR)

        # keep original size if needed
        if (frame_vis.shape[1], frame_vis.shape[0]) != (width, height):
            frame_vis = cv2.resize(frame_vis, (width, height), interpolation=cv2.INTER_LINEAR)

        # FPS: covers read + detection + conversion; averaged over the last
        # 30 frames. The lower bound on `elapsed` avoids division by zero.
        elapsed = time.time() - start
        fps_list.append(1.0 / max(elapsed, 1e-6))
        if len(fps_list) > 30:
            fps_list.pop(0)
        running_fps = float(np.mean(fps_list))

        # overlay stats
        frame_vis = display_text_block(frame_vis, [f"FPS : {running_fps:.2f}"])

        # write
        video_writer_out.write(frame_vis)
finally:
    # Always release both handles, even if a frame fails or the kernel is
    # interrupted, so the output file is finalized and the input is unlocked.
    cap.release()
    video_writer_out.release()

print(f"Mean FPS over last window: {running_fps:.2f}")
print(f"Output video saved at {output_video_path}")


# Compare videos 
Top-Bottom

In [None]:
# Stack the KD and Pruned output videos top/bottom and save the result with
# the XVID codec, without opening a display window (display=False).
# The KD input matches the file written by the video-inference cells for
# video_name="100-crop", model_type="KD".
# NOTE(review): no visible cell writes the "Prunedmodel" video; presumably it
# comes from re-running video inference with a pruned checkpoint - confirm it
# exists before running this cell.
play_top_bottom(
    "../videos/output_100-crop_KDmodel.avi",
    "../videos/output_100-crop_Prunedmodel.avi",
    save_path="../videos/output_100-crop_KD_vs_Pruned_topbottom.avi",
    codec="XVID",
    display=False
)



In [None]:
# Place the KD and Pruned output videos side by side and save the result with
# the XVID codec, without opening a display window (display=False).
# NOTE(review): no visible cell writes the "Prunedmodel" video; presumably it
# comes from re-running video inference with a pruned checkpoint - confirm it
# exists before running this cell.
# NOTE(review): this save_path contains spaces, unlike the underscore-separated
# top/bottom output name - confirm that is intended.
play_side_by_side(
    "../videos/output_100-crop_KDmodel.avi",
    "../videos/output_100-crop_Prunedmodel.avi",
    save_path="../videos/output_100-crop_KD vs Pruned.avi",
    codec="XVID",
    display=False
)

# VIDEO Inference - Quantized float16

In [None]:
# Settings for the FP16 video-inference section.

# Which clip to process and how to name the result.
video_name = "100-crop"
output_video_name = f"output_{video_name}_fl16_quant_model"

# Which checkpoint to load (alternative folder: "frcnn_mobilenet_inter").
model_dir, model_name = "frcnn_mobilenet_fp16", "model_fp16_state_dict.pth"
model_path = Path(f"../models/{model_dir}/{model_name}")

# 1 class + background; adjust to your training setup
num_classes = 2
# Class id -> display name (your label map).
class_map = {1: "object"}

# Device: prefer CUDA for FP16; CPU stays float32
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"[FP16] Using device: {device}")

# Detection filtering knobs handed to the inference helper.
score_thresh = 0.5  # tweak as needed
max_dets = None     # e.g., 50

In [None]:
# Build a Faster R-CNN (MobileNetV3-Large FPN) starting from the COCO_V1
# weights, swap the box predictor for one sized for `num_classes`, and load
# the checkpoint at `model_path`.
model = fasterrcnn_mobilenet_v3_large_fpn(weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.COCO_V1)
in_feat = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_feat, num_classes)
# The checkpoint is read onto the CPU first, then copied into the model.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only use it with checkpoints from a trusted source.
model.load_state_dict(torch.load(model_path, weights_only=False,  map_location=torch.device('cpu')))
# Move to the selected device and switch to inference mode; as the cell's last
# code statement is an assignment, nothing is displayed.
model.to(device).eval()

# Preprocessing: Faster R-CNN expects a FloatTensor [0,1], CxHxW
# NOTE(review): `to_tensor` is not referenced by any visible cell; the
# inference helpers presumably do their own conversion - confirm.
to_tensor = T.ToTensor()

# Optional: class map (index -> name). Edit to your dataset.
# NOTE(review): the video loop in this section passes `class_map`, not
# `id2name`, to the visualizer - confirm whether this dict is still needed.
id2name = {
    1: "object",  # <-- replace/add your class names
}

In [None]:
# ---- video I/O ----
# Open the input clip and create a writer with the same frame rate and size.
video_path = f"../videos/{video_name}.avi"
cap = cv2.VideoCapture(video_path)
assert cap.isOpened(), f"Failed to open {video_path}"

# Some containers report 0 FPS; fall back to 30 in that case.
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

output_video_path = f"../videos/{output_video_name}.avi"
fourcc = cv2.VideoWriter_fourcc(*"XVID")
video_writer_out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))
# A writer that failed to open (missing codec, bad path) silently drops every
# frame, so fail loudly here instead of producing an empty output file.
if not video_writer_out.isOpened():
    cap.release()
    raise IOError(f"Failed to open video writer for {output_video_path}")

# Per-frame FPS samples, filled and trimmed by the main loop.
fps_list = []

In [None]:
# ---- main loop ----
# Read frames until the capture is exhausted, run detection and visualization
# on each one, overlay a running FPS figure and append the frame to the output.

# Defined before the loop so the summary below works even when no frame is read.
running_fps = 0.0
try:
    while True:
        start = time.time()
        success, frame_bgr = cap.read()
        if not success:
            break

        # OpenCV -> PIL (RGB)
        frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        img_pil = Image.fromarray(frame_rgb)

        # NOTE(review): the FP16 image-folder cell calls infer_fp16_image for
        # this same checkpoint, while this loop calls infer_image - confirm
        # which helper is intended for the FP16 video run.
        det = infer_image(model, img_pil, score_thresh=score_thresh, max_dets=max_dets)
        # draw the detections (returns PIL)
        frame_vis_pil = visualize_fp16(img_pil, det, class_map)

        # PIL -> OpenCV (BGR)
        frame_vis = cv2.cvtColor(np.array(frame_vis_pil), cv2.COLOR_RGB2BGR)

        # keep original size if needed
        if (frame_vis.shape[1], frame_vis.shape[0]) != (width, height):
            frame_vis = cv2.resize(frame_vis, (width, height), interpolation=cv2.INTER_LINEAR)

        # FPS: covers read + detection + conversion; averaged over the last
        # 30 frames. The lower bound on `elapsed` avoids division by zero.
        elapsed = time.time() - start
        fps_list.append(1.0 / max(elapsed, 1e-6))
        if len(fps_list) > 30:
            fps_list.pop(0)
        running_fps = float(np.mean(fps_list))

        # overlay stats
        frame_vis = display_text_block(frame_vis, [f"FPS : {running_fps:.2f}"])

        # write
        video_writer_out.write(frame_vis)
finally:
    # Always release both handles, even if a frame fails or the kernel is
    # interrupted, so the output file is finalized and the input is unlocked.
    cap.release()
    video_writer_out.release()

print(f"Mean FPS over last window: {running_fps:.2f}")
print(f"Output video saved at {output_video_path}")
