In [1]:
from typing import Dict, Tuple
import h5py
import numpy as np


def get_path_in_parent(*args):
    return os.path.abspath(os.path.join(os.getcwd(), '..', *args))

def get_path_in_storage(*args):
    return get_path_in_parent("storage", *args)

def get_model_paths(model_num: int):
    path = get_path_in_parent(f"yolo_model_{model_num}")
    return (
        os.path.join(path, f"yolov4-tiny-logistics_size_416_{model_num}.weights"),
        os.path.join(path, f"yolov4-tiny-logistics_size_416_{model_num}.cfg")
    )

def get_outputs(model_num: int) -> Dict[str, Tuple[np.ndarray, ...]]:
    net = cv2.dnn.readNet(*get_model_paths(model_num))

    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

    outputs = {}
    logistics_path = get_path_in_storage("logistics")
    for filename in os.listdir(logistics_path):
        if not filename.lower().endswith(".jpg"):
            continue

        image = np.array(Image.open(os.path.join(logistics_path, filename)))

        blob = cv2.dnn.blobFromImage(image,
                                     scalefactor = 1 / 255.,
                                     size=(416, 416),
                                     mean=(0, 0, 0),
                                     swapRB=True,
                                     crop=False)

        net.setInput(blob)

        outputs[filename[:-4]] = net.forward(output_layers)

    return outputs

def save_outputs(model_num: int, outputs: Dict[str, Tuple[np.ndarray, ...]]) -> None:
    """Save a dict of tuples of arrays to an HDF5 file."""
    path = get_path_in_storage(f"outputs_{model_num}.h5")
    with h5py.File(path, "w") as f:
        for key, tup in outputs.items():
            grp = f.create_group(str(key))
            for i, arr in enumerate(tup):
                grp.create_dataset(
                    f"array_{i}", data=arr, compression="gzip", compression_opts=1
                )

def load_outputs(model_num: int) -> Dict[str, Tuple[np.ndarray, ...]]:
    """Load a dict of tuples of arrays from an HDF5 file."""
    path = get_path_in_storage(f"outputs_{model_num}.h5")
    outputs_loaded = {}
    with h5py.File(path, "r") as f:
        for key in f.keys():
            grp = f[key]
            arrays = tuple(np.array(grp[subkey]) for subkey in sorted(grp.keys()))
            outputs_loaded[key] = arrays
    return outputs_loaded

def apply_nms(outputs: Dict[str, Tuple[np.ndarray, ...]],
              conf_threshold: float = 0.5,
              nms_threshold: float = 0.4
             ) -> Dict[str, Tuple[np.ndarray, np.ndarray, np.ndarray]]:
    """
    Apply Non-Max Suppression (NMS) to YOLO-style outputs.

    Args:
        outputs (dict): filename -> raw outputs from get_outputs()
        conf_threshold (float): confidence threshold
        nms_threshold (float): IoU threshold for NMS

    Returns:
        dict: filename -> (boxes, confidences, class_ids)
    """
    filtered_outputs = {}

    for fname, layer_outputs in outputs.items():
        boxes, confidences, class_ids = [], [], []

        # layer_outputs is a tuple of arrays (from YOLO forward pass)
        for out in layer_outputs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = detection[4] * scores[class_id]

                if confidence > conf_threshold:
                    # YOLO gives relative coords
                    # We don’t know original image size here,
                    # so assume normalization done elsewhere
                    cx, cy, w, h = detection[0:4]
                    x = cx - w / 2
                    y = cy - h / 2
                    boxes.append([int(x), int(y), int(w), int(h)])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Apply NMS
        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

        final_boxes, final_confs, final_classes = [], [], []
        if len(indices) > 0:
            for i in indices.flatten():
                final_boxes.append(boxes[i])
                final_confs.append(confidences[i])
                final_classes.append(class_ids[i])

        filtered_outputs[fname] = (
            np.array(final_boxes),
            np.array(final_confs),
            np.array(final_classes)
        )

    return filtered_outputs

# First time running model

In [None]:
outputs_1 = get_outputs(1)

In [5]:
save_outputs(1, outputs_1)

In [6]:
outputs_2 = get_outputs(2)
save_outputs(2, outputs_2)

# After first time

In [2]:
outputs_1 = load_outputs(1)

In [3]:
outputs_2 = load_outputs(2)

# Calculate Metrics

In [4]:
def get_ground_truths() -> Dict[str, np.ndarray]:
    logistics_path = get_path_in_storage("logistics")
    res = {}

    for filename in os.listdir(logistics_path):
        if filename.endswith(".txt"):
            filepath = os.path.join(logistics_path, filename)

            with open(filepath, "r") as f:
                lines = []

                for line in f:
                    parts = line.strip().split()
                    if len(parts) == 5:  # expecting 5 values
                        t = (
                            int(parts[0]),
                            float(parts[1]),
                            float(parts[2]),
                            float(parts[3]),
                            float(parts[4]),
                        )
                        lines.append(t)

                res[filename[:-4]] = np.array(lines)

    return res

In [5]:
from torchvision.ops import box_iou
from torch import Tensor
import torch

def cxcywh_to_xyxy(boxes: Tensor) -> Tensor:
    # boxes: (N, 4) in [cx, cy, w, h]
    xyxy = torch.zeros_like(boxes)
    xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # x1
    xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # y1
    xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2  # x2
    xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2  # y2
    return xyxy

def get_precision_and_recall(outputs: Dict[str, Tuple[np.ndarray, ...]],
                             ground_truths: Dict[str, np.ndarray],
                             confidence_threshold: float,
                             iou_threshold: float) -> Tuple[float, float]:

    correct_predictions = 0
    total_predictions = 0
    total_ground_truths = sum(len(gt) for _, gt in ground_truths.items())

    for key in outputs:
        output = outputs[key]
        gt = ground_truths[key]

        for feature_maps in output:
            for detection in feature_maps:
                box = detection[:4]
                score = detection[4]
                class_scores = detection[5:]
                class_id = np.argmax(class_scores)

                confidence = score * class_scores[class_id]
                if confidence < confidence_threshold:
                    continue

                total_predictions += 1

                # Extract IDs (shape: N,)
                gt_ids = gt[:, 0]

                # Extract boxes (shape: N, 4)
                gt_boxes = gt[:, 1:5]

                boxes_xyxy = cxcywh_to_xyxy(torch.tensor([box]))
                gts_xyxy = cxcywh_to_xyxy(torch.tensor(gt_boxes))

                ious = box_iou(boxes_xyxy, gts_xyxy)[0]
                for iou, gt_id in zip(ious, gt_ids):
                    if iou >= iou_threshold and class_id == gt_id:
                        # This is a correct prediction
                        correct_predictions += 1
                        continue

    print(correct_predictions, total_predictions, total_ground_truths)
    return correct_predictions / total_predictions, correct_predictions / total_ground_truths

In [6]:
gts = get_ground_truths()

In [7]:
get_precision_and_recall(outputs_1, gts, 0.5, 0.4)

  boxes_xyxy = cxcywh_to_xyxy(torch.tensor([box]))


11109 12059 36721


(0.9212206650634381, 0.3025244410555268)

In [8]:
get_precision_and_recall(outputs_2, gts, 0.5, 0.4)

15183 16082 36721


(0.9440989926626041, 0.4134691321042455)

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Sweep over confidence thresholds
thresholds = np.linspace(0, 1, 50)
iou_threshold = 0.4

precisions_1, recalls_1 = [], []
precisions_2, recalls_2 = [], []

for t in thresholds:
    p1, r1 = get_precision_and_recall(outputs_1, gts, t, iou_threshold)
    p2, r2 = get_precision_and_recall(outputs_2, gts, t, iou_threshold)

    precisions_1.append(p1)
    recalls_1.append(r1)
    precisions_2.append(p2)
    recalls_2.append(r2)

# Plot PR curves
plt.figure(figsize=(8,6))
plt.plot(recalls_1, precisions_1, label="Model 1", marker="o", markersize=3, linestyle="-")
plt.plot(recalls_2, precisions_2, label="Model 2", marker="s", markersize=3, linestyle="-")

plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall Curve")
plt.legend()
plt.grid(True)
plt.show()

# Debugging / Testing

In [6]:

from PIL import Image

# Your class names
classes = [
    "barcode",
    "car",
    "cardboard box",
    "fire",
    "forklift",
    "freight container",
    "gloves",
    "helmet",
    "ladder",
    "license plate",
    "person",
    "qr code",
    "road sign",
    "safety vest",
    "smoke",
    "traffic cone",
    "traffic light",
    "truck",
    "van",
    "wood pallet"
]

def run_single_prediction(model_num: int, image_path: str, output_path: str,
                          conf_threshold: float = 0.5, nms_threshold: float = 0.4):

    # Load YOLO model
    net = cv2.dnn.readNet(*get_model_paths(model_num))

    # Get output layers
    layer_names = net.getLayerNames()
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]

    # Load image
    image = np.array(Image.open(image_path))
    height, width = image.shape[:2]

    # Preprocess image
    blob = cv2.dnn.blobFromImage(image, 1/255.0, (416, 416), (0,0,0), swapRB=True, crop=False)
    net.setInput(blob)

    # Run inference
    layer_outputs = net.forward(output_layers)

    boxes, confidences, class_ids = [], [], []

    # Process YOLO detections
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]

            if confidence > conf_threshold:
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)

                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    # Apply NMS
    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

    # Draw boxes
    for i in indices:
        i = i[0] if isinstance(i, (list, np.ndarray)) else i
        x, y, w, h = boxes[i]
        label = classes[class_ids[i]]
        conf = confidences[i]

        color = (0, 255, 0)
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        cv2.putText(image, f"{label} {conf:.2f}", (x, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Save output
    cv2.imwrite(output_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))


# Usage:
image_file = "0bf5b2b81734d346d8c5034b92b5077e1625209617_jpeg_jpg.rf.04c6b8f7f044626b3f90e76296acb6b4.jpg"
run_single_prediction(1, image_file, "test_pred.jpg")


In [9]:
import cv2
import os
import numpy as np
from typing import List, Tuple

def draw_predictions(outputs: List[Tuple[np.ndarray, ...]],
                     model_num: int,
                     class_names: List[str],
                     conf_threshold: float = 0.5,
                     nms_threshold: float = 0.4):
    logistics_path = get_path_in_storage("logistics")
    prediction_path = get_path_in_storage("prediction")
    os.makedirs(prediction_path, exist_ok=True)

    # Load image filenames in the same order as get_outputs()
    image_files = [f for f in os.listdir(logistics_path) if f.lower().endswith(".jpg")]

    for img_idx, filename in enumerate(image_files):
        image_path = os.path.join(logistics_path, filename)
        image = np.array(Image.open(image_path))
        height, width = image.shape[:2]

        # YOLO raw outputs for this image
        layer_outputs = outputs[img_idx]

        boxes = []
        confidences = []
        class_ids = []

        # Loop through each output layer
        for output in layer_outputs:
            for detection in output:
                scores = detection[5:]  # class scores
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if confidence > conf_threshold:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Apply NMS to filter overlapping boxes
        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

        # Draw final boxes
        for i in indices:
            i = i[0] if isinstance(i, (list, np.ndarray)) else i
            x, y, w, h = boxes[i]
            label = str(class_names[class_ids[i]])
            conf = confidences[i]

            color = (0, 255, 0)  # Green boxes
            cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
            cv2.putText(image, f"{label} {conf:.2f}",
                        (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Save prediction image
        out_path = os.path.join(prediction_path, filename)
        cv2.imwrite(out_path, cv2.cvtColor(image, cv2.COLOR_RGB2BGR))

draw_predictions(outputs_1, 1, [
    "barcode",
    "car",
    "cardboard box",
    "fire",
    "forklift",
    "freight container",
    "gloves",
    "helmet",
    "ladder",
    "license plate",
    "person",
    "qr code",
    "road sign",
    "safety vest",
    "smoke",
    "traffic cone",
    "traffic light",
    "truck",
    "van",
    "wood pallet"
])

In [14]:
os.listdir(get_path_in_storage("logistics")) == os.listdir(get_path_in_storage("logistics"))

True