In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg

In [2]:
def load_annotations(path: str) -> pd.DataFrame:
    """
    Loads a csv-like annotation file with fields ["frame", "ID", "left", "top",
    "width", "height", "confidence", "null1", "null2", "null3"] into a pandas
    dataframe. Check Nvidia AICity challenge readme for further detail.

    Parameters
    ----------
    path: str
        Path string for the input file.

    Returns
    -------
    pd.DataFrame
        Pandas dataframe which contains the loaded csv with only the needed
        columns ["frame", "ID", "left", "top", "width", "height", "confidence"].
    """
    ann = pd.read_csv(
        path,
        sep=",",
        names=["frame", "ID", "left", "top", "width", "height", "confidence",
               "null1", "null2", "null3"]
    )
    ann = ann[["frame", "ID", "left", "top", "width", "height", "confidence"]]
    return ann


In [3]:
def vectorise_annotations(df: pd.DataFrame) -> np.ndarray:
    """
    From a pandas dataframe with the bounding boxes of a single class and a
    single frame, produce a confidence-ordered array of boxes in xyxy format.
    The input dataframe should have the same row indices as load_annotations.

    Parameters
    ----------
    df: pd.DataFrame
        Pandas dataframe with the same format as produced by the function
        load_annotations.

    Returns
    -------
    ArrayLike
        Array of shape Nx4 where N is the number of boxes and each component
        is a xyxy format bounding box (left, top, right, bottom coordinates).

    See Also
    --------
    load_annotations : load_annotations function.
    """
    df.sort_values("confidence")
    xyxy_format = (
        df["left"],
        df["top"],
        df["left"] + df["width"],
        df["top"] + df["height"],
    )
    return np.asarray(xyxy_format).T

In [4]:
def iou(
        gt: np.ndarray,
        pred: np.ndarray
) -> np.ndarray:
    """
    Returns the Intersection over Union of a given set of "pred" grid-aligned
    rectangles against a set of reference "gt" grid-aligned rectangles.

    Parameters
    ----------
    gt : ArrayLike
        Set of grid-aligned rectangles to compare against. Provided as a Nx4
        matrix of N points of (x1, y1, x2, y2) coordinates.
    pred : ArrayLike
        Set of grid-aligned rectangles to be compared. Provided as a Mx4
        matrix of M points of (x1, y1, x2, y2) coordinates.

    Returns
    -------
    ArrayLike:
        A NxM matrix with the IoU of each cmp rectangle against each reference
        rectangle.
    """
    n, m = gt.shape[0], pred.shape[0]
    s_gt = np.stack([gt] * m, axis=1)
    s_pred = np.stack([pred] * n, axis=0)

    # Intersection
    intr_x = np.min(np.stack((s_gt[:, :, 2], s_pred[:, :, 2]), axis=0), axis=0) - \
             np.max(np.stack((s_gt[:, :, 0], s_pred[:, :, 0]), axis=0), axis=0)
    intr_x = np.maximum(intr_x, 0)

    intr_y = np.min(np.stack((s_gt[:, :, 3], s_pred[:, :, 3]), axis=0), axis=0) - \
             np.max(np.stack((s_gt[:, :, 1], s_pred[:, :, 1]), axis=0), axis=0)
    intr_y = np.maximum(intr_y, 0)

    intr_t = intr_x * intr_y

    # Union
    area_r = (s_gt[:, :, 2] - s_gt[:, :, 0]) * (s_gt[:, :, 3] - s_gt[:, :, 1])
    area_c = (s_pred[:, :, 2] - s_pred[:, :, 0]) * (s_pred[:, :, 3] - s_pred[:, :, 1])

    union = area_r + area_c - intr_t

    return intr_t / union

In [5]:
def select_bboxes(inter: np.ndarray, thresh: float) -> np.ndarray:
    """
    From the output of IoU (an NxM matrix where N is the number of ground truth
    samples and M is the number of predictions and the value it contains is the
    Intersection Over Union between the n-th and m-th box), generates the chosen
    predicted bounding boxes assuming they are above a set threshold.

    Parameters
    ----------
    inter: np.ndarray
        NxM matrix where N is the number of ground truth
        samples and M is the number of predictions and the value it contains is the
        intersection-over-union between the n-th and m-th box.
    thresh: float
        Acceptance value for Intersection over Union.

    Returns
    -------
    np.ndarray
        N-length array with the selected predicted box at each position. If a
        gt box has no corresponding prediction, -1 is returned accordingly.
    """
    inter[inter < np.stack([np.max(inter, axis=0)] * inter.shape[0])] = 0.0
    inter = inter > thresh
    ind_max = np.where(
        np.max(inter, axis=1) > thresh,
        np.argmax(inter, axis=1),
        -1
    )

    return ind_max

In [6]:
def average_precision_frame(
        gt: np.ndarray,
        pred: np.ndarray,
        thresh: float
) -> float:
    """
    Computes the average precision for a single frame and class. The input is
    assumed to be filtered accordingly.

    Parameters
    ----------
    gt: ArrayLike
        Array of shape Nx4 where N is the number of boxes and each component
        is a xyxy format bounding box (left, top, right, bottom coordinates).
        These represent the ground truth boxes.
    pred: ArrayLike
        Array of shape Nx4 where N is the number of boxes and each component
        is a xyxy format bounding box (left, top, right, bottom coordinates).
        These represent a predictions' boxes.
    thresh: float
        Intersection-over-union threshold. Whenever the area is below this
        threshold, the prediction is ignored.

    Returns
    -------
    float
        Average precision for the given bounding boxes.
    """
    inter = iou(gt, pred)
    ind_max = select_bboxes(inter, thresh)

    tp_evol = np.cumsum(ind_max >= 0)
    pre = tp_evol / pred.shape[0]
    rec = tp_evol / gt.shape[0]

    curr_max = -1
    out_pre = np.zeros_like(pre)

    for ii in range(len(pre)):
        curr_max = max(pre[len(pre) - ii - 1], curr_max)
        out_pre[len(pre) - ii - 1] = curr_max

    sampling_points = np.arange(0.0, 1.01, 0.1)
    pre_ind = rec[None,:] >= sampling_points[:, None]
    pre_ind = np.argmax(pre_ind, axis=1)

    ap = sum(out_pre[pre_ind]) / 11

    return ap

In [7]:
def draw_boxes(
        frame: np.ndarray,
        gt_coords: np.ndarray,
        pd_coords: np.ndarray,
        out_path:str=None
) -> None:
    """
    Produces a representation of a single frame with the provided ground truth
    and predicted bounding boxes drawn accordingly. If an out_path is provided
    then the output is not shown as a floating plot, being saved instead into
    the specified file instead.

    Parameters
    ----------
    frame: ArrayLike
        Image in RGB format to draw.
    gt_coords: ArrayLike
        Array of shape Nx4 where N is the number of boxes and each component
        is a xyxy format bounding box (left, top, right, bottom coordinates).
        These represent the ground truth boxes.
    pd_coords: ArrayLike
        Array of shape Nx4 where N is the number of boxes and each component
        is a xyxy format bounding box (left, top, right, bottom coordinates).
        These represent the ground truth boxes.
    out_path: str
        Full filename and path of an output image to save the results.

    Returns
    -------
    None
    """
    plt.figure()
    plt.imshow(frame)
    plt.axis("off")

    for ii in range(gt_coords.shape[0]):
        plt.gca().add_patch(
            patches.Rectangle(
                (gt_coords[ii, 0], gt_coords[ii, 1]),
                gt_coords[ii, 2] - gt_coords[ii, 0],
                gt_coords[ii, 3] - gt_coords[ii, 1],
                color="g",
                alpha=0.3,
        ))
    for ii in range(pd_coords.shape[0]):
        plt.gca().add_patch(
            patches.Rectangle(
                (pd_coords[ii, 0], pd_coords[ii, 1]),
                pd_coords[ii, 2] - pd_coords[ii, 0],
                pd_coords[ii, 3] - pd_coords[ii, 1],
                color="r",
                alpha=0.3,
            ))
    if out_path is not None:
        plt.savefig(out_path)
    else:
        plt.show()
    plt.close()

In [8]:
def compute_avg_precision(
        gt_path: str,
        pd_path: str,
        iou_thresh: float,
        alter_prediction: callable = None,
        add_params: dict = None,
) -> dict:
    """
    Computes the average precision from a prediction file w.r.t. a ground truth
    file. Both cases should respect the csv-like annotation format from Nvidia
    AICity challenge.

    Parameters
    ----------
    gt_path: str
        Path to the ground truth file.
    pd_path: str
        Path to the prediction file.
    iou_thresh: float
        IoU threshold above which a prediction is considered correct.
    alter_prediction: callable
        Function to modify the prediction (to test stochastic modifications
        for instance).
    add_params: dict
        Extra parameters for the alter_prediction function.

    Returns
    -------
    dict
        A dictionary containing each frame as key and the computed average
        precision as value.

    See Also
    --------
    load_annotations : load_annotations function.
    """
    truth = load_annotations(gt_path)
    pred = load_annotations(pd_path)

    if alter_prediction is not None:
        pred = alter_prediction(pred, **add_params)

    frame_indices = np.unique(np.concatenate([
        pd.unique(truth["frame"]), pd.unique(pred["frame"])
    ]))

    output = {}

    for frame_id in frame_indices:
        gt_frame = vectorise_annotations(truth[truth["frame"] == frame_id])
        pd_frame = vectorise_annotations(pred[pred["frame"] == frame_id])

        if gt_frame.shape[0] == 0 or pd_frame.shape[0] == 0:
            output[frame_id] = 0.0
        else:
            output[frame_id] = average_precision_frame(gt_frame, pd_frame, iou_thresh)

    return output


In [9]:
def generate_gt_from_xml(in_path: str, out_path: str) -> None:
    dataset = ET.parse(in_path).getroot()

    # Separate annotations by frames. We do not care about the classes for the time
    # being, we only grab cars

    annotations = []

    # [frame, ID, left, top, width, height, 1, -1, -1, -1]
    for track in dataset.findall("track"):
        if track.attrib["label"] == "car":
            for box in track.findall("box"):
                annot = (
                    box.attrib["frame"], track.attrib["id"],
                    box.attrib["xtl"], box.attrib["ytl"],
                    float(box.attrib["xbr"]) - float(box.attrib["xtl"]),
                    float(box.attrib["ybr"]) - float(box.attrib["ytl"]),
                    1, -1, -1, -1
                )

                # Some functional magic to convert all elements in the tuple to
                # strings of integer numbers
                annot = tuple(map(lambda x: str(int(float(x))), annot))
                annotations.append(annot)

    with open(out_path, 'w') as f_gt:
        for ii in annotations:
            f_gt.write(",".join(ii) + "\n")

In [10]:
def dropout_predictions(pred: pd.DataFrame, prob: float) -> pd.DataFrame:
    decision = np.random.rand(len(pred)) > prob
    pred = pred[decision]
    return pred

In [11]:
def offset_predictions(pred: pd.DataFrame, offset: int) -> pd.DataFrame:
    pred[["left", "top"]] = pred[["left", "top"]] + offset
    return pred

In [12]:
def iou_offset(gt: np.ndarray, offset: int) -> float:
    width = gt[:,2] - gt[:,0]
    height = gt[:,3] - gt[:,1]

    intersect = (width - offset) * (height - offset)
    union = (2 * width * height) - intersect

    return intersect / union

In [13]:
def test_iou(
        gt_path: str,
        offset: int
    ) -> dict:

    truth = load_annotations(gt_path)

    frame_indices = pd.unique(truth["frame"])
    correct = 0
    total = 0

    for frame_id in frame_indices:
        gt_frame = vectorise_annotations(truth[truth["frame"] == frame_id])
        iou_normal = np.max(iou(gt_frame, gt_frame + offset), axis=1)
        iou_theory = iou_offset(gt_frame, offset)

        if np.any(np.not_equal(iou_normal, iou_theory)):
            print(frame_id, iou_theory, iou_normal)
            img = mpimg.imread(f"/home/pau/Documents/master/M6/project/data/"
                               f"AICity_data/AICity_data/train/S03/c010/"
                               f"vdo_frames/{frame_id:05}.jpg")
            draw_boxes(img, gt_frame, gt_frame + offset)

        correct += np.count_nonzero(iou_normal == iou_theory)
        total += np.prod(iou_normal.shape)

    return correct / total

In [80]:
ANNOTATION_PATH = "/home/pau/Documents/master/M6/project/data/ai_challenge_s03_c010-full_annotation.xml"
VIDEO_PATH = "/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/vdo_frames"
GT_PATH = "/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/gt/gt.txt"
GT_FANCY_PATH = "/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/gt/gt_challenge.txt"
OUT_PATH = "/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/vdo_gt_yolo"

DET_RCNN_PATH = "/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/det/det_mask_rcnn.txt"
DET_SSD_PATH = "/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/det/det_ssd512.txt"
DET_YOLO_PATH = "/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/det/det_yolo3.txt"

In [16]:
ANNOTATION_PATH = "E:/Master/M6 - Video analysis/Project/annotations.xml"
VIDEO_PATH = "E:/Master/M6 - Video analysis/Project/AICity_data/train/S03/c010/vdo_frames"
GT_PATH = "E:/Master/M6 - Video analysis/Project/AICity_data/train/S03/c010/gt/gt.txt"
GT_FANCY_PATH = "E:\Master\M6 - Video analysis\Project/AICity_data/train/S03/c010/gt/gt_challenge.txt"
OUT_PATH = "E:\Master\M6 - Video analysis\Project/AICity_data/train/S03/c010/vdo_gt_yolo"

DET_RCNN_PATH = "E:\Master\M6 - Video analysis\Project/AICity_data/train/S03/c010/det/det_mask_rcnn.txt"
DET_SSD_PATH = "E:\Master\M6 - Video analysis\Project/AICity_data/train/S03/c010/det/det_ssd512.txt"
DET_YOLO_PATH = "E:\Master\M6 - Video analysis\Project/AICity_data/train/S03/c010/det/det_yolo3.txt"

In [17]:
gt = load_annotations(GT_PATH)
det_yolo = load_annotations(DET_YOLO_PATH)
det_ssd = load_annotations(DET_SSD_PATH)
det_rcnn = load_annotations(DET_RCNN_PATH)

In [18]:
test_iou(GT_PATH, 25)

1.0

In [19]:
test_iou(GT_PATH, 50)

465 [0.23374391 0.20467255 0.40143893] [0.34080358 0.20467255 0.40143893]


FileNotFoundError: [Errno 2] No such file or directory: '/home/pau/Documents/master/M6/project/data/AICity_data/AICity_data/train/S03/c010/vdo_frames/00465.jpg'

In [20]:
compute_avg_precision(GT_PATH, GT_PATH, 0.5)

{218: 1.0,
 219: 1.0,
 220: 1.0,
 221: 1.0,
 222: 1.0,
 223: 1.0,
 224: 1.0,
 225: 1.0,
 226: 1.0,
 227: 1.0,
 228: 1.0,
 229: 1.0,
 230: 1.0,
 231: 1.0,
 232: 1.0,
 233: 1.0,
 234: 1.0,
 235: 1.0,
 236: 1.0,
 237: 1.0,
 238: 1.0,
 239: 1.0,
 240: 1.0,
 241: 1.0,
 242: 1.0,
 243: 1.0,
 244: 1.0,
 245: 1.0,
 246: 1.0,
 247: 1.0,
 248: 1.0,
 249: 1.0,
 250: 1.0,
 251: 1.0,
 252: 1.0,
 253: 1.0,
 254: 1.0,
 255: 1.0,
 256: 1.0,
 257: 1.0,
 258: 1.0,
 259: 1.0,
 260: 1.0,
 261: 1.0,
 262: 1.0,
 263: 1.0,
 264: 1.0,
 265: 1.0,
 266: 1.0,
 267: 1.0,
 268: 1.0,
 269: 1.0,
 270: 1.0,
 271: 1.0,
 272: 1.0,
 273: 1.0,
 274: 1.0,
 275: 1.0,
 276: 1.0,
 277: 1.0,
 278: 1.0,
 279: 1.0,
 280: 1.0,
 281: 1.0,
 282: 1.0,
 283: 1.0,
 284: 1.0,
 285: 1.0,
 286: 1.0,
 287: 1.0,
 288: 1.0,
 289: 1.0,
 290: 1.0,
 291: 1.0,
 292: 1.0,
 293: 1.0,
 294: 1.0,
 295: 1.0,
 296: 1.0,
 297: 1.0,
 298: 1.0,
 299: 1.0,
 300: 1.0,
 301: 1.0,
 302: 1.0,
 303: 1.0,
 304: 1.0,
 305: 1.0,
 306: 1.0,
 307: 1.0,
 308: 1.0,

In [21]:
compute_avg_precision(GT_PATH, GT_PATH, 0.5, dropout_predictions, {"prob": 0.5})

{218: 1.0,
 219: 1.0,
 220: 1.0,
 221: 1.0,
 222: 0.0,
 223: 1.0,
 224: 1.0,
 225: 0.0,
 226: 0.0,
 227: 1.0,
 228: 1.0,
 229: 0.0,
 230: 0.0,
 231: 0.0,
 232: 0.0,
 233: 0.0,
 234: 1.0,
 235: 1.0,
 236: 1.0,
 237: 1.0,
 238: 0.0,
 239: 0.0,
 240: 1.0,
 241: 0.0,
 242: 1.0,
 243: 1.0,
 244: 1.0,
 245: 1.0,
 246: 0.0,
 247: 0.0,
 248: 0.0,
 249: 0.0,
 250: 1.0,
 251: 0.0,
 252: 0.0,
 253: 1.0,
 254: 1.0,
 255: 1.0,
 256: 0.0,
 257: 1.0,
 258: 1.0,
 259: 0.0,
 260: 0.0,
 261: 0.0,
 262: 0.0,
 263: 0.0,
 264: 0.0,
 265: 1.0,
 266: 0.0,
 267: 1.0,
 268: 1.0,
 269: 0.0,
 270: 0.0,
 271: 0.0,
 272: 1.0,
 273: 1.0,
 274: 1.0,
 275: 0.0,
 276: 1.0,
 277: 0.0,
 278: 1.0,
 279: 1.0,
 280: 0.0,
 281: 1.0,
 282: 0.0,
 283: 1.0,
 284: 1.0,
 285: 1.0,
 286: 0.0,
 287: 1.0,
 288: 0.0,
 289: 0.0,
 290: 0.0,
 291: 1.0,
 292: 1.0,
 293: 1.0,
 294: 1.0,
 295: 1.0,
 296: 1.0,
 297: 0.0,
 298: 1.0,
 299: 1.0,
 300: 0.0,
 301: 1.0,
 302: 0.0,
 303: 1.0,
 304: 1.0,
 305: 1.0,
 306: 0.0,
 307: 1.0,
 308: 0.0,

In [None]:
compute_avg_precision(GT_PATH, GT_PATH, 0.5, offset_predictions, {"offset": 50})

In [22]:
compute_avg_precision(GT_PATH, DET_RCNN_PATH, 0.5)

{1: 0.0,
 2: 0.0,
 3: 0.0,
 4: 0.0,
 5: 0.0,
 6: 0.0,
 7: 0.0,
 8: 0.0,
 9: 0.0,
 10: 0.0,
 11: 0.0,
 12: 0.0,
 13: 0.0,
 14: 0.0,
 15: 0.0,
 16: 0.0,
 17: 0.0,
 18: 0.0,
 19: 0.0,
 20: 0.0,
 21: 0.0,
 22: 0.0,
 23: 0.0,
 24: 0.0,
 25: 0.0,
 26: 0.0,
 27: 0.0,
 28: 0.0,
 29: 0.0,
 30: 0.0,
 31: 0.0,
 32: 0.0,
 33: 0.0,
 34: 0.0,
 35: 0.0,
 36: 0.0,
 37: 0.0,
 38: 0.0,
 39: 0.0,
 40: 0.0,
 41: 0.0,
 42: 0.0,
 43: 0.0,
 44: 0.0,
 45: 0.0,
 46: 0.0,
 47: 0.0,
 48: 0.0,
 49: 0.0,
 50: 0.0,
 51: 0.0,
 52: 0.0,
 53: 0.0,
 54: 0.0,
 55: 0.0,
 56: 0.0,
 57: 0.0,
 58: 0.0,
 59: 0.0,
 60: 0.0,
 61: 0.0,
 62: 0.0,
 63: 0.0,
 64: 0.0,
 65: 0.0,
 66: 0.0,
 67: 0.0,
 68: 0.0,
 69: 0.0,
 70: 0.0,
 71: 0.0,
 72: 0.0,
 73: 0.0,
 74: 0.0,
 75: 0.0,
 76: 0.0,
 77: 0.0,
 78: 0.0,
 79: 0.0,
 80: 0.0,
 81: 0.0,
 82: 0.0,
 83: 0.0,
 84: 0.0,
 85: 0.0,
 86: 0.0,
 87: 0.0,
 88: 0.0,
 89: 0.0,
 90: 0.0,
 91: 0.0,
 92: 0.0,
 93: 0.0,
 94: 0.0,
 95: 0.0,
 96: 0.0,
 97: 0.0,
 98: 0.0,
 99: 0.0,
 100: 0.0,
 101: 0.

In [None]:
compute_avg_precision(GT_FANCY_PATH, DET_RCNN_PATH, 0.5)