In [10]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
import sys
import os
import json
import torch
import yaml
import numpy as np
import cv2
import copy
from pathlib import Path
import matplotlib.pyplot as plt
from PIL import Image
from ultralytics import YOLO

In [12]:
DATA_PATH = "/workspace/plane-nerf/data/jackal_floor_evaluation_data"
MODEL_PATH = "/workspace/plane-nerf/data/jackal_floor_training_data_1/yolo_footprint/runs/segment/model_2024-03-25_12-42-34"

if not os.path.exists(DATA_PATH + "/masks"):
    os.makedirs(DATA_PATH + "/masks")

with open(f"{DATA_PATH}/ground_truth.json", "r") as f:
    transforms = json.load(f)

model = YOLO(MODEL_PATH+"/weights/best.pt")

In [13]:
new_frames = []

none_cnt = 0

for i in range(len(transforms["frames"])):
    f = transforms["frames"][i].copy()

    img_path = os.path.join(DATA_PATH, f["file_path"])
    img = Image.open(img_path)
    
    # Inference
    results = model([img_path])

    if (results[0].masks == None):
        none_cnt+=1
        #Save mask as single channel
        mask = np.zeros((img.size[1], img.size[0]), dtype=np.uint8)
        f["mask_path"] = f"masks/{i}.png"
        mask_path = os.path.join(DATA_PATH, f["mask_path"])
        mask = Image.fromarray(mask)
        mask.save(mask_path)

        new_frames.append(f)
        continue
    #Get Best mask
    best_mask = results[0].masks.xy[0]
    # Draw mask using coordinates in best_mask
    best_mask = np.array(best_mask)
    best_mask = best_mask.reshape(-1, 2).astype(np.int32)
    mask = np.zeros((img.size[1], img.size[0]), dtype=np.uint8)
    cv2.fillPoly(mask, [best_mask], 255)

    #Save mask as single channel
    f["mask_path"] = f"masks/{i}.png"
    mask_path = os.path.join(DATA_PATH, f["mask_path"])
    mask = Image.fromarray(mask)
    mask.save(mask_path)

    new_frames.append(f)
print(none_cnt)


0: 384x640 1 jackal, 16.3ms
Speed: 2.2ms preprocess, 16.3ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jackal, 22.2ms
Speed: 2.1ms preprocess, 22.2ms inference, 3.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jackal, 15.9ms
Speed: 2.4ms preprocess, 15.9ms inference, 3.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jackal, 15.6ms
Speed: 2.2ms preprocess, 15.6ms inference, 3.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jackal, 24.8ms
Speed: 2.3ms preprocess, 24.8ms inference, 2.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jackal, 21.2ms
Speed: 2.1ms preprocess, 21.2ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jackal, 24.9ms
Speed: 2.3ms preprocess, 24.9ms inference, 3.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 jackal, 18.4ms
Speed: 2.3ms preprocess, 18.4ms inference, 3.5ms postprocess per image at shape (1, 3, 38

14


In [14]:
transforms["frames"] = new_frames
with open(f"{DATA_PATH}/transforms.json", "w") as f:
    json.dump(transforms, f, indent=4)