In [None]:
import os
import subprocess
import pandas as pd
import json

import viame2coco.viame2coco as viame2coco
from pycocotools.coco import COCO
import cv2
# import matplotlib.pyplot as plt
import math


In [None]:
# --- Configuration: buckets, local mount points, and deployment ---
imagery_raw_bucket = "amlr-gliders-imagery-raw-dev"
imagery_raw_mt = "/home/sam_woodman_noaa_gov/amlr-gliders-imagery-raw-dev"

lib_bucket = "esd-image-library-dev"
lib_mt = "/home/sam_woodman_noaa_gov/esd-image-library-dev"

deployment = 'amlr08-20220513'
csv_base_path = f"/home/sam_woodman_noaa_gov/jw-annotations/{deployment}"


def mount_bucket(bucket, mount_point, read_only=False):
    """Mount a GCS bucket at `mount_point` with gcsfuse.

    Parameters
    ----------
    bucket : str
        Name of the bucket to mount.
    mount_point : str
        Local directory to mount onto; created if it does not exist.
    read_only : bool, default False
        If True, pass `-o ro` to gcsfuse.

    Raises
    ------
    subprocess.CalledProcessError
        If gcsfuse exits with a non-zero status.
    """
    os.makedirs(mount_point, exist_ok=True)

    # Re-running this cell must not fail just because the mount already exists
    if os.path.ismount(mount_point):
        print(f"{mount_point} is already a mount point; skipping {bucket}")
        return

    cmd = ["gcsfuse", "--implicit-dirs"]
    if read_only:
        cmd += ["-o", "ro"]
    cmd += [bucket, mount_point]

    # check=True: surface a failed mount here instead of letting later cells
    # read from an empty directory
    subprocess.run(cmd, check=True)


# Mount bucket(s): raw imagery is read-only, the image library is writable
mount_bucket(imagery_raw_bucket, imagery_raw_mt, read_only=True)
mount_bucket(lib_bucket, lib_mt)

## Convert VIAME CSV file to COCO format

In [None]:
# Read in CSV file(s)
# Show which annotation files are available. A bare `os.listdir(...)` that is
# not the last line of a cell displays nothing, so display it explicitly.
display(sorted(os.listdir(csv_base_path)))

# csv_path2 = os.path.join(csv_base_path, 'amlr08-20220513-dir0002-annotations-manual.csv')
csv_path2 = os.path.join(
    csv_base_path,
    'amlr08-20220513-dir0001-frame111-frame759-annotations-manual.csv')

d2 = pd.read_csv(csv_path2)
display(d2)

# Count annotations per label.
# NOTE(review): row 0 is skipped; presumably it is the VIAME metadata line
# rather than an annotation - confirm against the CSV.
label_col = 9  # positional index of the label column
display(d2.iloc[1:, label_col].value_counts())

# Disabled: rewrite the image identifiers with an absolute prefix and save a copy
# pref = "/home/sam_woodman_noaa_gov/Dir0002"
# d2.loc[1:, '2: Video or Image Identifier'] = (
#     [f"{pref}/{i}" for i in d2['2: Video or Image Identifier'].values[1:]])
# d2.to_csv("tmp2.csv", index=False)

In [None]:
# Directory (inside the mounted raw-imagery bucket) holding the images that
# the annotations refer to. Built from the configured mount point and
# deployment so it cannot drift from the config cell.
dir_path = os.path.join(
    imagery_raw_mt, "SANDIEGO/2022", deployment, "images/Dir0001")

# Column mapping handed to viame2coco.
# NOTE(review): values look like 0-based column positions in the VIAME CSV
# (label 9 matches the column used for value_counts above) - confirm against
# the viame2coco documentation.
config = {
    'filename': 1,
    'label': 9,
    'bbox_tlbr': {
        'tlx': 3,
        'tly': 4,
        'brx': 5,
        'bry': 6
    }
}

# Convert the VIAME CSV and write the result as a COCO JSON file
c2 = viame2coco.viame2coco(
    csv_path2, "temporary description",
    filename_base=dir_path, viame_csv_config=config
)
j2_name = "../dir1-coco-pt2.json"  # relative to the notebook's working directory
c2.to_json(j2_name)
display(c2)

In [None]:
# NOTE: this whole cell is disabled (commented out) and kept for reference.
# It merges two COCO JSON files with pycocowriter's cocomerge and would need
# the pycocowriter/datetime imports below if re-enabled.
# NOTE(review): j12_file is an absolute path, while the conversion cell writes
# to the relative "../dir1-coco-pt2.json" - confirm both point at the same file.
# # Merge pt1 and pt2 of Dir0001 annotation files
# from pycocowriter import cocomerge
# from pycocowriter import coco
# import json
# import datetime

# j11_file = '/home/sam_woodman_noaa_gov/dir1-coco-pt1.json'
# j12_file = '/home/sam_woodman_noaa_gov/dir1-coco-pt2.json'

# with open(j11_file) as fin:
#     j11 = json.load(fin)
# with open(j12_file) as fin:
#     j12 = json.load(fin)

# print(j11["info"])
# info = coco.COCOInfo(
#     year=j11["info"]["year"], 
#     version=j11["info"]["version"], 
#     description=j11["info"]["description"], 
#     date_created=datetime.datetime.now(datetime.timezone.utc), 
# )

# j_dir1 = cocomerge.coco_merge(j11, j12, info=info)
# display(j_dir1)

# # with open('/home/sam_woodman_noaa_gov/dir1-coco.json', "w") as fp:
# #     json.dump(j_dir1 , fp)

### Explore COCO output

In [None]:
# Load the COCO JSON written by the conversion cell
with open(j2_name) as coco_fp:
    j2 = json.load(coco_fp)

# Dump the top-level keys, then the content stored under each of them
print(j2.keys())
for section in j2:
    print(j2[section])

# Keep the image and annotation records around under their own names
j2_images = j2["images"]
print(j2_images)

j2_anno = j2["annotations"]
print(j2_anno)

# # print(j2["info"])
# # print(j2["licenses"])
# # print(j2["categories"])

# Last expression of the cell: shown as the cell output
j2_cat = j2["categories"]
j2_cat

## Extract regions from images
 
Crop the annotated regions out of the source images using the COCO output. Based on this code: https://forum.image.sc/t/crop-image-and-annotations-to-bbox-coco-format/74520/4

COCO data format: https://cocodataset.org/#format-data

COCO API functions: top of https://github.com/ppwwyyxx/cocoapi/blob/master/PythonAPI/pycocotools/coco.py

In [None]:
### Sam exp - loop through categories and write regions to category folders

# roi_base = os.path.join(lib_mt, "esd-shadowgraph-library")
roi_base = os.path.join("/home/sam_woodman_noaa_gov", "Dir0001-pt2-out")


def crop_to_bbox(image, bbox):
    """Return the region of `image` covered by a COCO-style bounding box.

    Parameters
    ----------
    image : array
        Image as returned by cv2.imread (rows first, then columns).
    bbox : sequence of 4 numbers
        [x, y, width, height]; fractional values are expanded outwards
        (floor for the top-left corner, ceil for the bottom-right corner).

    Returns
    -------
    array
        The cropped region. The box is clamped to the image bounds, so a box
        lying entirely outside the image yields an empty array.
    """
    x, y, width, height = bbox
    img_height, img_width = image.shape[:2]

    # Clamp to the image: a negative start/stop would otherwise be read by
    # numpy as "count from the end" and silently crop the wrong region.
    top = max(0, math.floor(y))
    left = max(0, math.floor(x))
    bottom = max(top, min(img_height, math.ceil(y + height)))
    right = max(left, min(img_width, math.ceil(x + width)))

    return image[top:bottom, left:right]


# Load COCO file for images+annotations+categories
annFile = j2_name
coco = COCO(annFile)  # load via cocoapi
cats = coco.loadCats(coco.getCatIds())  # all categories in the COCO file

cats_names = [cat["name"] for cat in cats]
print(f"Categories: {cats_names}")

for cat in cats:
    # Get category name, and make directory
    cat_name = cat["name"]
    cat_base = os.path.join(roi_base, cat_name)
    os.makedirs(cat_base, exist_ok=True)

    catIds = coco.getCatIds(catNms=[cat_name])  # category ID(s) for this name
    imgIds = coco.getImgIds(catIds=catIds)  # images with this category

    print(f"For category {cat_name}, there are {len(imgIds)} images. " +
            f"These are being written to {cat_base}")
    for img_id in imgIds:
        # Get and read in the current image
        img = coco.loadImgs(img_id)[0]
        img_path = os.path.join(dir_path, img['file_name'])
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None
            print(f"WARNING: could not read {img_path}; skipping")
            continue

        # Get the current annotation(s)
        annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
        anns = coco.loadAnns(annIds)

        # NOTE(review): file_name may carry a directory prefix (filename_base
        # was passed to viame2coco) - confirm. basename keeps the output name
        # flat either way.
        stem, ext = os.path.splitext(os.path.basename(img["file_name"]))

        for crop_num, ann in enumerate(anns, start=1):
            # make cropped image
            cropim = crop_to_bbox(image, ann["bbox"])
            if cropim.size == 0:
                print(f"WARNING: empty crop for bbox {ann['bbox']} in "
                      f"{img_path}; skipping")
                continue

            # save cropped image
            file_name = f"{deployment}_{stem}_crop-{crop_num:02}{ext}"
            path_out = os.path.join(cat_base, file_name)
            if not cv2.imwrite(path_out, cropim):
                # cv2.imwrite reports failure through its return value
                print(f"WARNING: failed to write {path_out}")

In [None]:
# NOTE: this whole cell is disabled (commented out) and kept for reference.
# It crops the first annotation of a single image and relies on `coco`,
# `imgIds` and `catIds` left over from the category loop in the previous cell.
# NOTE(review): the imread call below uses img['file_name'] without dir_path,
# unlike the category loop - confirm the path before re-enabling.
# ### Sam exp - individual files
# q = 2

# # Get and read in the current image
# img = coco.loadImgs(imgIds[q])[0]
# I = cv2.imread(img['file_name'])
# I

# # Get the current annotation(s)
# annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
# anns = coco.loadAnns(annIds)

# crop_bbox = anns[0]["bbox"]
# #make cropped image
# cropim = I[math.floor(crop_bbox[1]):math.ceil(crop_bbox[1] + crop_bbox[3]), math.floor(crop_bbox[0]):math.ceil(crop_bbox[0]+crop_bbox[2])]
# #save cropped image
# cv2.imwrite(os.path.join("/home/sam_woodman_noaa_gov", os.path.basename(img["file_name"])), 
#             cropim)