In [None]:
import os
import re
import sys
import json
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv

sys.path.append(os.path.join("..", ".."))
from utils.s3_bucket import S3Bucket

In [None]:
# Load environment variables from the "env" file located two directories
# above the current working directory (presumably where REGION_NAME is
# defined -- verify against the env file).
env_file = os.path.join("..", "..", "env")
load_dotenv(env_file)

In [None]:
# Name of the S3 bucket this notebook reads from and writes to.
bucket_name = "ava-cv-raw-photo-bucket"

# Key prefixes inside the bucket: annotations.json and the source images are
# read from under `input_prefix`; the cropped images are written under
# `output_prefix`.
input_prefix, output_prefix = "temp/plants/", "temp/leaves/"

In [None]:
# REGION_NAME must be present in the environment; a missing variable raises
# KeyError here rather than failing later inside the bucket wrapper.
region_name = os.environ["REGION_NAME"]
bucket = S3Bucket(bucket_name=bucket_name, region_name=region_name)

In [None]:
# Fetch the annotation file from the bucket and parse it as JSON.
annotations_key = os.path.join(input_prefix, "annotations.json")
annotations = json.loads(bucket[annotations_key])

# Map each image ID to the list of bounding boxes annotated on that image.
# Every listed image starts with an empty list, so images without any
# annotation are still present in the mapping.
image_id_to_bounding_boxes = {
    image_entry["id"]: [] for image_entry in annotations["images"]
}
for annotation in annotations["annotations"]:
    boxes_for_image = image_id_to_bounding_boxes[annotation["image_id"]]
    boxes_for_image.append(annotation["bbox"])

In [None]:
# Crop every annotated bounding box out of its source image and upload each
# crop under the output prefix as "<original key without extension>-<i>.jpg",
# where <i> is the index of the box within its image.
for image_data in tqdm(annotations["images"]):
    image_key = os.path.join(input_prefix, "images", image_data["file_name"])
    image = bucket[image_key]

    # Build the output key stem once per image. Dropping the extension with
    # splitext (instead of a case-sensitive, unanchored regex) guarantees the
    # "-<i>" suffix is always inserted, so crops of files such as "IMG.JPG"
    # get distinct keys instead of overwriting one another. Only the leading
    # occurrence of the input prefix is swapped for the output prefix.
    key_stem, _extension = os.path.splitext(image_key)
    output_stem = key_stem.replace(input_prefix, output_prefix, 1)

    for i, bbox in enumerate(image_id_to_bounding_boxes[image_data["id"]]):
        # The annotation layout (images / annotations / image_id / bbox) is
        # COCO's, where a box is [x, y, width, height]. Image.crop expects
        # (left, upper, right, lower), so convert size to corner coordinates.
        # NOTE(review): assumes boxes follow the COCO convention -- confirm
        # against the tool that exported annotations.json.
        left, upper, width, height = bbox
        crop_box = (left, upper, left + width, upper + height)

        # crop() returns a new image, so the source image needs no copy.
        bucket[f"{output_stem}-{i}.jpg"] = image.crop(crop_box)