In [19]:
import pandas as pd
import sys
from pathlib import Path
from IPython.display import clear_output
from datetime import datetime
import re
import json
import os
import glob

sys.path.append("/home/psa_images/SemiF-AnnotationPipeline")
from utils.utils import filter_and_select_dates

In [238]:
# Notebook configuration: data locations and the set of batches to process.
# These names are notebook globals; later cells read `dev_dir`, `cutout_dir`
# and `batch_ids` directly.
dev_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-developed-images"
cutout_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-cutouts/"
species_info_json = "/home/psa_images/SemiF-AnnotationPipeline/data/semifield-utils/species_information/species_info.json"
# Sorted names (with any suffix stripped by `.stem`) of every entry directly
# under `cutout_dir`; these are the candidate batch ids.
dates_list = sorted([x.stem for x in list(Path(cutout_dir).glob("*"))])

# Selection window handed to the project helper `filter_and_select_dates`.
# NOTE(review): the helper's exact semantics (inclusive bounds, state prefix
# matching) are not visible in this notebook — confirm in utils.utils.
start_date_str = "2023-02-01"
# start_date_str = "2023-02-03"
end_date_str = "2024-02-04"
state_abbreviation = "NC"
batch_ids = filter_and_select_dates(
    dates_list, start_date_str, end_date_str, state_abbreviation, num_dates="all"
)
# Manual overrides used while debugging a single batch; uncomment one to
# restrict the run.
# batch_ids = [batch_ids[0]]
# batch_ids = ['NC_2023-07-03'] #good example
# batch_ids = ['NC_2023-06-12']
# batch_ids = ['MD_2023-03-16']
# Bare expression so the cell displays the selected batch ids.
batch_ids

['NC_2023-02-03',
 'NC_2023-02-06',
 'NC_2023-02-20',
 'NC_2023-02-22',
 'NC_2023-03-07',
 'NC_2023-06-12',
 'NC_2023-07-03',
 'NC_2023-07-10',
 'NC_2023-07-11']

In [233]:
import json
import os
import glob
import shutil  # for file copying


class JsonFileHandler:
    """Load one JSON file and write corrected copies (and backups) of it.

    Args:
        filepath: Path of the JSON file to load. It is parsed immediately in
            the constructor.
        backup_dir: Optional directory that receives a copy of the original
            file before a corrected version is written. No backup is made
            when this is falsy.
        corrected_dir: Directory that receives the corrected JSON. Required
            only when ``save_corrected_json`` is called.

    Attributes:
        data: The parsed content of ``filepath`` (or whatever was last passed
            to ``set_data``).
    """

    def __init__(self, filepath, backup_dir=None, corrected_dir=None):
        self.filepath = filepath
        self.backup_dir = backup_dir
        self.corrected_dir = corrected_dir
        self.data = self._load_json()

    def _load_json(self):
        """Parse and return the JSON document stored at ``self.filepath``."""
        with open(self.filepath, "r") as file:
            return json.load(file)

    @staticmethod
    def _ensure_dir(directory):
        """Create ``directory`` (and parents) if it is a non-empty path."""
        # An empty string means "current directory" for os.path.join, and
        # os.makedirs("") would raise, so only create real paths.
        if directory:
            os.makedirs(directory, exist_ok=True)

    def save_corrected_json(self, data):
        """Write ``data`` as pretty-printed JSON into ``corrected_dir``.

        The output file keeps the base name of the original file. When
        ``backup_dir`` is set, the original file is copied there first.

        Raises:
            ValueError: If no ``corrected_dir`` was configured.
        """
        # Fail before touching the filesystem instead of letting
        # os.path.join(None, ...) raise an opaque TypeError after the backup
        # copy has already been made.
        if self.corrected_dir is None:
            raise ValueError(
                "corrected_dir must be set before calling save_corrected_json"
            )
        if self.backup_dir:
            self.backup_original()
        self._ensure_dir(self.corrected_dir)
        corrected_filepath = os.path.join(
            self.corrected_dir, os.path.basename(self.filepath)
        )
        with open(corrected_filepath, "w") as file:
            json.dump(data, file, indent=4)  # Using indent=4 for pretty printing

    def backup_original(self):
        """Copy the original file (with metadata) into ``backup_dir``."""
        self._ensure_dir(self.backup_dir)
        backup_filepath = os.path.join(self.backup_dir, os.path.basename(self.filepath))
        shutil.copy2(self.filepath, backup_filepath)

    def get_data(self):
        """Return the currently held JSON data."""
        return self.data

    def set_data(self, data):
        """Replace the currently held JSON data (does not write to disk)."""
        self.data = data


class BBoxChecker:
    """Clamp bounding-box corner coordinates of one metadata record.

    Args:
        data: Parsed metadata dict. For cutouts the corners are read from
            ``data["bbox"]["local_coordinates"]``; for developed images from
            ``bbox["local_coordinates"]`` of every entry in ``data["bboxes"]``.
        x_range: ``(min, max)`` allowed for x values. Pass ``(0, 1)`` to clamp
            to normalized coordinates.
        y_range: ``(min, max)`` allowed for y values.

    The record is corrected in place; nothing is written to disk.
    """

    # Corner entries inspected inside each ``local_coordinates`` mapping.
    CORNER_KEYS = ("top_left", "top_right", "bottom_left", "bottom_right")

    def __init__(self, data, x_range=(0, 9560), y_range=(0, 6368)):
        self.data = data
        self.x_range = x_range
        self.y_range = y_range

    def are_coordinates_normalized(self, x, y):
        """Return True when both ``x`` and ``y`` lie in the closed range [0, 1]."""
        return 0 <= x <= 1 and 0 <= y <= 1

    def _correct_local_coordinates(self, local_coordinates):
        """Clamp every known corner of one mapping in place.

        Returns True if at least one corner value was changed.
        """
        modified = False
        for key in self.CORNER_KEYS:
            if key not in local_coordinates:
                continue
            x, y = local_coordinates[key]
            corrected_x = float(min(max(self.x_range[0], x), self.x_range[1]))
            corrected_y = float(min(max(self.y_range[0], y), self.y_range[1]))
            # Informational only: with the default pixel ranges this reports
            # every corner that is not already in [0, 1].
            if not self.are_coordinates_normalized(corrected_x, corrected_y):
                print(f"Coordinates {corrected_x}, {corrected_y} are not normalized!")
            # Only rewrite the entry when clamping changed a value, so
            # in-range corners keep their original container and number types.
            if corrected_x != x or corrected_y != y:
                local_coordinates[key] = (corrected_x, corrected_y)
                modified = True
        return modified

    def check_and_correct_coordinates(self, cutouts=True):
        """Clamp all corner coordinates of ``self.data`` into the allowed ranges.

        Args:
            cutouts: True to treat ``self.data`` as cutout metadata (single
                ``bbox`` entry), False to treat it as developed-image metadata
                (list under ``bboxes``).

        Returns:
            True if any coordinate was modified, otherwise False.
        """
        if cutouts:
            # Operate on this instance's record rather than on whatever
            # module-level name happens to be bound by the caller.
            bbox = self.data["bbox"] if "bbox" in self.data else None
            # A cutout "bbox" may also be a plain list or missing; only a dict
            # can carry "local_coordinates".
            if isinstance(bbox, dict) and "local_coordinates" in bbox:
                return self._correct_local_coordinates(bbox["local_coordinates"])
            return False

        modified = False
        for bbox in self.data.get("bboxes", []):
            if "local_coordinates" in bbox:
                if self._correct_local_coordinates(bbox["local_coordinates"]):
                    modified = True
        return modified


# Driver: run BBoxChecker over every cutout and developed-image metadata file
# of the selected batches and report which files needed clamping.
dev_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-developed-images"
cutout_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-cutouts/"

backup_dir = "./backup_directory/"  # Set your backup directory path here
corrected_dir = "./corrected_directory/"  # Set your corrected files directory path here
# Both directories are created up front.
# NOTE(review): neither directory is used by the loop below, and the corrected
# data is never written back — the "have been corrected" messages refer to the
# in-memory dict only. Confirm whether saving was intended.
Path(backup_dir).mkdir(parents=True, exist_ok=True)
Path(corrected_dir).mkdir(parents=True, exist_ok=True)

for batch_id in batch_ids:
    # For cutouts
    # Cutout metadata lives directly in <cutout_dir>/<batch_id>/*.json.
    full_cutout_dir_path = os.path.join(cutout_dir, batch_id)
    cutout_json_files = glob.glob(os.path.join(full_cutout_dir_path, "*.json"))
    for json_file in cutout_json_files:
        with open(json_file, "r") as f:
            data = json.load(f)

        # Default ranges are used, i.e. x in [0, 9560] and y in [0, 6368].
        checker = BBoxChecker(data)
        # Returns True when at least one corner had to be changed.
        if checker.check_and_correct_coordinates(cutouts=True):
            print(
                f"Coordinates in {json_file} (cutouts=True) for batch_id {batch_id} were not normalized and have been corrected!"
            )

    # For developed images
    # Developed-image metadata lives in <dev_dir>/<batch_id>/metadata/*.json.
    full_dev_dir_path = os.path.join(dev_dir, batch_id, "metadata")
    dev_json_files = glob.glob(os.path.join(full_dev_dir_path, "*.json"))
    for json_file in dev_json_files:
        with open(json_file, "r") as f:
            data = json.load(f)

        checker = BBoxChecker(data)
        if checker.check_and_correct_coordinates(cutouts=False):
            print(
                f"Coordinates in {json_file} (cutouts=False) for batch_id {batch_id} were not normalized and have been corrected!"
            )

KeyboardInterrupt: 

In [240]:
import os
import json
import glob

# Root directories for this cell: per-batch cutout metadata and per-batch
# developed-image metadata (the latter under a "metadata" subfolder, see the
# loop below).
cutout_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-cutouts/"
dev_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-developed-images"


def get_cutout_file_path(dev_filename, bbox_id, cutout_dir_path):
    """Build the path of the cutout JSON that belongs to one bounding box.

    Args:
        dev_filename: Base name (no extension) of the developed-image
            metadata file.
        bbox_id: Identifier of the bounding box within that file.
        cutout_dir_path: Directory holding the batch's cutout JSON files.

    Returns:
        ``<cutout_dir_path>/<dev_filename>_<bbox_id>.json``.
    """
    # Adjust this naming rule if cutout files are named differently.
    cutout_name = "{}_{}.json".format(dev_filename, bbox_id)
    return os.path.join(cutout_dir_path, cutout_name)


# Dry run: for every bounding box of every developed-image metadata file,
# find the matching cutout JSON and check whether its 4-value list bbox
# describes the same corners as the developed image's local_coordinates.
# Matches are printed; nothing is written to disk.
for batch_id in batch_ids:
    print(f"Processing batch: {batch_id}")
    full_dev_dir_path = os.path.join(dev_dir, batch_id, "metadata")
    full_cutout_dir_path = os.path.join(cutout_dir, batch_id)

    dev_json_files = glob.glob(os.path.join(full_dev_dir_path, "*.json"))
    for dev_file in dev_json_files:
        with open(dev_file, "r") as f:
            dev_data = json.load(f)

        # Set when a cutout with empty bbox data is seen in this file.
        found_empty_bbox = False
        for bbox_id, bbox in enumerate(dev_data.get("bboxes", [])):
            dev_coords = bbox.get("local_coordinates", {})
            corresponding_cutout_file = get_cutout_file_path(
                os.path.splitext(os.path.basename(dev_file))[0],
                bbox_id,
                full_cutout_dir_path,
            )

            if not os.path.exists(corresponding_cutout_file):
                continue

            with open(corresponding_cutout_file, "r") as f:
                cutout_data = json.load(f)
            cutout_bbox = cutout_data.get("bbox", {})
            # Missing or empty bbox: warn and move on to the next bbox.
            if not cutout_bbox:
                print(f"Warning: Empty bbox data in {corresponding_cutout_file}")
                found_empty_bbox = True
                continue

            # Only a list of exactly 4 values is compared here.
            if not (isinstance(cutout_bbox, list) and len(cutout_bbox) == 4):
                continue

            # NOTE(review): the list is unpacked as (y1, y2, x1, x2) — confirm
            # this order against the code that writes the cutout metadata.
            y1, y2, x1, x2 = cutout_bbox
            expected_coords = {
                "top_left": (x1, y1),
                "top_right": (x2, y1),
                "bottom_left": (x1, y2),
                "bottom_right": (x2, y2),
            }

            # json.load yields lists for corner values, and a list never
            # compares equal to a tuple, so normalise to tuples before
            # comparing; otherwise this check can never succeed.
            if isinstance(dev_coords, dict):
                dev_corner_tuples = {
                    key: tuple(value) if isinstance(value, (list, tuple)) else value
                    for key, value in dev_coords.items()
                }
            else:
                dev_corner_tuples = dev_coords

            # Check if these expected coords match the dev_coords
            if dev_corner_tuples == expected_coords:
                cutout_data["bbox"] = {"local_coordinates": dev_coords}
                print(cutout_data["bbox"])

                # Writing back is intentionally disabled (dry run):
                # with open(corresponding_cutout_file, 'w') as f:
                # json.dump(cutout_data, f, indent=4)

        # After an empty bbox, the rest of this file's bboxes are still
        # processed, but the remaining files of the batch are skipped.
        if found_empty_bbox:
            break

Processing batch: NC_2023-02-03
Processing batch: NC_2023-02-06
Processing batch: NC_2023-02-20


KeyboardInterrupt: 