In [None]:
import cv2
import matplotlib.pyplot as plt
import pandas as pd
import sys
from pathlib import Path
from IPython.display import clear_output
from datetime import datetime
import re
import random

sys.path.append("/home/psa_images/SemiF-AnnotationPipeline")
sys.path.append("/home/psa_images/SemiF-AnnotationPipeline/segment")
from utils.viz import (
    batch_df,
    review_data,
    plot_bboxes,
    inspect_masks,
    preview_cutout_results,
    validation_sample_df,
    save_original_full_res_images,
    filter_by_area,
)
from utils.utils import filter_and_select_dates
from semif_utils.segment_utils import load_speciesinfo

# Define directories and load data

In [None]:
########### Change these as necessary ###########
# dev_dir = "/home/psa_images/SemiF-AnnotationPipeline/data/semifield-developed-images"
# cutout_dir = "/home/psa_images/SemiF-AnnotationPipeline/data/semifield-cutouts"
dev_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-developed-images"
cutout_dir = "/mnt/research-projects/s/screberg/longterm_images/semifield-cutouts/"
#################################################


species_info_json = "/home/psa_images/SemiF-AnnotationPipeline/data/semifield-utils/species_information/species_info.json"

# Candidate batch IDs are the names of the directories directly under cutout_dir
# (the Review cell reads <cutout_dir>/<batch_id>/<batch_id>.csv). Use the full
# directory name rather than the stem so a name containing a dot is not
# truncated, and skip stray files so they cannot be mistaken for batches.
dates_list = sorted(p.name for p in Path(cutout_dir).glob("*") if p.is_dir())


# Date window handed to filter_and_select_dates for every state below.
start_date_str = "2022-01-10"
end_date_str = "2024-10-10"
# NOTE(review): state_abbreviation is not read by the selection below, which
# always queries TX, MD and NC; kept in case other cells rely on it.
state_abbreviation = "TX"

# One filter_and_select_dates call per state; num_dates can be "all" or integer.
batch_ids_by_state = {
    state: filter_and_select_dates(
        dates_list, start_date_str, end_date_str, state, num_dates="all"
    )
    for state in ("TX", "MD", "NC")
}
TXbatch_ids = batch_ids_by_state["TX"]
MDbatch_ids = batch_ids_by_state["MD"]
NCbatch_ids = batch_ids_by_state["NC"]

# Combined selection, ordered TX -> MD -> NC.
batch_ids = TXbatch_ids + MDbatch_ids + NCbatch_ids
# batch_ids = batch_ids[3:6]
batch_ids

In [None]:
# Build one DataFrame per selected batch with batch_df, concatenate them, and
# keep a reproducible 10% random sample for the exploratory cells that follow.
if not batch_ids:
    # pd.concat([]) would otherwise fail with an opaque "No objects to concatenate".
    raise ValueError(
        "batch_ids is empty: no batches matched the date range / state filters, "
        "or cutout_dir contains no batch directories."
    )

dfs = []
for batch_id in batch_ids:
    batch_dir = f"{dev_dir}/{batch_id}"
    ogdf = batch_df(batch_id, cutout_dir, batch_dir)
    dfs.append(ogdf)
df = pd.concat(dfs)
# Fixed random_state keeps the 10% subsample identical across re-runs.
df = df.sample(frac=0.1, random_state=42).reset_index(drop=True)

In [None]:
# List every column of the combined frame, one name per line.
for column_name in df.columns:
    print(column_name)

In [None]:
# Visual spot check: draw a few cutouts per species from the combined frame and
# show each cutout (.png) next to the .jpg that shares its path.
mdf = df.copy()

# NOTE: area_min, area_max and random_state are assigned again (with different
# meanings/values) in the Settings cell further down; this cell sets them
# itself, so always run it as a whole.
area_max = 100000
area_min = 5000
random_state = 41
samples_per_species = 3

# Drop placeholder / calibration labels, cutouts flagged as extending past the
# border, and anything outside the [area_min, area_max] window.
mdf = mdf[~mdf["common_name"].isin(["unknown", "colorchecker"])]
# mdf = mdf[mdf["is_primary"] == True]
mdf = mdf[mdf["extends_border"] == False]
mdf = mdf[(mdf["area"] >= area_min) & (mdf["area"] <= area_max)]

dfs_sample = []
for species in mdf["common_name"].unique():
    tempdf = mdf[mdf["common_name"] == species]
    # Cap at samples_per_species; species with fewer rows contribute all of
    # them. Seeding makes the selection reproducible.
    n_samples = min(samples_per_species, len(tempdf))
    dfs_sample.append(tempdf.sample(n=n_samples, random_state=random_state))

if dfs_sample:
    sampledf = pd.concat(dfs_sample)
    # Shuffle so species are interleaved in the preview.
    sampledf = sampledf.sample(frac=1, random_state=random_state).reset_index(drop=True)
else:
    # Nothing survived the filters: keep an empty frame so the loop is a no-op.
    print("No cutouts left after filtering; nothing to preview.")
    sampledf = mdf.head(0).reset_index(drop=True)

for idx, row in sampledf.iterrows():

    cutpath = row["cutout_paths"]
    croppath = cutpath.replace(".png", ".jpg")
    species = row["common_name"]
    area = row["area"]

    # cv2.imread returns None (instead of raising) for missing/unreadable files.
    crop_bgr = cv2.imread(croppath)
    cut_bgr = cv2.imread(cutpath)
    if crop_bgr is None or cut_bgr is None:
        print(f"Skipping unreadable image pair: {cutpath}")
        continue
    cropimg = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB)
    cutimg = cv2.cvtColor(cut_bgr, cv2.COLOR_BGR2RGB)

    fig, axs = plt.subplots(1, 2)  # 1 row, 2 columns
    # Left panel: the .jpg image, titled with the species
    axs[0].imshow(cropimg)
    axs[0].set_title(f"{species}")

    # Right panel: the .png cutout, titled with its area
    axs[1].imshow(cutimg)
    axs[1].set_title(f"{area}")

    plt.show()
    # Release the figure so a long preview does not accumulate open figures.
    plt.close(fig)

## Settings

In [None]:
########## General Settings ###########
save_root = "Validation_results_cutouts_presentation"
sample_sz = 10  # Must be between 10-50
show_plots = True
gen_figsize = (12, 8)
transparent_fc = True
save = False
clear_notebook_outputs = False

# Which review stages the Review cell runs for each batch.
plot_full_res_images = False
plot_boxes = False
plot_masks = False
plot_cutouts = True

random_state = 41
dpi = 300

# Figure size shared by the bbox, mask and cutout plots in the Review cell.
# Defined once and derived from gen_figsize so that changing gen_figsize
# actually affects those plots.
figsize = gen_figsize

########## Save OG Images ###########
show_image_plots = False
# NOTE(review): save_images is not passed to save_original_full_res_images in
# the Review cell — confirm whether it should be.
save_images = save
image_fig_size = gen_figsize
save_image_location = save_root


########## Plot Bbox Settings ###########
show_labels = True
bbox_transparent_fc = transparent_fc
bbox_save = save
bbox_show_plots = show_plots
bbox_dpi = dpi

######### Image mask Settings ########
colorize_semantic_mask = True
mask_transparent_fc = transparent_fc
include_suptitles = True
mask_save = save
# NOTE(review): the Review cell passes species_info_json directly; this alias is
# kept for any other consumer.
species_info = species_info_json
masks_show_plots = show_plots
mask_dpi = dpi

######### Inspect Cutout Settings ########
# Configs (change these)
# NOTE: area_min / area_max reuse names that the species-sampling cell earlier
# in the notebook sets to raw area bounds; run cells top to bottom.
extends_border = None  # True
is_primary = None  # True
green_sum_max = 10000000000000
green_sum_min = 100
# percentiles: 25, 50, 75 quartiles, mean, or None
area_min = None
area_max = 25
# should be between 0 and 1
solid_max = None
solid_min = None
# number of components
component_min = None
component_max = None

########### Cutout Figure settings ###########
include_title = True
cutout_save = save
cutout_transparent_fc = transparent_fc
cutouts_show_plots = show_plots
cutout_dpi = dpi
######################################################

## Review

In [None]:
# Per-batch review: load the batch, draw a validation sample, print/inspect it
# with review_data, then run whichever plotting stages are enabled in Settings.
for batch_id in batch_ids:
    if clear_notebook_outputs:
        clear_output(wait=True)
    batch_dir = f"{dev_dir}/{batch_id}"
    # Prep data
    ogdf = batch_df(batch_id, cutout_dir, batch_dir)
    # Use a dedicated name so the notebook-level `df` built by the loading cell
    # is not overwritten with the last batch's validation sample.
    batch_sample_df = validation_sample_df(
        ogdf, sample_sz=sample_sz, random_state=random_state, drop_img_duplicates=True
    )
    review_data(batch_sample_df, batch_id)
    # Per-batch output root; each stage below writes to its own subfolder.
    save_location = f"{save_root}/{batch_id}"
    if plot_full_res_images:
        # Full-resolution images use save_image_location from Settings,
        # not the per-batch save_location.
        save_original_full_res_images(
            batch_sample_df,
            save_location=save_image_location,
            figsize=image_fig_size,
            show_plots=show_image_plots,
        )

    if plot_boxes:
        plot_bboxes(
            batch_sample_df,
            figsize=figsize,
            show_labels=show_labels,
            transparent_fc=bbox_transparent_fc,
            save=bbox_save,
            save_location=Path(save_location, "bboxes"),
            show_plots=bbox_show_plots,
            dpi=bbox_dpi,
        )
    if plot_masks:
        inspect_masks(
            batch_sample_df,
            figsize=figsize,
            colorize_semantic_mask=colorize_semantic_mask,
            transparent_fc=mask_transparent_fc,
            include_suptitles=include_suptitles,
            save=mask_save,
            save_location=Path(save_location, "masks"),
            species_info=species_info_json,
            show_plots=masks_show_plots,
            dpi=mask_dpi,
        )
    if plot_cutouts:
        # Filter thresholds are passed positionally; the order here must match
        # preview_cutout_results' signature (note: solid_min before solid_max).
        preview_cutout_results(
            batch_sample_df,
            extends_border,
            is_primary,
            green_sum_max,
            green_sum_min,
            area_min,
            area_max,
            solid_min,
            solid_max,
            component_min,
            component_max,
            figsize=figsize,
            save=cutout_save,
            save_location=Path(save_location, "cutouts"),
            title=include_title,
            transparent_fc=cutout_transparent_fc,
            show_plots=cutouts_show_plots,
            dpi=cutout_dpi,
        )