# Imports

In [2]:
import pandas as pd
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import zipfile
from pathlib import Path
from scipy.ndimage import label as region_map
from helpers import *

# Select data, CSV files, and output locations

In [None]:
# Input locations.
analyzed_csv = './csv_files/data_overview.csv'  # CSV file describing the analyzed data
csv_files_zip = "./csv_files/emb_csv_files.zip"  # zip archive containing all EMB.csv files for the data

# Output locations. They are plain strings ending in "/" because the cells
# below build the output file names by string concatenation.
labels_out_dir1 = './labels_circle/'  # folder receiving the circle labels
labels_out_dir2 = './labels_sp/'  # folder receiving the signal processing labels

# Create both output folders (and any missing parents) if they do not exist yet.
for _out_dir in (labels_out_dir1, labels_out_dir2):
    Path(_out_dir).mkdir(parents=True, exist_ok=True)

# Extract data from CSV file

In [None]:
# Load the overview table; the cells below iterate over its rows, one per image to label.
analysis_overview_df = pd.read_csv(filepath_or_buffer=analyzed_csv)

# Create circle masks

In [None]:
# Build the circle masks: for every row of the overview table, draw one filled
# disc per embolism listed in the matching *_emb.csv file and save the
# resulting 4-channel layer into labels_out_dir1.
with zipfile.ZipFile(csv_files_zip) as csv_labels_folder:
    for index, row in analysis_overview_df.iterrows():
        # Load the embolism table of this scan straight from the zip archive.
        emb_csv_filename = f"csv_files_emb_roi/{row.session}_{row.scan_nb}_{row.plant_name}_emb.csv"
        with csv_labels_folder.open(emb_csv_filename) as emb_file:
            emb_df = pd.read_csv(emb_file, skiprows=1, index_col="index")

        # Image path of the first overview entry that shares this row's
        # plant, scan number and session.
        same_scan = (
            (analysis_overview_df["plant_name"] == row.plant_name)
            & (analysis_overview_df["scan_nb"] == row.scan_nb)
            & (analysis_overview_df["session"] == row.session)
        )
        img_pathname = analysis_overview_df.loc[same_scan, "pathname"].iloc[0]

        # cv.imread reports failure by returning None rather than raising, so
        # fail here with a clear message instead of a cryptic unpacking error.
        original_img = cv.imread(img_pathname, cv.IMREAD_UNCHANGED)
        if original_img is None:
            raise OSError(f"Could not read image '{img_pathname}' (missing file or unsupported format)")
        img_array = np.array(original_img)

        # Only the two leading axes (rows, columns) size the mask, so an image
        # with a channel axis is accepted as well.
        height, width = img_array.shape[:2]

        # NOTE(review): np.zeros defaults to float64, so the saved TIFF is a
        # float image holding the values 0/255/170 - confirm this is what the
        # consumer of these labels expects.
        png_layer = np.zeros((height, width, 4))
        pixel_size = row.voxel_size

        # iterate through all the embolism regions in the csv file
        for emb_index, emb_region in emb_df.iterrows():
            x_emb = emb_region.BaryCenterX
            y_emb = emb_region.BaryCenterY
            emb_radius = emb_region.EqDiameter / 2

            # All values are in mm, need to convert to px using voxel size (mm size of pixel)
            x_emb_px = int(x_emb / pixel_size)
            y_emb_px = int(y_emb / pixel_size)
            emb_radius_px = int(emb_radius / pixel_size)

            # Draw the ROI as a filled disc (thickness=-1); OpenCV takes the centre as (x, y).
            cv.circle(png_layer, center=(x_emb_px, y_emb_px), radius=emb_radius_px, color=(0,0,255,170), thickness=-1)

        # Save the layer
        cv.imwrite(labels_out_dir1+f'{row.session}_{row.scan_nb}_{row.plant_name}_{row.image_nb}.tif', png_layer)

# Create signal processing based masks

In [None]:
# Build the signal-processing masks: denoise and threshold every image, label
# the connected regions of the thresholded image, then keep the region lying
# under each embolism barycentre (unless it is too large). The binary label
# image is saved into labels_out_dir2.
with zipfile.ZipFile(csv_files_zip) as csv_labels_folder:
    # Regions covering more pixels than this are not stored in the output.
    max_region_px = 1000

    for index, row in analysis_overview_df.iterrows():
        # Load the embolism table of this scan straight from the zip archive.
        emb_csv_filename = f"csv_files_emb_roi/{row.session}_{row.scan_nb}_{row.plant_name}_emb.csv"
        with csv_labels_folder.open(emb_csv_filename) as emb_file:
            emb_df = pd.read_csv(emb_file, skiprows=1, index_col="index")

        # Image path of the first overview entry that shares this row's
        # plant, scan number and session.
        same_scan = (
            (analysis_overview_df["plant_name"] == row.plant_name)
            & (analysis_overview_df["scan_nb"] == row.scan_nb)
            & (analysis_overview_df["session"] == row.session)
        )
        img_pathname = analysis_overview_df.loc[same_scan, "pathname"].iloc[0]

        # cv.imread reports failure by returning None rather than raising, so
        # fail here with a clear message instead of deep inside the helpers.
        loaded_img = cv.imread(img_pathname, cv.IMREAD_UNCHANGED)
        if loaded_img is None:
            raise OSError(f"Could not read image '{img_pathname}' (missing file or unsupported format)")
        original_img = np.array(loaded_img)

        # Take the size from the image loaded in THIS iteration (the previous
        # code read a stale `img_array` left over from the circle-mask cell).
        height, width = original_img.shape[:2]
        pixel_size = row.voxel_size

        # signal processing
        sigma = find_best_sigma(original_img,125) # find sigma
        img_dn = denoise_nl_means(original_img, patch_size=7, patch_distance=11, h=0.6*sigma, fast_mode=False, sigma=sigma) # denoise using nl means
        th = find_otsu(img_dn) # find optimal threshold value using Otsu's method
        # Inverse binary threshold: pixels at or below th become img_dn.max(), the others 0.
        _,img_th = cv.threshold(img_dn,th,img_dn.max(),cv.THRESH_BINARY_INV) # threshold image

        # `region_map` is scipy.ndimage.label (see the imports). Its result must
        # NOT be bound to the name `region_map`, otherwise the function is
        # overwritten and the next iteration fails with "ndarray is not callable".
        labeled_regions, regions = region_map(img_th)
        label_img = np.zeros_like(original_img)

        # iterate through all the embolism regions in the csv file
        for emb_index, emb_region in emb_df.iterrows():
            x_emb = emb_region.BaryCenterX
            y_emb = emb_region.BaryCenterY

            # All values are in mm, need to convert to px using voxel size (mm size of pixel)
            x_emb_px = int(x_emb / pixel_size)
            y_emb_px = int(y_emb / pixel_size)

            # NumPy arrays are indexed [row, column] = [y, x]; this matches the
            # (x, y) centre passed to cv.circle in the circle-mask cell.
            center_region = labeled_regions[y_emb_px, x_emb_px]

            # Label 0 is the background of scipy.ndimage.label, not a region.
            if center_region == 0:
                continue

            # select region of current embolism and store it to output if not too large
            region_mask = labeled_regions == center_region
            if np.sum(region_mask) <= max_region_px:
                label_img[region_mask] = 1

        # Save the layer
        cv.imwrite(labels_out_dir2+f'{row.session}_{row.scan_nb}_{row.plant_name}_{row.image_nb}.tif', label_img)