# Program 4 - Analysis

## Base Setup

This section contains the basic environment set up for this notebook, including imports, constants, and any variable that needs to be easily accessed for changing.

In [None]:
#Import modules
import cv2
import os
import shutil
import time
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from ipylab import JupyterFrontEnd

This is a set of constants used mainly for workspace setup.

`OUTPUT_DIR`: The folder to create inside each time-stamped prediction directory found under `INPUT_DIR`, where output files from this analysis program will be saved.\
`INPUT_DIR`: The relative or absolute path to the base directory containing the outputs from a Program 4 run that needs to be further analyzed.\
`SAMPLE_DIR`: The relative or absolute path to the directory containing the sample image(s).\
`SAMPLE_IMAGE_DICT`: A dictionary containing keys, which are the complete names of the sample image files, and their corresponding values, which are the complete names of the annotation files associated with those sample images. Currently, used mainly for testing purposes. Should be identical to the one used in the main Program 4.\
`COLOR_PALETTE`: A list containing colors to be used when visually plotting the prediction heatmaps. Each class has its own color. Should be identical to the one used in the main Program 4.\
`COLOR_OVERLAY_TRANSLATOR`: A dictionary whose keys are the colors in `COLOR_PALETTE` and values are the colors converted into separate RGB values.\
`NOTEBOOK_NAME`: The exact name of this notebook, including the file extension. Needed later for programmatic html conversion and copying of the notebook.\
`APP`: JupyterFrontEnd instance that is used to save the notebook programmatically later.

In [None]:
#Create constants
#Name of the folder created inside every time-stamped prediction directory; all outputs of this notebook are written there
OUTPUT_DIR = "Analysis/"
#Base directory that is walked for prediction outputs. The trailing "/" is required because paths are built by string concatenation
INPUT_DIR = "Output/Small Follicles/ResNet34/Run Final 1/"
#Directory that holds the sample images and their annotation files. Also joined by string concatenation, so keep the trailing "/"
SAMPLE_DIR = "../../Data/Original/"
#Maps each sample image file name to the file name of its annotation table
SAMPLE_IMAGE_DICT = {
    "14736_UN_050a.ome.tif": "14736_UN_050a.annotations.txt",
    "16418_UN_140b.ome.tif": "16418_UN_140b.annotations.txt",
    "19006_UN_020a.ome.tif": "19006_UN_020a.annotations.txt",
    "21930_LT_060a.ome.tif": "21930_LT_060a_annotationsTable.txt",
    "21930_LT_120b.ome.tif": "21930_LT_120b_annotationsTable.txt",
    "25058_LT_005a.ome.tif": "25058_LT_005a.annotations.txt",
    "25065_LT_010a.ome.tif": "25065_LT_010a.annotations.txt",
    "25081_LT_010a.ome.tif": "25081_LT_010a.annotations.txt",
    "27570_UN_110a.ome.tif": "27570_UN_110a.annotations.txt",
    "30381_RT_070b.ome.tif": "30381_RT_070b.ome.annotationsTable.txt",
    "30381_RT_140b.ome.tif": "30381_RT_140b.ome.annotationsTable.txt",
    "30381_RT_200c.ome.tif": "30381_RT_200c.ome.annotationsTable.txt",
    "32002_RT_050a.ome.tif": "32002_RT_050a.ome.annotationsTable.txt",
    "32002_RT_110b.ome.tif": "32002_RT_110b.ome.annotationsTable.txt",
    "32002_RT_160c.ome.tif": "32002_RT_160c.ome.annotationsTable.txt",
    "33564_RT_060a.ome.tif": "33564_RT_060a.ome.annotationsTable.txt",
    "33564_RT_120b.ome.tif": "33564_RT_120b.ome.annotationsTable.txt",
    "33564_RT_180b.ome.tif": "33564_RT_180b.ome.annotationsTable.txt",
    "DP28_25081_Section3_10X_ome_copy.tif": "DP28_25081_Section3_10X_ome_copy.annotations.txt",
    "32002_LT_180a.ome.tif": "32002_LT_180a.ome.annotationsTable.txt",
    "KY_PS_LB40SDwk16601_7_a.ome.tif": "LB40_SDwk16601_7a_annotationsTable.txt"
}
#One color name per prediction value 0-6, in that order. Entries 1-6 are also used as the marker colors of the six annotation classes
COLOR_PALETTE = ['white', 'red', 'gold', 'blue', 'green', 'darkviolet', 'dimgray']
#RGB triplet for every color name in COLOR_PALETTE (images are handled in RGB order and converted to BGR only when written)
COLOR_OVERLAY_TRANSLATOR = {
    "white": [255, 255, 255],
    "red": [255, 0, 0],
    "gold": [255, 215, 0],
    "blue": [0, 0, 255],
    "green": [0, 128, 0],
    "darkviolet": [148, 0, 211],
    "dimgray": [105, 105, 105]
}
#File name of this notebook; it is copied next to the outputs and converted to html at the end
NOTEBOOK_NAME = "Program 4 - Analysis Final v2.ipynb"
#Front end handle used to run the "docmanager:save" command
APP = JupyterFrontEnd()

These are variables and flags for functions that get used later.

`multiple_epochs`: Flag to determine whether to run the analysis on every prediction for every epoch or just every prediction made. Should be identical to the value used in the run of Program 4 being analyzed.\
`save_figs`: A flag to determine whether this program will save any graphs/images it generates.\
`window_size`: The size of one side of the square window that was used for predictions. <b>Note:</b> Should be the same number used in Program 4 to make predictions.\
`window_radius`: The window radius to use when making predictions. Half of the `window_size`.

In [None]:
#Create variables
#True: INPUT_DIR contains one directory per epoch, each holding the image prediction directories
#False: INPUT_DIR contains the image prediction directories directly
multiple_epochs = True

#When True, every generated overlay image is written into the output directory
save_figs = True

#Side length of the square prediction window
window_size = 200
#Half of window_size; a margin of this many pixels is cropped from every edge of the sample image
window_radius = int(window_size / 2)

### Function Definitions

This section contains all the Functions used by this notebook.

`make_output_dir`: Creates the directory specified by the parameter `directory`.

<b>Parameters:</b>\
&emsp;`directory`: The directory path to create.

First, checks if `directory` already exists. If it does, nothing happens. If it doesn't, creates `directory`.

In [None]:
def make_output_dir(directory):
    '''Create the directory given by `directory` if it does not already exist.

    Missing parent directories are created as well. An already existing
    directory is left untouched.

    Parameters:
        directory: The directory path to create.

    Raises:
        FileExistsError: If `directory` exists but is not a directory.
    '''
    #exist_ok makes the call idempotent and avoids the race between a separate existence check and the creation
    os.makedirs(directory, exist_ok = True)

`check_coord_bounds`: Checks if the coordinates contained in a dataframe are within certain bounds.

<b>Parameters:</b>\
&emsp;`df`: The dataframe containing coordinates.\
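&emsp;`x`: A range object of the row bound; `Centroid Y px` is compared against its first and last values.\
&emsp;`y`: A range object of the column bound; `Centroid X px` is compared against its first and last values.\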
&emsp;`row_num`: The row number of the coordinates in the dataframe that are currently being looked at.

<b>Returns:</b>\
&emsp;A boolean of whether the coordinates in the desired row of the dataframe are within the x, y bounds.

In [None]:
def check_coord_bounds(df, x, y, row_num):
    '''Check whether the centroid in a given row of a dataframe lies within the given bounds.

    Both bounds are inclusive: only the first and the last element of each range are used.

    Parameters:
        df: Dataframe with the columns 'Centroid X px' and 'Centroid Y px'.
        x: Range of the row bound. It is compared against 'Centroid Y px'.
        y: Range of the column bound. It is compared against 'Centroid X px'.
        row_num: Position (0-based) of the row to check.

    Returns:
        True if the centroid of the row lies inside both bounds, otherwise False.
    '''
    #Positional lookup, so that row_num addresses the same row as in get_coords even if the index is not 0..n-1
    centroid_y = df['Centroid Y px'].iloc[row_num]
    centroid_x = df['Centroid X px'].iloc[row_num]

    return x[0] <= centroid_y <= x[-1] and y[0] <= centroid_x <= y[-1]

`get_coords`: Gets the modified coordinates contained in a dataframe as a list.

<b>Parameters:</b>\
&emsp;`df`: The dataframe containing coordinates.\
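&emsp;`x`: A range object of the row bound; its first value is subtracted from `Centroid Y px`.\
&emsp;`y`: A range object of the column bound; its first value is subtracted from `Centroid X px`.\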
&emsp;`row_num`: The row number of the coordinates in the dataframe that are currently being looked at.

<b>Returns:</b>\
&emsp;A list containing the coordinates in a row of the dataframe that have been modified to fit in the x and y range bounds where the beginning of those bounds are the new 0, 0.

In [None]:
def get_coords(df, x, y, row_num):
    '''Return the centroid of one dataframe row as [x, y], shifted so that the start of the bounds becomes 0, 0.

    Parameters:
        df: Dataframe with the columns 'Centroid X px' and 'Centroid Y px'.
        x: Range of the row bound; its first value is subtracted from 'Centroid Y px'.
        y: Range of the column bound; its first value is subtracted from 'Centroid X px'.
        row_num: Position (0-based) of the row to read.

    Returns:
        A two element list holding the shifted X and Y coordinates.
    '''
    #Centroid of the requested row in (X, Y) order
    centroid = df[['Centroid X px', 'Centroid Y px']].iloc[row_num].to_numpy()

    #Start of the bounds in the same (X, Y) order: columns first, rows second
    origin = np.array([y[0], x[0]])

    shifted = centroid - origin

    return list(shifted)

## Main Code

This section contains the main code of the program that further analyses the predictions made in the main Program 4.

If the `multiple_epochs` flag is set to False, loop through the input directory, skipping anything that is not a directory. Each remaining directory is an image prediction directory. Then, loop through each time-stamped directory inside the image prediction directory from the first loop. Get the sample image that the directory from the first loop belongs to. Create an output directory inside the directory from the second loop. Load in the base image from `SAMPLE_DIR` and create the same whole image slice from the main Program 4. (Note: Smaller image slices are not currently supported by this program.) Also load in the annotations for the sample image and convert them to a dictionary of annotation coordinates separated by follicle class. Next, load in the overlay image and add markers for every human annotation onto it. If `save_figs` is True, save the newly marked overlay image. Then, load in the predictions dataframe and convert it into a colored overlay matching the `COLOR_PALETTE`. Put this overlay onto the original image. If `save_figs` is True, save the newly created colored overlay image. Finally, add the human annotation markers to the colored overlay image. If `save_figs` is True, save the newly created annotated color overlay image. Save the notebook and copy it to the output directory.

If the `multiple_epochs` flag is set to True, loop through the input directory, skipping everything that is not a directory. Each remaining directory is an epoch directory. Then, loop through each image prediction directory inside the epoch directory from the first loop. Next, loop through each time-stamped directory inside the image prediction directory from the second loop. Get the sample image that the directory from the second loop belongs to. Create an output directory inside the directory from the third loop. Load in the base image from `SAMPLE_DIR` and create the same whole image slice from the main Program 4. (Note: Smaller image slices are not currently supported by this program.) Also load in the annotations for the sample image and convert them to a dictionary of annotation coordinates separated by follicle class. Next, load in the overlay image and add markers for every human annotation onto it. If `save_figs` is True, save the newly marked overlay image. Then, load in the predictions dataframe and convert it into a colored overlay matching the `COLOR_PALETTE`. Put this overlay onto the original image. If `save_figs` is True, save the newly created colored overlay image. Finally, add the human annotation markers to the colored overlay image. If `save_figs` is True, save the newly created annotated color overlay image. Save the notebook and copy it to the output directory.

In [None]:
#Annotation class names in the order of COLOR_PALETTE[1:]; COLOR_PALETTE[0] is only used for the prediction value 0
_ANNOTATION_CLASSES = ["Primordial", "Transitional Primordial", "Primary", "Transitional Primary", "Secondary", "Multilayer"]


def _list_subdirs(parent):
    '''Return the sorted names of the directories located directly inside parent. Files are ignored.'''
    return sorted(name for name in os.listdir(parent) if os.path.isdir(os.path.join(parent, name)))


def _find_sample_key(folder):
    '''Return the first key of SAMPLE_IMAGE_DICT that contains folder, or None if no key matches.'''
    return next((key for key in SAMPLE_IMAGE_DICT if folder in key), None)


def _read_rgb(path):
    '''Read the image at path with OpenCV and return it in RGB channel order.'''
    bgr = cv2.imread(path)

    if bgr is None: #cv2.imread returns None instead of raising when the file is missing or unreadable
        raise FileNotFoundError("Could not read image file: {}".format(path))

    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def _save_figure(directory, file_name, rgb_image):
    '''Write rgb_image as file_name into the output directory of directory, but only if save_figs is set.'''
    if not save_figs:
        return

    cv2.imwrite(directory + OUTPUT_DIR + file_name, cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR))

    print("    - Saved {}".format(file_name))


def _load_annotation_coords(key, row_slice, col_slice):
    '''Load the annotations of the sample image key and group the in-bounds centroids by class.

    Returns a dictionary that maps every class name to [list of [x, y] coordinates, color name].
    The coordinates are relative to the start of row_slice / col_slice.
    '''
    #The µm coordinates are divided by this value to get pixel coordinates; one image uses a different value
    resolution = 0.1725 if key == "DP28_25081_Section3_10X_ome_copy.tif" else 0.69

    annotations = pd.read_csv(SAMPLE_DIR + SAMPLE_IMAGE_DICT[key], sep = "\t")

    annotations[["Centroid X px", "Centroid Y px"]] = annotations[['Centroid X µm', 'Centroid Y µm']] / resolution

    #Chained instead of in place, so dropna never operates on a slice of the original frame.
    #ignore_index renumbers the rows 0..n-1, which the row-number based helpers rely on
    annotations = annotations[['Name', 'Centroid X px', 'Centroid Y px']].dropna(axis = 0, ignore_index = True)

    annot_coords = {name: [[], COLOR_PALETTE[i]] for i, name in enumerate(_ANNOTATION_CLASSES, start = 1)}

    for i, annot_class in enumerate(annotations['Name']):
        #Annotations of any other class are ignored
        if annot_class in annot_coords and check_coord_bounds(annotations, row_slice, col_slice, i):
            annot_coords[annot_class][0].append(get_coords(annotations, row_slice, col_slice, i))

    return annot_coords


def _draw_annotations(canvas, annot_coords, marker_size):
    '''Draw a tilted cross in the class color onto canvas (modified in place) for every annotation coordinate.'''
    for coords, color in annot_coords.values():
        for x, y in coords:
            cv2.drawMarker(canvas, (int(x), int(y)), COLOR_OVERLAY_TRANSLATOR[color], markerType = cv2.MARKER_TILTED_CROSS, thickness = 5, markerSize = marker_size)


def _predictions_to_color(predictions):
    '''Convert the predictions dataframe into a (rows, columns, 3) uint8 RGB array using COLOR_PALETTE.'''
    palette = np.array([COLOR_OVERLAY_TRANSLATOR[color] for color in COLOR_PALETTE], dtype = np.uint8)
    class_ids = predictions.to_numpy()

    #Every value must be a valid palette index; this also rejects NaN and non-integer values
    if not np.isin(class_ids, np.arange(len(palette))).all():
        raise ValueError("Predictions contain values that are not one of 0-{}".format(len(palette) - 1))

    #One vectorized palette lookup instead of calling a Python function for every pixel
    return palette[class_ids.astype(np.intp)]


def _analyze_run(directory, key):
    '''Create all analysis outputs for one time-stamped prediction directory of the sample image key.'''
    #Make the output directory
    make_output_dir(directory + OUTPUT_DIR)

    #Load in the image and create a full image slice
    image = _read_rgb(SAMPLE_DIR + key)

    row_slice = range(window_radius, image.shape[0] - window_radius)
    col_slice = range(window_radius, image.shape[1] - window_radius)

    if len(row_slice) == 0 or len(col_slice) == 0:
        raise ValueError("Image {} is too small for a window radius of {}".format(key, window_radius))

    image = image[row_slice[0]:row_slice[-1] + 1, col_slice[0]:col_slice[-1] + 1, :]

    #Load in the annotations as a coordinate dictionary
    annot_coords = _load_annotation_coords(key, row_slice, col_slice)

    #The marker size is inversely proportional to the height of the cropped image
    marker_size = int((1 / image.shape[0]) * 500000)

    #Load the overlay image, draw the annotations onto it and save it
    overlay = _read_rgb(directory + "Overlay.png")
    _draw_annotations(overlay, annot_coords, marker_size)
    _save_figure(directory, "Annotated_Overlay.png", overlay)

    #Load the predictions dataframe and convert it to a colored overlay
    predictions = _predictions_to_color(pd.read_parquet(directory + "Predictions.parquet"))

    #Put the colored overlay onto the original image and save
    overlay_with_color = cv2.addWeighted(image, 0.7, predictions, 0.3, 0.0)
    _save_figure(directory, "Colored_Overlay.png", overlay_with_color)

    #Draw the annotations onto the colored overlay image and save
    _draw_annotations(overlay_with_color, annot_coords, marker_size)
    _save_figure(directory, "Annotated_Colored_Overlay.png", overlay_with_color)

    #Save the notebook
    APP.commands.execute("docmanager:save")

    #Copy the notebook to the output directory
    shutil.copy2(NOTEBOOK_NAME, directory + OUTPUT_DIR)

    print("    - Saved and copied {}".format(NOTEBOOK_NAME))


#With multiple epochs there is one directory per epoch inside INPUT_DIR.
#Otherwise the image prediction directories are located directly inside INPUT_DIR (marked by None)
epoch_dirs = _list_subdirs(INPUT_DIR) if multiple_epochs else [None]

for epoch_dir in epoch_dirs:
    epoch_path = INPUT_DIR if epoch_dir is None else INPUT_DIR + epoch_dir + "/"

    #Loop through each image prediction directory
    for folder in _list_subdirs(epoch_path):
        #Find the key in SAMPLE_IMAGE_DICT that matches up to the current image prediction directory
        key = _find_sample_key(folder)

        if key is None: #Never fall back to an unrelated sample image
            print("--- Skipping {}: no matching image in SAMPLE_IMAGE_DICT ---".format(folder))
            print()
            continue

        #Loop through each time-stamped directory inside the image prediction directory
        for subfolder in _list_subdirs(epoch_path + folder + "/"):
            if epoch_dir is None:
                print("--- {} ---".format(key))
            else:
                print("--- {} - {} ---".format(epoch_dir, key))

            _analyze_run(epoch_path + folder + "/" + subfolder + "/", key)
        print()
print("Done")

### Convert Notebook to HTML and Move to Output Directory / Clean Up Working Directory

Programmatically save the notebook, convert it to html and move the html file to the base input directory.

In [None]:
#Needed for the next command to work, for some reason
time.sleep(1)

#Programmatically save the notebook
APP.commands.execute("docmanager:save")

#Convert the notebook to html
!jupyter nbconvert --to html "$NOTEBOOK_NAME"

#Move the html file to the input directory
shutil.move(NOTEBOOK_NAME[:-6] + ".html", INPUT_DIR + NOTEBOOK_NAME[:-6] + "_" + datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".html")