# Debugging Jupyter Notebook for OCTolyzer's collating of files 
This notebook reproduces, step by step, the final stage of OCTolyzer's batch processing: collating the results of all processed files. It is intended to let the end-user debug the pipeline by running and testing each step individually.

### Add OCTolyzer to system paths to permit access to analysis files

In [1]:
import sys
from importlib import reload
# Add the parent directory to the module search path so that the `octolyzer`
# package imported in the cells below can be found. The path is relative, so it
# is resolved against the kernel's current working directory.
sys.path.append(r'../')

### Import necessary packages and libraries

In [2]:
import shutil
import sys
import time
import pandas as pd
import pprint
import numpy as np
from tqdm import tqdm
from pathlib import Path, PosixPath, WindowsPath
from PIL import ImageOps, Image
import matplotlib.pyplot as plt
from skimage import segmentation, morphology
import os
import sys

In [3]:
from octolyzer.measure.slo import slo_measurement
from octolyzer import analyse, collate_data, utils
from octolyzer.segment.octseg import choroidalyzer_inference, deepgpet_inference
from octolyzer.segment.sloseg import slo_inference, avo_inference, fov_inference
from octolyzer.measure.bscan.thickness_maps import grid

  from tqdm.autonotebook import tqdm


### Load in measurements for a specific `.vol` file

In [6]:
# Collects the status messages emitted while loading the measurements below.
logging_list = list()

# Output folder of a single analysed file whose measurements will be loaded.
fname_path = '../demo/output_020225/Radial_1'

In [7]:
# Load every measurement sheet of one file's `<fname>_output.xlsx` workbook into
# `meta_df` (metadata), `slo_dfs` (list of SLO tables) and `oct_dfs` (list of
# OCT tables), recording progress messages in `logging_list`.
msg = "Loading in measurements..."
print(msg)
logging_list.append(msg)
fname = os.path.split(fname_path)[-1]
output_fname = os.path.join(fname_path, f"{fname}_output.xlsx")

oct_dfs = []
slo_dfs = []

# Open the workbook once and look sheets up by name. A missing sheet is the only
# failure that is expected here; any other read error is allowed to surface
# rather than being silently swallowed by a bare `except:`.
with pd.ExcelFile(output_fname) as workbook:
    available_sheets = set(workbook.sheet_names)

    # Load metadata
    meta_df = pd.read_excel(workbook, sheet_name="metadata")
    scan_type = meta_df.bscan_type.iloc[0]

    # Load in SLO measurements: peripapillary scans store one sheet per zone,
    # every other scan type only stores the "whole" sheet
    if scan_type == "Peripapillary":
        slo_zones = ["B", "C", "whole"]
    else:
        slo_zones = ["whole"]
    for zone in slo_zones:
        sheet = f"slo_measurements_{zone}"
        if sheet in available_sheets:
            df = pd.read_excel(workbook, sheet_name=sheet)
        else:
            # Empty placeholder so that `slo_dfs` keeps one entry per zone
            df = pd.DataFrame()
        slo_dfs.append(df)

    # Load in OCT measurements
    if scan_type == "Ppole":
        keys = ['etdrs_measurements', 'etdrs_volume_measurements', 'square_measurements', 'square_volume_measurements']
    else:
        keys = ['oct_measurements']
    for key in keys:
        if key in available_sheets:
            oct_dfs.append(pd.read_excel(workbook, sheet_name=key))
        else:
            # Unlike the SLO sheets, missing OCT sheets are skipped entirely
            print(f'{key} does not exist. Skipping.')

msg = "Successfully loaded in all measurements!\n"
print(msg)
logging_list.append(msg)

Loading in measurements...
Successfully loaded in all measurements!



### Collate results

In [9]:
# Analysis options used by the cells below. Flags are 0/1 integers.
param_dict = {
    "save_individual_segmentations": 1,
    "save_individual_images": 1,
    "preprocess_bscans": 1,
    "analyse_choroid": 1,
    "analyse_slo": 1,
    # this cannot be "0" like it is in config.txt - it is an empty list
    "custom_maps": [],
    "analyse_all_maps": 1,
    "analyse_square_grid": 1,
    "choroid_measure_type": "perpendicular",
    "linescan_roi_distance": 3000,
}

# Unpack the options into the individual names used by the rest of the notebook.

# B-scan preprocessing flag
preprocess_data = param_dict["preprocess_bscans"]

# Saving out representative B-scan/SLO images and segmentation masks
save_ind_segmentations, save_ind_images = (
    param_dict["save_individual_segmentations"],
    param_dict["save_individual_images"],
)

# Custom retinal thickness maps, and whether all maps are analysed
custom_maps, all_maps = (
    param_dict["custom_maps"],
    param_dict["analyse_all_maps"],
)

# Choroid analysis: on/off flag and the user-specified measurement type
analyse_choroid, chor_measure_type = (
    param_dict['analyse_choroid'],
    param_dict['choroid_measure_type'],
)

# Square grid for Ppole scans
sq_grid_flag = param_dict['analyse_square_grid']

# SLO analysis on/off flag
analyse_slo_flag = param_dict['analyse_slo']

# User-specified ROI distance either side of the fovea
macula_rum = param_dict['linescan_roi_distance']

In [10]:
# Batch output directory: it holds one sub-folder per analysed file and is where
# the collated `analysis_output.xlsx` and `analysis_log.txt` are written.
save_directory = '../demo/output_020225'

In [11]:
# Gather the `.vol` files of the demo input folder in sorted order, then
# display them as the cell output.
input_directory = Path('../demo/input')
vol_paths = sorted(input_directory.glob("*.vol"))
vol_paths

[PosixPath('../demo/input/Linescan_1.vol'),
 PosixPath('../demo/input/Peripapillary_1.vol'),
 PosixPath('../demo/input/Ppole_1.vol'),
 PosixPath('../demo/input/Radial_1.vol')]

In [12]:
# `robust_run` is not assigned anywhere in this notebook. Read it from
# `param_dict` when present and otherwise default to 0, so that errors surface
# with a full traceback instead of being caught and logged.
robust_run = param_dict.get("robust_run", 0)

# NOTE(review): the segmentation models passed to `analyse.analyse` below
# (`choroidalyzer`, `slosegmenter`, `avosegmenter`, `fovsegmenter`, `deepgpet`)
# are not instantiated in this notebook either. They are only reached for files
# that have no existing `<fname>_output.xlsx`; create them in an earlier cell
# before analysing new files, otherwise a NameError is raised.


def _analyse_and_store(path, fname_type):
    """Run `analyse.analyse` on `path` and store its outputs for `fname_type`.

    The results are written into `oct_slo_result_dict[fname_type]` under the
    keys 'metadata', 'slo', 'oct' and 'log'. When no SLO output is returned, or
    SLO analysis is switched off, 'slo' is a list holding one empty DataFrame.
    """
    slo_analysis_output, oct_analysis_output = analyse.analyse(
        path,
        save_directory,
        choroidalyzer,
        slosegmenter,
        avosegmenter,
        fovsegmenter,
        deepgpet,
        param_dict,
    )
    entry = oct_slo_result_dict[fname_type]
    entry['metadata'] = oct_analysis_output[0]
    if slo_analysis_output is not None and analyse_slo_flag:
        entry['slo'] = slo_analysis_output[1]
    else:
        entry['slo'] = [pd.DataFrame()]
    entry['oct'] = oct_analysis_output[3]
    entry['log'] = oct_analysis_output[-1]


oct_slo_result_dict = {}
for path in tqdm(vol_paths, desc='Analysing...', leave=False):

    # `Path.name` is the final path component on both POSIX and Windows
    fname_type = path.name
    oct_slo_result_dict[fname_type] = {}
    # Everything before the first "." names the per-file output folder
    fname = fname_type.split(".")[0]

    fname_path = os.path.join(save_directory, fname)
    output_fname = os.path.join(fname_path, f"{fname}_output.xlsx")

    # Any `.nii.gz` file in the output folder that does not end in
    # `_used.nii.gz` switches the `manual_annotation` flag on and forces
    # re-analysis
    n_annotations = len(list(Path(fname_path).glob("*.nii.gz")))
    n_used_annotations = len(list(Path(fname_path).glob("*_used.nii.gz")))
    slo_mannotations = (n_annotations - n_used_annotations) > 0
    param_dict['manual_annotation'] = int(slo_mannotations)

    if os.path.exists(output_fname) and not slo_mannotations:
        # Results already exist on disk: load them instead of re-analysing
        print(f"Previously analysed {fname}.")
        ind_df, slo_dfs, oct_dfs, log = collate_data._load_files(fname_path, logging_list=[])
        oct_slo_result_dict[fname_type]['metadata'] = ind_df
        if analyse_slo_flag:
            oct_slo_result_dict[fname_type]['slo'] = slo_dfs
        else:
            oct_slo_result_dict[fname_type]['slo'] = [pd.DataFrame()]
        oct_slo_result_dict[fname_type]['oct'] = oct_dfs
        oct_slo_result_dict[fname_type]['log'] = log

    elif "_ANGIO" in fname:
        print(f"{fname} is an OCT-A scan. Skipping.\n\n")
        # Remove the placeholder entry: an empty dict has no 'oct' key and
        # would raise a KeyError when the results are collated
        del oct_slo_result_dict[fname_type]

    elif robust_run:
        try:
            _analyse_and_store(path, fname_type)
        except Exception as e:
            # print and log error
            user_fail = f"\nFailed to analyse {fname}."
            failure_log = [user_fail] + utils.print_error(e)
            skip = "Skipping and moving to next file.\nCheck data input and/or set robust_run to 0 to debug code.\n"
            print(skip)

            # Try at least save out metadata from loading volfile for failed
            # file - making sure to mark in FAILED column
            try:
                _, metadata, _, _, _ = utils.load_volfile(path, verbose=False)
                metadata['FAILED'] = True
                if metadata["bscan_type"] == 'Peripapillary':
                    # `pop` rather than `del`: a missing key must not be
                    # mistaken for a failure to load the file
                    metadata.pop('stxy_coord', None)

            # Catch any exceptions with failing to even load image and metadata
            # from volfile
            except Exception:
                metadata = {'Filename':os.path.split(path)[1]}
                fail_load = "Failed to even load path, check utils.load_volfile"
                print(fail_load)
                # Append to the list that is actually stored, so the message
                # ends up in the saved log
                failure_log.append(fail_load)

            # A string under 'oct' marks this file as failed for the collation
            # step
            oct_slo_result_dict[fname_type]['metadata'] = metadata
            oct_slo_result_dict[fname_type]['oct'] = failure_log[0]
            oct_slo_result_dict[fname_type]['log'] = failure_log

    else:
        # Non-robust mode: let any exception propagate for debugging
        _analyse_and_store(path, fname_type)

Analysing...:  25%|█████████▊                             | 1/4 [00:00<00:00,  7.93it/s]

Previously analysed Linescan_1.
Loading in measurements...
Successfully loaded in all measurements!

Previously analysed Peripapillary_1.
Loading in measurements...
Successfully loaded in all measurements!

Previously analysed Ppole_1.
Loading in measurements...


                                                                                        

square_measurements does not exist. Skipping.
square_volume_measurements does not exist. Skipping.
Successfully loaded in all measurements!

Previously analysed Radial_1.
Loading in measurements...
Successfully loaded in all measurements!





In [13]:
# Alias under the name used by the collation cell below; no copy is made, both
# names refer to the same dictionary.
result_dict = oct_slo_result_dict

In [14]:
# SLO analysis flag; used when saving to warn if it is set but no SLO
# measurements were collated.
analyse_slo = param_dict['analyse_slo']
# Folder containing the `figures` directory from which the Ppole grid images
# are copied into the output directory when saving.
MODULE_PATH = '../'

### Loop over the loaded results of every file and collate them into unified DataFrames

In [15]:
def _flatten_ppole_measurements(df, volume):
    """Flatten one Ppole measurement table into a single-row DataFrame.

    The first two columns of `df` are skipped. The remaining values are laid
    out row by row, and each output column is named `<column>_<map_name>`
    followed by a unit suffix: `_[mm3]` when `volume` is true, otherwise
    `_[um]` except for the `choroid_CVI` and `choroid_vessel` maps, which get
    no suffix.
    """
    values = df.values[:, 2:].flatten()
    measure_cols = list(df.columns[2:])
    map_names = df.map_name.values

    if volume:
        columns = [col+f"_{mtype}_[mm3]" for mtype in map_names for col in measure_cols]
    else:
        # NOTE(review): the original code chose between no suffix and `_[um2]`
        # for these two maps with `mtype == 'vessel'`, which can never be true
        # here, so `_[um2]` was never applied. That behaviour is kept so the
        # output column names do not change; confirm whether `choroid_vessel`
        # was meant to carry `_[um2]`.
        unitless_maps = ['choroid_CVI', 'choroid_vessel']
        columns = [col+f"_{mtype}" if mtype in unitless_maps else col+f"_{mtype}_[um]"
                   for mtype in map_names for col in measure_cols]

    return pd.DataFrame(values.reshape(1, -1), columns=columns, index=[0])


print(f"Collecting all results together into one output file.")
all_logging_list = []
all_slo_df = pd.DataFrame()
all_ind_df = pd.DataFrame()
all_oct_linescan_df = pd.DataFrame()
all_oct_ppole_df = pd.DataFrame()
all_oct_sq_ppole_df = pd.DataFrame()
all_oct_peri_df = pd.DataFrame()
all_oct_radial_df = pd.DataFrame()
for fname_type, output in result_dict.items():

    # Entries left empty by the analysis loop (e.g. skipped OCT-A scans) hold
    # no results or log, and indexing them below would raise a KeyError
    if not output:
        continue

    fname = fname_type.split(".")[0]
    all_logging_list.append(f"\n\nANALYSIS LOG OF {fname_type}")

    # Failed files are marked by a string under 'oct': write their error log
    # and build a one-row metadata frame
    if isinstance(output['oct'], str):
        log = output['log']
        metadata = output['metadata']
        fname_path = os.path.join(save_directory, fname)
        all_logging_list.extend(log)
        # The analysis may have failed before the output folder was created
        os.makedirs(fname_path, exist_ok=True)
        with open(os.path.join(fname_path, f"{fname}_log.txt"), "w") as f:
            for line in log:
                f.write(line+"\n")
        ind_df = pd.DataFrame(metadata, index=[0])

    # Otherwise, collate the measurements
    else:

        ind_df = output['metadata']
        bscan_type = ind_df.bscan_type.iloc[0]
        slo_dfs = output['slo']
        oct_dfs = output['oct']
        N_oct = len(oct_dfs)
        logging_list = output['log']
        all_logging_list.extend(logging_list)

        # process SLO measurement DFs and save out global dataframe
        flat_slo_df = pd.DataFrame()
        if len(slo_dfs[0]) > 0:
            rtypes = []
            for df in slo_dfs:
                if len(df) == 0:
                    continue
                # flatten
                dfarr = df.values.flatten()

                # Collect all columns in flattened DF, named
                # <column>_<vessel_map>_<zone>
                dfcols = list(df.columns)
                rtype = df.zone.iloc[0]
                rtypes.append(rtype)
                vtypes = df.vessel_map.values
                dfcols = [col+f"_{vtype}_{rtype}" for vtype in vtypes for col in dfcols]
                df_flatten = pd.DataFrame(dfarr.reshape(1,-1), columns = dfcols, index=[0])

                # Remove indexing columns and concatenate with different zones
                cols_to_drop = df_flatten.columns[df_flatten.columns.str.contains("vessel_map|zone")]
                df_flatten = df_flatten.drop(cols_to_drop, axis=1, inplace=False)
                flat_slo_df = pd.concat([flat_slo_df, df_flatten], axis=1)

            # Order feature columns by importance in literature. `vtypes` is
            # the vessel map list of the last non-empty table read above.
            order_str = ["fractal_dimension", "vessel_density", "tortuosity_density", 'average_global_calibre',
                         'average_local_calibre', 'CRAE_Knudtson', 'CRVE_Knudtson', 'AVR']
            order_str_rv = [col+f"_{vtype}_{rtype}"
                                for rtype in rtypes[::-1]
                                    for vtype in vtypes for col in order_str]
            flat_slo_df = flat_slo_df[order_str_rv]
            # Drop every column holding a -1 value
            flat_slo_df = flat_slo_df.loc[:, ~(flat_slo_df == -1).any()]
            flat_slo_df = flat_slo_df.rename({f"AVR_artery-vein_{rtype}":f"AVR_{rtype}"
                                              for rtype in rtypes}, inplace=False, axis=1)

            # Concatenate measurements with metadata filename
            ind_slo_df = pd.concat([ind_df['Filename'], flat_slo_df], axis=1)

        # process OCT measurement DFs and save out global dataframe
        flat_oct_df = pd.DataFrame()
        flat_sq_oct_df = pd.DataFrame()

        # For Peripapillary, H-line/V-line and Radial scans
        if bscan_type != 'Ppole':
            for df in oct_dfs:

                # Remove choroid measurements if not analysing choroid
                if not analyse_choroid:
                    df = df[df['layer'] != 'CHORupper_CHORlower']

                # Flatten and collect all flattened columns through combining
                # layers, features and potentially scan number
                if bscan_type == "Peripapillary":
                    ltypes = df.layer.values
                    dfarr = df.values[:,1:].flatten()
                    dfcols = list(df.columns[1:])
                    dfcols = [f"{layer}_{col}" for layer in ltypes for col in dfcols]

                else:
                    # assumes rows are ordered by layer, then by scan number,
                    # so the generated names line up with the flattened values
                    # - TODO confirm
                    dfarr = df.values[:,2:].flatten()
                    ltypes = df.layer.drop_duplicates().values
                    dfcols = list(df.columns[2:])
                    scan_idxs = df.scan_number.drop_duplicates().values
                    dfcols = [f"{layer}_{col}_{idx}" for layer in ltypes for idx in scan_idxs for col in dfcols]

                df_flatten = pd.DataFrame(dfarr.reshape(1,-1), columns = dfcols, index=[0])

                # Concatenate to flatten dF
                flat_oct_df = pd.concat([flat_oct_df, df_flatten], axis=1).dropna(axis=1)

        else:

            # The first two tables are flattened into `flat_oct_df`: index 0
            # gets thickness/area suffixes, index 1 gets the volume suffix
            for idx, df in enumerate(oct_dfs[:2]):

                # Remove choroid measurements if not analysing choroid
                if not analyse_choroid:
                    df = df[~df['map_name'].isin(['choroid_vessel', 'choroid_CVI', 'choroid'])]

                df_flatten = _flatten_ppole_measurements(df, volume=(idx != 0))

                # Concatenate to flatten dataframe
                flat_oct_df = pd.concat([flat_oct_df, df_flatten], axis=1).dropna(axis=1)

            # Any further tables are flattened the same way into
            # `flat_sq_oct_df`.
            # NOTE(review): unlike the loop above, choroid maps are not
            # filtered out here when `analyse_choroid` is off - confirm whether
            # that is intended.
            if N_oct > 2:
                for idx, df in enumerate(oct_dfs[2:]):
                    df_flatten = _flatten_ppole_measurements(df, volume=(idx != 0))

                    # Concatenate to flatten dataframe
                    flat_sq_oct_df = pd.concat([flat_sq_oct_df, df_flatten], axis=1).dropna(axis=1)

        # Concatenate measurements with metadata
        ind_oct_df = pd.concat([ind_df['Filename'], flat_oct_df], axis=1)
        if N_oct > 2:
            ind_sq_oct_df = pd.concat([ind_df['Filename'], flat_sq_oct_df], axis=1)

        # Concatenate to create global dataframe of SLO results
        if len(slo_dfs[0]) > 0:
            all_slo_df = pd.concat([all_slo_df, ind_slo_df], axis=0)

        # Append row to global dataframe of OCT results dependent on bscan type
        if bscan_type == 'Ppole':
            all_oct_ppole_df = pd.concat([all_oct_ppole_df, ind_oct_df], axis=0)
            if N_oct > 2:
                all_oct_sq_ppole_df = pd.concat([all_oct_sq_ppole_df, ind_sq_oct_df], axis=0)
        elif bscan_type == 'Peripapillary':
            all_oct_peri_df = pd.concat([all_oct_peri_df, ind_oct_df], axis=0)
        elif bscan_type == 'Radial':
            all_oct_radial_df = pd.concat([all_oct_radial_df, ind_oct_df], axis=0)
        else:
            all_oct_linescan_df = pd.concat([all_oct_linescan_df, ind_oct_df], axis=0)

    # Concatenate metadata to global dataframe of metadata. NaNs created by
    # different Bscan types having different columns are filled with "NA".
    # The original test `(len(all_ind_df)) > 0 & (len(ind_df) > 0)` parsed as
    # `len(all_ind_df) > (0 & ...)`, so the `ind_df` check never took effect;
    # the two cases are now explicit. An empty `ind_df` must not replace the
    # metadata that has already been accumulated.
    if len(ind_df) == 0:
        continue
    if len(all_ind_df) > 0:
        all_ind_df = pd.concat([all_ind_df.fillna("NA"), ind_df.fillna("NA")], axis=0)
    else:
        all_ind_df = ind_df.copy()

Collecting all results together into one output file.


### Save out collated results

In [18]:
# Write the collated tables to `analysis_output.xlsx`, copy the Ppole grid
# figures next to it, and save the combined log to `analysis_log.txt`.

# Reset index
all_ind_df = all_ind_df.reset_index(drop=True)
all_slo_df = all_slo_df.reset_index(drop=True)
all_oct_ppole_df = all_oct_ppole_df.reset_index(drop=True)
all_oct_sq_ppole_df = all_oct_sq_ppole_df.reset_index(drop=True)
all_oct_peri_df = all_oct_peri_df.reset_index(drop=True)
all_oct_linescan_df = all_oct_linescan_df.reset_index(drop=True)
all_oct_radial_df = all_oct_radial_df.reset_index(drop=True)

# Remove any rows in all_oct_linescan_df which are just -1s, i.e. fovea was not detected
# (the first column is excluded from the check; the same is done for radial scans)
all_oct_linescan_df = all_oct_linescan_df[~(all_oct_linescan_df.iloc[:, 1:]==-1).all(axis=1)]
all_oct_radial_df = all_oct_radial_df[~(all_oct_radial_df.iloc[:, 1:]==-1).all(axis=1)]

# OCT sheets in the order they are written: (sheet name, table, grid figure in
# MODULE_PATH/figures that accompanies the sheet, or None)
oct_sheets = [
    ('OCT_Linescan_measurements', all_oct_linescan_df, None),
    ('OCT_Radial_measurements', all_oct_radial_df, None),
    ('OCT_Ppole_ETDRS_measurements', all_oct_ppole_df, 'etdrs_posterior_pole_grid.png'),
    ('OCT_Ppole_Square_measurements', all_oct_sq_ppole_df, 'square_posterior_pole_grid.png'),
    ('OCT_Peripapillary_measurements', all_oct_peri_df, None),
]
figures_to_copy = []

# save out global metadata and measurements
with pd.ExcelWriter(os.path.join(save_directory, 'analysis_output.xlsx')) as writer:

    # write metadata
    all_ind_df.to_excel(writer, sheet_name='metadata', index=False)

    # SLO
    if all_slo_df.shape[0] > 0:
        all_slo_df.to_excel(writer, sheet_name='SLO_measurements', index=False)
    elif analyse_slo:
        print('WARNING: analyse_slo flag is 1, but there are no SLO measurements loaded!')

    # OCT measurements, save out sheets if populated
    for sheet_name, sheet_df, figure_name in oct_sheets:
        if sheet_df.shape[0] == 0:
            continue
        sheet_df.to_excel(writer, sheet_name=sheet_name, index=False)
        if figure_name is not None:
            figures_to_copy.append(figure_name)

    # Save out metadata key and descriptions
    utils.metakey_df.to_excel(writer, sheet_name='metadata_keys', index=False)

# Copy the grid figures only after the workbook is complete, and tolerate a
# missing figure: a failed copy must not prevent the `metadata_keys` sheet or
# the analysis log from being written.
for figure_name in figures_to_copy:
    img_path = os.path.join(MODULE_PATH, 'figures', figure_name)
    if os.path.exists(img_path):
        shutil.copy(img_path, os.path.join(save_directory, figure_name))
    else:
        print(f'WARNING: could not find {img_path}. Skipping copy.')

# save out log
with open(os.path.join(save_directory, "analysis_log.txt"), "w") as f:
    for line in all_logging_list:
        f.write(line+"\n")