## Interobserver Variability

This notebook demonstrates how to compute the interobserver variability of your atlas data, i.e. how closely the contours drawn by different observers for the same structure agree.

In [None]:
import os
import sys
import gc
import re
import time

sys.path.append('../../..')

import pandas as pd

import SimpleITK as sitk

from loguru import logger

# Drop the handlers loguru already has and register a single stdout handler
# with a compact, timestamped line format that reads well in Jupyter output.
logger.remove()
logger.add(
    sys.stdout,
    level="DEBUG",
    format="{time:YYYY-MM-DD HH:mm:ss} {level} {message}",
)

data_path = './data'
working_path = "./working"
# exist_ok avoids the check-then-create race of a separate exists() test.
os.makedirs(working_path, exist_ok=True)

# "<name>_<observer>", where the observer id is one or more digits. Capturing
# all digits keeps observer 10 from being read as observer 1.
_OBSERVER_SUFFIX = re.compile(r"(.*)_([0-9]+)")


def _split_observer(stem):
    """Split a file stem into ``(NAME, observer)``.

    Double underscores are collapsed to a single one before the observer
    suffix is looked for, so the clean-up applies to every file rather than
    only to files without an observer id.

    Args:
        stem: File name up to (not including) its first ``.``.

    Returns:
        A tuple of the upper-cased name and the observer id as a string, or
        ``None`` for the observer when the stem carries no ``_<digits>`` part.
    """
    stem = stem.replace('__', '_')
    match = _OBSERVER_SUFFIX.match(stem)
    if match is None:
        return stem.upper(), None
    return match.group(1).upper(), match.group(2)


def load_observer_files(data_path):
    """Index the files below ``data_path`` by case, name and observer.

    Every sub-directory is treated as one case, keyed by its directory name.
    Files directly inside ``data_path`` are ignored.

    Args:
        data_path: Root directory to walk.

    Returns:
        ``{case: {NAME: file_path}}`` for files without an observer id and
        ``{case: {NAME: {observer: file_path}}}`` for files with one.
    """
    data = {}

    for root, _dirs, files in os.walk(data_path, topdown=False):

        if root == data_path:
            continue

        # basename works for any path separator, unlike splitting on '/'.
        case = os.path.basename(root)
        entries = data[case] = {}

        for f in files:
            file_path = os.path.join(root, f)
            name, observer = _split_observer(f.split('.')[0])

            if observer is None:
                entries[name] = file_path
            else:
                entries.setdefault(name, {})[observer] = file_path

    return data


# Read the data into a dictionary

data = load_observer_files(data_path)



### Compute the interobserver variability for each case

In [None]:
df_inter_ob_var_file = os.path.join(working_path, "df_inter_ob_var.pkl")

# If already computed, read the data from a file
# NOTE(review): loading a pickle can execute arbitrary code; only reuse a
# cache file that this notebook wrote itself.
if os.path.exists(df_inter_ob_var_file):
    print(f'Reading from file: {df_inter_ob_var_file}')
    df_inter_ob_var = pd.read_pickle(df_inter_ob_var_file)
else:

    # One row per ordered observer pair (o1, o2), per structure, per case.
    inter_observe_var = []

    for c in data:
        for s in data[c]:
            if not s.startswith('STRUCT_'):
                continue

            observers = data[c][s]

            # A structure file without an observer suffix is stored as a
            # plain path string; iterating it would walk its characters.
            if not isinstance(observers, dict):
                continue

            # Nothing to compare with fewer than two observers.
            if len(observers) < 2:
                continue

            for o1 in observers:

                # Read once per o1 instead of once per (o1, o2) pair.
                mask_1 = sitk.ReadImage(observers[o1])

                for o2 in observers:

                    if o1 == o2:
                        continue

                    mask_2 = sitk.ReadImage(observers[o2])

                    lomif = sitk.LabelOverlapMeasuresImageFilter()
                    lomif.Execute(mask_1, mask_2)

                    hdif = sitk.HausdorffDistanceImageFilter()
                    hdif.Execute(mask_1, mask_2)

                    dce = lomif.GetDiceCoefficient()
                    hmax = hdif.GetHausdorffDistance()
                    havg = hdif.GetAverageHausdorffDistance()

                    row = {'o1': o1,
                           'o2': o2,
                           'case': c,
                           'struct': s,
                           'dce': dce,
                           'hausdorff_max': hmax,
                           'hausdorff_avg': havg}

                    inter_observe_var.append(row)

    df_inter_ob_var = pd.DataFrame(inter_observe_var)
    print(f'Saving to file: {df_inter_ob_var_file}')
    df_inter_ob_var.to_pickle(df_inter_ob_var_file)

### Output the results

In [None]:
# Summarise the pairwise metrics per structure. The metric columns are
# selected before aggregating because the frame also holds the string columns
# 'o1', 'o2' and 'case', for which a mean/std cannot be computed.
metric_columns = ['dce', 'hausdorff_max', 'hausdorff_avg']
df_inter = df_inter_ob_var.groupby(['struct'])[metric_columns].aggregate(['mean', 'std', 'min', 'max'])
df_inter