# Evaluation of models
Notebook for the analytical evaluation of models.

It parses the evaluation run log of every model and extracts some basic numbers.
It then computes evaluation metrics from them.

In [17]:
import pandas as pd
import re
import ast
import numpy as np
import sys

# Relative path of the directory holding the evaluation logs read below.
data_dir = "../../data"

In [18]:
def evaluate_detection(file_path, src):
    """Parse an evaluation run log (from ClearML) into per-record metric tables.

    The file is scanned line by line. A record starts on a line containing
    ``'classification':`` and ends on the first *later* line that ends with
    ``}}``. The joined text is parsed as a Python dict literal whose
    ``'classification'``, ``'counts'`` and ``'segmentation'`` entries each
    become one row of the corresponding table.

    Records that cannot be parsed are reported on stdout and skipped as a
    whole, so the three tables always share the same index.

    Args:
        file_path: Path of the log file to read.
        src: Label written to the ``src`` column of every returned table.

    Returns:
        Tuple ``(df_classification, df_counts, df_segmentation)``. All three
        are indexed by the 1-based position of the record in the log (skipped
        records leave gaps). ``df_counts`` gains ``TP``, ``FP`` and ``FN``
        columns; ``df_classification`` columns are renamed to
        ``back``, ``cmb``, ``src``.

    Raises:
        KeyError: If no record was parsed, because the count columns needed
            for TP/FP/FN are then missing.

    NOTE(review): a later cell of this notebook defines another function that
    is also named ``evaluate_detection`` (taking ``df_counts, df_f1``); once
    that cell has run, this parser is shadowed.
    """
    # A record is complete when a line ends with two closing braces.
    record_end = re.compile(r'}\s*}\s*$')

    def parse_data(string):
        # Drop trailing commas before closing braces/brackets, then evaluate
        # the text as a literal (no arbitrary code execution).
        string = re.sub(r',\s*}', '}', string)
        string = re.sub(r',\s*\]', ']', string)
        return ast.literal_eval(string)

    classification_data = []
    counts_data = []
    segmentation_data = []
    iteration_data = []

    current_data = ''
    processing = False
    iteration = 0

    with open(file_path, 'r') as file:
        for line in file:
            stripped = line.strip()
            if "'classification':" in line:
                # Start of a new record; anything still pending is discarded.
                processing = True
                iteration += 1
                current_data = stripped
            elif processing:
                current_data += ' ' + stripped
                if not record_end.search(stripped):
                    continue
                processing = False
                try:
                    data_dict = parse_data(current_data)
                    classification = data_dict.get('classification', {})
                    # Each counts entry is indexable; only element 1 is kept.
                    counts = {
                        k: v[1]
                        for k, v in data_dict.get('counts', {}).items()
                    }
                    segmentation = data_dict.get('segmentation', {})
                except Exception as e:
                    print(f"Error parsing data: {e}")
                    print("Faulty data:", current_data)
                else:
                    # Append only after every section was extracted, so a
                    # failure can never leave the lists with unequal lengths.
                    iteration_data.append(iteration)
                    classification_data.append(classification)
                    counts_data.append(counts)
                    segmentation_data.append(segmentation)

    # Create separate pandas DataFrames
    df_classification = pd.DataFrame(classification_data, index=iteration_data)
    df_counts = pd.DataFrame(counts_data, index=iteration_data)
    df_segmentation = pd.DataFrame(segmentation_data, index=iteration_data)

    df_counts['src'] = src
    df_classification['src'] = src
    df_segmentation['src'] = src

    # Per-record detection counts derived from the logged totals.
    overlapping = df_counts['predicted_overlapping_counts']
    df_counts['TP'] = overlapping
    df_counts['FP'] = df_counts['predicted_counts'] - overlapping
    df_counts['FN'] = df_counts['true_counts'] - overlapping

    # Cleaning: the log stores missing values as the string 'nan'.
    df_segmentation = df_segmentation.replace('nan', np.nan)
    # NOTE(review): assumes exactly two classification columns, presumably
    # background first and microbleed second - confirm against the log.
    df_classification.columns = ['back', 'cmb', 'src']

    return df_classification, df_counts, df_segmentation

In [19]:
# Parse the KDD evaluation log; "KDD_all" is the label stored in the `src`
# column of the three resulting tables.
file_path = f'{data_dir}/eval_kdd_all.txt'
df_classification1, df_counts1, df_segmentation1 = evaluate_detection(file_path, src="KDD_all")


In [20]:
# Preview the first rows of the classification table.
df_classification1.head()

Unnamed: 0,back,cmb,src
1,TN,TP,KDD_all
2,TN,TP,KDD_all
3,TN,TP,KDD_all
4,TN,TP,KDD_all


In [21]:
# Preview the first rows of the counts table, including the derived TP/FP/FN.
df_counts1.head()

Unnamed: 0,predicted_counts,predicted_overlapping_counts,true_counts,src,TP,FP,FN
1,26,9,11,KDD_all,9,17,2
2,31,5,18,KDD_all,5,26,13
3,37,10,13,KDD_all,10,27,3
4,14,5,6,KDD_all,5,9,1


In [22]:
# Preview the first rows of the segmentation table.
df_segmentation1.head()

Unnamed: 0,f1_0,f1_1,f1_avg,precision_0,precision_1,precision_avg,recall_0,recall_1,recall_avg,specificity_0,specificity_1,specificity_avg,src
1,0.999945,0.43728,0.43728,0.999951,0.410944,0.410944,0.999939,0.467223,0.467223,0.467223,0.999939,0.999939,KDD_all
2,0.999907,0.156255,0.156255,0.999921,0.13873,0.13873,0.999893,0.178848,0.178848,0.178848,0.999893,0.999893,KDD_all
3,0.999968,0.466051,0.466051,0.999984,0.367767,0.367767,0.999951,0.636027,0.636027,0.636027,0.999951,0.999951,KDD_all
4,0.999975,0.399208,0.399208,0.999982,0.341695,0.341695,0.999969,0.48,0.48,0.48,0.999969,0.999969,KDD_all


## Detection metrics

In [23]:
import pandas as pd

def evaluate_detection(df_counts, df_f1=None):
    """Summarise detection performance over all rows of a counts table.

    Args:
        df_counts: DataFrame with the columns ``predicted_counts``,
            ``predicted_overlapping_counts`` and ``true_counts``; an optional
            ``src`` column supplies the experiment label.
        df_f1: Optional DataFrame; when it has an ``f1_avg`` column, the mean
            of that column is reported as ``DiceScore``.

    Returns:
        One-row DataFrame with the columns ``Experiment``, ``TPR``, ``PPV``,
        ``F1``, ``TPavg``, ``FPavg``, ``FPmedian``, ``FP/cmb``, ``FNavg`` and,
        if available, ``DiceScore``. Ratios whose denominator is zero are
        reported as 0.
    """
    overlapping = df_counts['predicted_overlapping_counts']
    fp_per_row = df_counts['predicted_counts'] - overlapping
    fn_per_row = df_counts['true_counts'] - overlapping
    n_rows = len(df_counts)
    n_true = df_counts['true_counts'].sum()

    # Totals pooled over every row.
    TP = overlapping.sum()
    FP = fp_per_row.sum()
    FN = fn_per_row.sum()

    # Pooled recall, precision and their harmonic mean.
    TPR = 0 if (TP + FN) == 0 else TP / (TP + FN)
    PPV = 0 if (TP + FP) == 0 else TP / (TP + FP)
    F1 = 0 if (PPV + TPR) == 0 else 2 * (PPV * TPR) / (PPV + TPR)

    if 'src' in df_counts:
        # NOTE(review): takes the last element of the first src value, i.e.
        # the final character of a string label - confirm this is intended.
        experiment = df_counts['src'].iloc[0][-1]
    else:
        experiment = 'N/A'

    summary = {
        'Experiment': experiment,
        'TPR': [TPR],
        'PPV': [PPV],
        'F1': [F1],
        # Per-row averages and spread of the error counts.
        'TPavg': [TP / n_rows],
        'FPavg': [FP / n_rows],
        'FPmedian': [np.median(fp_per_row)],
        'FP/cmb': [0 if n_true == 0 else FP / n_true],
        'FNavg': [FN / n_rows],
    }

    if df_f1 is not None and 'f1_avg' in df_f1.columns:
        summary['DiceScore'] = [df_f1['f1_avg'].mean()]

    return pd.DataFrame(summary)

In [24]:
# Aggregate detection metrics from the counts table; f1_avg of the
# segmentation table feeds the DiceScore column.
results_df1 = evaluate_detection(df_counts1, df_segmentation1)

# Display rounded to two decimals; results_df1 itself is not modified.
results_df1.round(2)

Unnamed: 0,Experiment,TPR,PPV,F1,TPavg,FPavg,FPmedian,FP/cmb,FNavg,DiceScore
0,l,0.6,0.27,0.37,7.25,19.75,21.5,1.65,4.75,0.36


## Classification metrics
Here, classification is defined as detecting some microbleed in a subject who has some microbleed.

In [25]:
def evaluate_classification(df):
    """Compute classification metrics from per-record outcome labels.

    Args:
        df: DataFrame whose ``cmb`` column holds one of the strings ``'TP'``,
            ``'FP'``, ``'TN'`` or ``'FN'`` per row. An optional ``src`` column
            supplies the experiment label.

    Returns:
        One-row DataFrame with the columns ``Experiment``, ``TPR``, ``PPV``,
        ``F1``, ``TNR`` and ``ACC``. Ratios whose denominator is zero are
        reported as 0.
    """
    outcomes = df['cmb']

    # Counting the occurrences of each outcome
    TP = np.sum(outcomes == 'TP')
    FP = np.sum(outcomes == 'FP')
    TN = np.sum(outcomes == 'TN')
    FN = np.sum(outcomes == 'FN')

    # Calculating metrics
    TPR = TP / (TP + FN) if (TP + FN) != 0 else 0
    PPV = TP / (TP + FP) if (TP + FP) != 0 else 0
    # Plain scalar: no trailing comma, so no accidental 1-tuple to unwrap.
    F1 = 2 * ((PPV * TPR) / (PPV + TPR)) if (PPV + TPR) != 0 else 0
    TNR = TN / (TN + FP) if (TN + FP) != 0 else 0
    total = TP + FP + TN + FN
    ACC = (TP + TN) / total if total != 0 else 0

    # NOTE(review): the label is the last element of the first src value, i.e.
    # the final character of a string label - confirm this is intended.
    # Falls back to 'N/A' when there is no src column.
    experiment = df['src'].iloc[0][-1] if 'src' in df else 'N/A'

    # Creating a DataFrame for results
    metrics_data = {
        'Experiment': experiment,
        'TPR': [TPR],
        'PPV': [PPV],
        'F1': [F1],
        'TNR': [TNR],
        'ACC': [ACC],
    }
    return pd.DataFrame(metrics_data)

In [26]:
# NOTE(review): this rebinds results_df1, replacing the detection metrics
# stored under the same name by an earlier cell.
results_df1 = evaluate_classification(df_classification1)

# Display rounded to two decimals; results_df1 itself is not modified.
results_df1.round(2)

Unnamed: 0,Experiment,TPR,PPV,F1,TNR,ACC
0,l,1.0,1.0,1.0,0,1.0
