# Evaluation of models
Notebook to analytically evaluate the models.

In [27]:
import pandas as pd
import re
import ast
import numpy as np


# Directory (relative to this notebook) that holds the processed evaluation logs.
data_dir = "../data"

In [28]:
def evaluate_detection(file_path, src):
    """Parse an evaluation log into classification, counts and segmentation frames.

    The log is scanned line by line. A record starts on a line containing
    ``'classification':`` and ends on the first following line containing
    ``}}``. The stripped lines of a record are concatenated and parsed with
    ``ast.literal_eval``. A record that cannot be parsed is reported and
    skipped, so one corrupt entry neither aborts the run nor re-appends the
    previously parsed record.

    NOTE(review): a later cell of this notebook defines a second function that
    is also called ``evaluate_detection`` (it takes DataFrames). Once that cell
    has run, this parser is shadowed -- consider renaming one of them.

    Parameters
    ----------
    file_path : str
        Path of the text log to read.
    src : str
        Label written to the ``src`` column of every returned frame.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_classification, df_counts, df_segmentation)``. Each frame is
        indexed by the 1-based position of the record in the log (skipped
        records leave a gap in the index).

        * ``df_classification``: columns renamed positionally to ``back``,
          ``cmb`` and ``src`` -- assumes every record has exactly two
          classification entries in that order (TODO confirm against the log).
        * ``df_counts``: first element of each ``counts`` entry plus ``src``
          and the derived ``TP``, ``FP`` and ``FN`` columns.
        * ``df_segmentation``: the ``segmentation`` entries with the string
          ``'nan'`` replaced by ``numpy.nan``, plus ``src``.
    """
    classification_rows = []
    counts_rows = []
    segmentation_rows = []
    record_ids = []

    buffer = ''
    in_record = False
    record_id = 0

    with open(file_path, 'r') as file:
        for line in file:
            # A new record always restarts the buffer, even mid-record.
            if "'classification':" in line:
                in_record = True
                record_id += 1
                buffer = line.strip()
                continue
            if not in_record:
                continue

            buffer += line.strip()
            if '}}' not in line:
                continue

            # End of record reached: parse what has been accumulated.
            in_record = False
            try:
                data_dict = ast.literal_eval(buffer)
            except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
                # Exceptions documented for literal_eval on malformed input.
                print(f"Skipping unparsable record {record_id}: {buffer}")
                continue
            if not isinstance(data_dict, dict):
                print(f"Skipping non-dict record {record_id}: {buffer}")
                continue

            record_ids.append(record_id)
            classification_rows.append(data_dict.get('classification', {}))
            # Each counts entry is a sequence; only its first element is kept.
            counts_rows.append(
                {key: value[0] for key, value in data_dict.get('counts', {}).items()}
            )
            segmentation_rows.append(data_dict.get('segmentation', {}))

    df_classification = pd.DataFrame(classification_rows, index=record_ids)
    df_counts = pd.DataFrame(counts_rows, index=record_ids)
    df_segmentation = pd.DataFrame(segmentation_rows, index=record_ids)

    # Missing values are logged as the string 'nan'; turn them into real NaNs
    # and let pandas re-infer numeric dtypes for the cleaned columns.
    df_segmentation = df_segmentation.replace('nan', np.nan).infer_objects()

    df_counts['src'] = src
    df_classification['src'] = src
    df_segmentation['src'] = src

    # Per-record detection counts.
    overlapping = df_counts['predicted_overlapping_counts']
    df_counts['TP'] = overlapping
    df_counts['FP'] = df_counts['predicted_counts'] - overlapping
    df_counts['FN'] = df_counts['true_counts'] - overlapping

    df_classification.columns = ['back', 'cmb', 'src']

    return df_classification, df_counts, df_segmentation

In [29]:
# Parse the processed evaluation log of every experiment in turn.
experiment_frames = {}
for experiment_id in (1, 2, 3):
    file_path = f'{data_dir}/eval_log_exp{experiment_id}_processed.txt'
    experiment_frames[experiment_id] = evaluate_detection(
        file_path, src=f"Experiment{experiment_id}"
    )

# Expose the frames under the per-experiment names used by the later cells.
df_classification1, df_counts1, df_segmentation1 = experiment_frames[1]
df_classification2, df_counts2, df_segmentation2 = experiment_frames[2]
df_classification3, df_counts3, df_segmentation3 = experiment_frames[3]


In [30]:
# Preview the first classification rows parsed for Experiment 1.
df_classification1.head()

Unnamed: 0,back,cmb,src
1,TN,TP,Experiment1
2,FN,FP,Experiment1
3,TN,TP,Experiment1
4,FN,FP,Experiment1
5,TN,TP,Experiment1


In [31]:
# Preview the first count rows (with derived TP/FP/FN) for Experiment 1.
df_counts1.head()

Unnamed: 0,predicted_counts,predicted_overlapping_counts,true_counts,src,TP,FP,FN
1,20,3,14,Experiment1,3,17,11
2,12,0,0,Experiment1,0,12,0
3,7,1,2,Experiment1,1,6,1
4,2,0,0,Experiment1,0,2,0
5,8,2,3,Experiment1,2,6,1


In [32]:
# Preview the first segmentation rows parsed for Experiment 2.
df_segmentation2.head()

Unnamed: 0,f1_0,f1_1,f1_avg,precision_0,precision_1,precision_avg,recall_0,recall_1,recall_avg,specificity_0,specificity_1,specificity_avg,src
1,0.999997,0.0,0.0,1.0,0.0,0.0,0.999994,,,,0.999994,0.999994,Experiment2
2,0.99998,0.099174,0.099174,0.999993,0.0625,0.0625,0.999967,0.24,0.24,0.24,0.999967,0.999967,Experiment2
3,0.999998,0.0,0.0,1.0,0.0,0.0,0.999996,,,,0.999996,0.999996,Experiment2
4,1.0,0.0,0.0,1.0,0.0,0.0,1.0,,,,1.0,1.0,Experiment2
5,0.999998,0.0,0.0,1.0,0.0,0.0,0.999996,,,,0.999996,0.999996,Experiment2


In [33]:
# Preview the first count rows (with derived TP/FP/FN) for Experiment 3.
df_counts3.head()

Unnamed: 0,predicted_counts,predicted_overlapping_counts,true_counts,src,TP,FP,FN
1,0,0,0,Experiment3,0,0,0
2,5,3,3,Experiment3,3,2,0
3,0,0,0,Experiment3,0,0,0
4,0,0,0,Experiment3,0,0,0
5,1,0,0,Experiment3,0,1,0


In [34]:
# Preview the first classification rows parsed for Experiment 3.
df_classification3.head()

Unnamed: 0,back,cmb,src
1,TP,TN,Experiment3
2,TN,TP,Experiment3
3,TP,TN,Experiment3
4,TP,TN,Experiment3
5,FN,FP,Experiment3


In [35]:
# Preview the first segmentation rows parsed for Experiment 3.
df_segmentation3.head()

Unnamed: 0,f1_0,f1_1,f1_avg,precision_0,precision_1,precision_avg,recall_0,recall_1,recall_avg,specificity_0,specificity_1,specificity_avg,src
1,1.0,,,1.0,,,1.0,,,,1.0,1.0,Experiment3
2,0.999997,0.605,0.605,0.999996,0.691429,0.691429,0.999998,0.537778,0.537778,0.537778,0.999998,0.999998,Experiment3
3,1.0,,,1.0,,,1.0,,,,1.0,1.0,Experiment3
4,1.0,,,1.0,,,1.0,,,,1.0,1.0,Experiment3
5,0.999999,0.0,0.0,1.0,0.0,0.0,0.999998,,,,0.999998,0.999998,Experiment3


## Detection metrics

In [36]:
import pandas as pd

def evaluate_detection(df_counts, df_f1=None):
    """Aggregate count-based detection metrics over all rows of ``df_counts``.

    NOTE(review): this definition reuses the name of the log-parsing function
    defined earlier in the notebook and therefore shadows it once this cell
    has run -- consider renaming one of the two.

    Parameters
    ----------
    df_counts : pandas.DataFrame
        Must contain ``predicted_counts``, ``predicted_overlapping_counts``
        and ``true_counts``. An optional ``src`` column supplies the
        experiment label.
    df_f1 : pandas.DataFrame, optional
        If given and it has an ``f1_avg`` column, the mean of that column is
        reported as ``DiceScore``.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``Experiment``, ``TPR``, ``PPV``,
        ``F1``, ``TPavg``, ``FPavg``, ``FPmedian``, ``FP/cmb``, ``FNavg`` and,
        when available, ``DiceScore``. Ratios with a zero denominator
        (including every per-row average of an empty frame) are reported as 0.
        ``Experiment`` is the run of digits that ends the first ``src`` value
        (e.g. ``"Experiment12"`` -> ``"12"``), the whole ``src`` value when it
        does not end in digits, or ``'N/A'`` when no label is available.
    """
    n_rows = len(df_counts)
    overlapping = df_counts['predicted_overlapping_counts']
    fp_per_row = df_counts['predicted_counts'] - overlapping
    fn_per_row = df_counts['true_counts'] - overlapping

    # Totals over all rows.
    TP = overlapping.sum()
    FP = fp_per_row.sum()
    FN = fn_per_row.sum()
    true_total = df_counts['true_counts'].sum()

    TPR = TP / (TP + FN) if (TP + FN) != 0 else 0
    PPV = TP / (TP + FP) if (TP + FP) != 0 else 0
    F1 = 2 * (PPV * TPR) / (PPV + TPR) if (PPV + TPR) != 0 else 0

    # Use the full trailing number of the label, not just its last character.
    if 'src' in df_counts and n_rows:
        src_label = str(df_counts['src'].iloc[0])
        trailing_digits = re.search(r'\d+$', src_label)
        experiment = trailing_digits.group() if trailing_digits else src_label
    else:
        experiment = 'N/A'

    results_df = pd.DataFrame({
        'Experiment': [experiment],
        'TPR': [TPR],
        'PPV': [PPV],
        'F1': [F1],
        'TPavg': [TP / n_rows if n_rows else 0],
        'FPavg': [FP / n_rows if n_rows else 0],
        'FPmedian': [np.median(fp_per_row) if n_rows else 0],
        'FP/cmb': [FP / true_total if true_total != 0 else 0],
        'FNavg': [FN / n_rows if n_rows else 0],
    })

    # Mean of the per-row f1_avg values (pandas skips NaN entries by default).
    if df_f1 is not None and 'f1_avg' in df_f1.columns:
        results_df['DiceScore'] = df_f1['f1_avg'].mean()

    return results_df

In [37]:
# One single-row detection summary per experiment.
results_df1, results_df2, results_df3 = [
    evaluate_detection(counts, segmentation)
    for counts, segmentation in (
        (df_counts1, df_segmentation1),
        (df_counts2, df_segmentation2),
        (df_counts3, df_segmentation3),
    )
]

# Stack the summaries and show them rounded to two decimals.
results_df = pd.concat([results_df1, results_df2, results_df3])

results_df.round(2)

Unnamed: 0,Experiment,TPR,PPV,F1,TPavg,FPavg,FPmedian,FP/cmb,FNavg,DiceScore
0,1,0.61,0.22,0.32,1.64,5.91,4.0,2.2,1.05,0.18
0,2,0.79,0.14,0.24,1.36,8.23,5.5,4.76,0.36,0.22
0,3,0.71,0.44,0.54,1.23,1.59,1.0,0.92,0.5,0.37


## Classification metrics
Classification here means detecting at least one microbleed in a subject that has at least one microbleed.

In [38]:
def evaluate_classification(df):
    """Summarise the per-row classification outcomes stored in ``df['cmb']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``cmb`` column whose values are the strings ``'TP'``,
        ``'FP'``, ``'TN'`` or ``'FN'``. An optional ``src`` column supplies
        the experiment label.

    Returns
    -------
    pandas.DataFrame
        A single row with the columns ``Experiment``, ``TPR``, ``PPV``,
        ``F1``, ``TNR`` and ``ACC``. Any ratio with a zero denominator is
        reported as 0. ``Experiment`` is the run of digits that ends the first
        ``src`` value (e.g. ``"Experiment12"`` -> ``"12"``), the whole ``src``
        value when it does not end in digits, or ``'N/A'`` when no label is
        available.
    """
    outcomes = df['cmb']
    TP = (outcomes == 'TP').sum()
    FP = (outcomes == 'FP').sum()
    TN = (outcomes == 'TN').sum()
    FN = (outcomes == 'FN').sum()
    total = TP + FP + TN + FN

    TPR = TP / (TP + FN) if (TP + FN) != 0 else 0
    PPV = TP / (TP + FP) if (TP + FP) != 0 else 0
    # Plain scalar: the original trailing comma turned this into a 1-tuple.
    F1 = 2 * ((PPV * TPR) / (PPV + TPR)) if (PPV + TPR) != 0 else 0
    TNR = TN / (TN + FP) if (TN + FP) != 0 else 0
    ACC = (TP + TN) / total if total != 0 else 0

    # Use the full trailing number of the label, not just its last character.
    if 'src' in df and len(df):
        src_label = str(df['src'].iloc[0])
        trailing_digits = re.search(r'\d+$', src_label)
        experiment = trailing_digits.group() if trailing_digits else src_label
    else:
        experiment = 'N/A'

    results_df = pd.DataFrame({
        'Experiment': [experiment],
        'TPR': [TPR],
        'PPV': [PPV],
        'F1': [F1],
        'TNR': [TNR],
        'ACC': [ACC],
    })

    return results_df

In [39]:
# One single-row classification summary per experiment.
results_df1, results_df2, results_df3 = map(
    evaluate_classification,
    (df_classification1, df_classification2, df_classification3),
)

# Stack the summaries and show them rounded to two decimals.
results_df = pd.concat([results_df1, results_df2, results_df3])

results_df.round(2)

Unnamed: 0,Experiment,TPR,PPV,F1,TNR,ACC
0,1,1.0,0.68,0.81,0.0,0.68
0,2,0.87,0.72,0.79,0.29,0.68
0,3,0.87,0.87,0.87,0.71,0.82
