# Evaluation

## Step 1: Read the frequencies

In [None]:
import pandas as pd
import respiration.utils as utils

# Directory holding the analysis artefacts. `mkdir=True` presumably creates the
# directory when it is missing (utils.dir_path is not visible here - confirm).
analysis_dir = utils.dir_path('outputs', 'analysis', mkdir=True)
analysis_file = utils.join_paths(analysis_dir, '01_frequency_analysis.csv')

# Frequency table evaluated by the rest of the notebook. The cells below read
# its 'model', 'method', 'prediction' and 'ground_truth' columns.
frequencies = pd.read_csv(analysis_file)

## Step 2: Calculate MAE, RMSE and PCC

In [None]:
import numpy as np

from tqdm.auto import tqdm
from respiration.analysis import (
    pearson_correlation,
)

# For each (model, method) pair calculate the MAE, RMSE and PCC
models = frequencies['model'].unique()
methods = frequencies['method'].unique()

records = []

for model in tqdm(models):
    # The model mask does not depend on the method, so build it once per model
    is_model = frequencies['model'] == model

    for method in methods:
        data = frequencies[is_model & (frequencies['method'] == method)]

        # Gather all predictions and ground truth values and scale them by 60
        # (frequency in Hz -> rate per minute)
        predictions = np.array(data['prediction'].values) * 60
        ground_truth = np.array(data['ground_truth'].values) * 60

        # Error based metrics share the same residual vector
        errors = predictions - ground_truth
        mae = np.mean(np.abs(errors))
        rmse = np.sqrt(np.mean(errors ** 2))

        # Correlation between prediction and ground truth with its p-value
        pcc, p_pcc = pearson_correlation(predictions, ground_truth)

        scores = {
            'MAE': mae,
            'RMSE': rmse,
            'PCC': pcc,
            'PCC-p-value': p_pcc,
        }

        # One row per (model, method) with all scores rounded to three decimals
        records.append({
            'model': model,
            'method': method,
            **{name: round(value, 3) for name, value in scores.items()},
        })

In [None]:
# Create a dataframe with one row per (model, method) pair
experiment_analysis = pd.DataFrame(records)

# An undefined correlation (NaN) is treated as "no correlation": PCC = 0 with a
# p-value of 1. A blanket fillna(0) would instead report the missing correlation
# as highly significant (p = 0) and would turn an undefined MAE/RMSE into a
# perfect score of 0, so the error columns are intentionally left as NaN.
experiment_analysis = experiment_analysis.fillna({'PCC': 0, 'PCC-p-value': 1})

# Store the results
experiment_analysis.to_csv(utils.join_paths(analysis_dir, 'metrics.csv'), index=False)

experiment_analysis.head()

In [None]:
# Print, for each method, the standard deviation (not the variance) of the PCC
# across all models
for method in methods:
    data = experiment_analysis[experiment_analysis['method'] == method]
    print(f'{method}: {data["PCC"].std()}')

## Step 2b: Select the models and plot the metrics

In [None]:
import respiration.utils as utils

# Directory that is passed to utils.savefig for the figures of this notebook
figure_dir = utils.dir_path('outputs', 'figures', 'evaluation', mkdir=True)

In [None]:
# Model ids (values of the 'model' column) that are shown in the tables and
# figures below. The grid figures further down use a 3 x 6 layout, so at most
# 18 models can be selected without enlarging that grid.
model_selection = [
    "raft_small",
    "raft_large",
    "lucas_kanade",
    "pixel_intensity_grey",
    "FlowNet2S",  # Best on PSD
    "FlowNet2CS",  # Best on average
    "big_small",
    "mtts_can",
    "UBFC_cross_RhythmFormer",
    "SCAMPS_TSCAN",
    "SCAMPS_DeepPhys",
    "PURE_EfficientPhys",
    "random",
    "SimpleViT_20240821_115511",  # Best Normal on Average
    # "SimpleViT_20240729_195756",
    # "SimpleViT_20240728_172805",
    # # "RF_20240902_210159",  # Winner PSD
    # "RF_20240903_051739",  # Winner PK
    # # "RF_20240904_001421",  # Face Winner psd
    # "RF_20240904_001421",  # Face Winner pk
    "RF_20240902_210159",  # Best Normal on Average
    "RF_20240904_001421",  # Best Face on Average
]

# Display names for the model ids. Ids without an entry here (for example
# "FlowNet2S", "random" or the selected "RF_..." runs) keep their raw id,
# because every lookup below falls back to the id itself.
# NOTE(review): several ids map to the same display name; selecting two of
# them at once would make them indistinguishable in the figures.
rename = {
    'raft_small': 'RAFT (Small)',
    'raft_large': 'RAFT (Large)',
    'lucas_kanade': 'Lucas-Kanade',
    'big_small': 'BigSmall',
    'mtts_can': 'MTTS-CAN',
    'pixel_intensity_grey': 'Pixel Intensity',
    'PURE_EfficientPhys': 'EfficientPhys',
    'SCAMPS_DeepPhys': 'DeepPhys',
    'SCAMPS_TSCAN': 'TS-CAN',
    'MMPD_intra_RhythmFormer': 'Rhythm Former (original)',
    'UBFC_cross_RhythmFormer': 'Rhythm Former (original)',
    'SimpleViT_20240728_172805': 'SimpleViT (Faces)',
    'SimpleViT_20240728_114332': 'SimpleViT (Normal)',
    'SimpleViT_20240729_195756': 'SimpleViT (Normal)',
    # 'RF_20240903_051739': 'Respiration Rhythm Former (Normal)',
    # 'RF_20240902_210159': 'Respiration Rhythm Former (Normal)',
    # 'RF_20240726_104536': 'Respiration Rhythm Former (Normal)',
    # 'RF_20240801_124757': 'Respiration Rhythm Former (Normal)',
    # 'RF_20240802_155121': 'Respiration Rhythm Former (Normal)',
    'RF_20240805_200748': 'Respiration Rhythm Former (Faces)',
}

In [None]:
# Create a new dataframe with the selected models. The explicit copy makes it
# independent of experiment_analysis, so the rename below does not raise a
# SettingWithCopyWarning and can never write through to the source frame.
results_table_select = experiment_analysis[experiment_analysis['model'].isin(model_selection)].copy()

# Rename the models; ids without an entry in `rename` keep their original id
results_table_select['model'] = results_table_select['model'].map(lambda name: rename.get(name, name))

# Only keep the PSD method
results_table_select = results_table_select[results_table_select['method'] == 'psd']

# Remove the method column (it is constant after the filter above)
results_table_select = results_table_select.drop(columns='method')

# Save the results
results_table_select.to_csv(utils.join_paths(analysis_dir, 'metrics_filtered.csv'), index=False)

In [None]:
# Plot the MAE and PCC for the psd method
import seaborn as sns
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(12, 6))

# Only keep the model selection
points = experiment_analysis.copy()
points = points[points['model'].isin(model_selection)]

# Only keep the PSD method
points = points[points['method'] == 'psd']

# Scatter the MAE and PCC for the different models
sns.scatterplot(
    data=points,
    x='MAE',
    y='PCC',
    s=250,
    style='model',
    hue='model',
)

plt.xlabel('MAE (BPM)')
plt.ylabel('Correlation')
plt.title('MAE and Pearson Correlation for the different models')

# Set the dimensions of the plot
# plt.xlim(0, 8)
# plt.ylim(0, 1)

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Disable the legend
# plt.legend().remove()

# Compute the layout only after the legend exists: tight_layout() then reserves
# room for the legend next to the axes instead of letting it be cut off at the
# figure border.
plt.tight_layout()

# Save the figure
utils.savefig(fig, figure_dir, 'pcc_mae')

plt.show()

## RhythmFormer comparison

In [None]:
plt.figure(figsize=(12, 6))

# All models whose id contains 'RF', for every method (add
# `experiment_analysis['method'] == 'psd'` to the mask to restrict the method).
# The copy decouples the selection from experiment_analysis, so the assignment
# below does not raise a SettingWithCopyWarning.
rf_models = experiment_analysis[experiment_analysis['model'].str.contains('RF')].copy()

# Make all correlations positive
rf_models['PCC'] = rf_models['PCC'].abs()

# Scatter the MAE and PCC for the different models
sns.scatterplot(
    data=rf_models,
    x='MAE',
    y='PCC',
    s=250,
    style='model',
    hue='method',
)

plt.xlabel('MAE (BPM)')
plt.ylabel('Correlation')
plt.title('MAE and Pearson Correlation for the different models')

# Set the dimensions of the plot
# plt.xlim(0, 8)
# plt.ylim(0, 1)

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Disable the legend
# plt.legend().remove()

# Compute the layout after the legend has been created so that the legend,
# which sits outside the axes, is taken into account and not clipped.
plt.tight_layout()

plt.show()

In [None]:
# Average the metrics of every model whose id contains 'RF' over all methods
# (add `experiment_analysis['method'] == 'psd'` to the mask to restrict the
# method). `assign` works on a new frame, so the absolute value is never
# written into a slice of experiment_analysis (no SettingWithCopyWarning).
rf_models = (
    experiment_analysis[experiment_analysis['model'].str.contains('RF')]
    # Make the correlations positive
    .assign(PCC=lambda frame: frame['PCC'].abs())
    # Average the results for the different models
    .groupby('model')[['MAE', 'RMSE', 'PCC', 'PCC-p-value']]
    .mean()
    .reset_index()
)

# Plot the results
plt.figure(figsize=(12, 6))

# Scatter the MAE and PCC for the different models
sns.scatterplot(
    data=rf_models,
    x='MAE',
    y='PCC',
    s=250,
    style='model',
)

plt.xlabel('MAE (BPM)')
plt.ylabel('Correlation')
plt.title('MAE and Pearson Correlation for the different models')
plt.tight_layout()

plt.show()

## Step 3: Score the performance of the models

In [None]:
results_table_x = experiment_analysis.copy()

# Work with the magnitude of the correlation
results_table_x['PCC'] = results_table_x['PCC'].abs()

# Use the PSD and PK methods
# results_table_x = results_table_x[(results_table_x['method'] == 'pk') |
#                                   (results_table_x['method'] == 'psd')]
# results_table_x = results_table_x[(results_table_x['method'] == 'pk')]

# Aggregate every metric per model over all methods in a single pass:
# mean and standard deviation for MAE and RMSE, mean for PCC and its p-value
average_metric = (
    results_table_x
    .groupby('model')
    .agg(**{
        'MAE': ('MAE', 'mean'),
        'MAE_std': ('MAE', 'std'),
        'RMSE': ('RMSE', 'mean'),
        'RMSE_std': ('RMSE', 'std'),
        'PCC': ('PCC', 'mean'),
        'p-value': ('PCC-p-value', 'mean'),
    })
    .reset_index()
)

# Round the averaged values to three decimals; the standard deviations are
# kept unrounded
for column in ('MAE', 'RMSE', 'PCC', 'p-value'):
    average_metric[column] = average_metric[column].apply(lambda value: round(value, 3))

# Store the results
average_metric.to_csv(utils.join_paths(analysis_dir, 'average_metrics.csv'), index=False)

average_metric

In [None]:
plt.figure(figsize=(12, 6))

# Per-model averages of the selected models. The copy makes the selection an
# independent frame, so renaming the models below does not raise a
# SettingWithCopyWarning and leaves average_metric untouched.
average_metric_select = average_metric[average_metric['model'].isin(model_selection)].copy()

# Rename the models; ids without an entry in `rename` keep their original id
average_metric_select['model'] = average_metric_select['model'].map(lambda name: rename.get(name, name))

# Scatter the MAE and PCC for the different models
sns.scatterplot(
    data=average_metric_select,
    x='MAE',
    y='PCC',
    s=250,
    style='model',
    hue='model',
)

plt.xlabel('MAE')
plt.ylabel('PCC')
plt.title('MAE and Pearson Correlation for the different models')

# Set the dimensions of the plot
# plt.xlim(0, 8)
# plt.ylim(0, 1)

# Place the legend outside the plot
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Disable the legend
# plt.legend().remove()

# Compute the layout after the legend has been created so that the legend,
# which sits outside the axes, is taken into account and not clipped.
plt.tight_layout()

plt.show()

In [None]:
import numpy as np

# Grid layout (rows, columns) of the per-model panels; also used by the
# Bland-Altman cell below
dim = (3, 6)

if len(model_selection) > dim[0] * dim[1]:
    raise ValueError(
        f'{len(model_selection)} models selected, but the grid only has '
        f'{dim[0] * dim[1]} panels; enlarge `dim`.'
    )

_, axs = plt.subplots(dim[0], dim[1], figsize=(20, 10))

for idx, model in enumerate(model_selection):
    data = frequencies[(frequencies['model'] == model) &
                       (frequencies['method'] == 'psd')]

    # Transform the values from Hz to a rate per minute
    preds = np.array(data['prediction'].values) * 60
    gts = np.array(data['ground_truth'].values) * 60

    # Row-major position of the panel in the grid
    x, y = divmod(idx, dim[1])
    ax = axs[x, y]

    model = rename[model] if model in rename else model
    ax.set_title(f'{model}')

    # Show the range 5 to 30 for the x- and y-axis
    ax.set_xlim(5, 30)
    ax.set_ylim(5, 30)

    # Name the x- and y-axis
    ax.set_xlabel('Ground truth (bpm)')
    ax.set_ylabel('Prediction (bpm)')

    if len(preds) == 0:
        # Neither a correlation nor a trend line can be computed without
        # samples (np.polyfit rejects empty input), so only label the panel
        ax.text(0.1, 0.9, 'No data', transform=ax.transAxes)
        continue

    # The correlation is undefined when either series is constant; report it
    # as 0.0 instead of NaN in that case
    if preds.std() > 0 and gts.std() > 0:
        pcc = np.corrcoef(gts, preds)[0, 1]
        ax.text(0.1, 0.9, f'PCC: {round(pcc, 3)}', transform=ax.transAxes)
    else:
        ax.text(0.1, 0.9, 'PCC: 0.0', transform=ax.transAxes)

    # Add a trend line (least-squares line, drawn over the observed ground truth values)
    trend_x = np.unique(gts)
    trend = np.poly1d(np.polyfit(gts, preds, 1))
    ax.plot(trend_x, trend(trend_x), color='red')

    # Scatter plot; the very low alpha turns heavy overplotting into a density impression
    ax.scatter(gts, preds, label=model, s=20, alpha=0.01)

# Hide the panels of the grid that no model was assigned to
for unused_ax in axs.flat[len(model_selection):]:
    unused_ax.set_axis_off()

plt.tight_layout()

# Save the figure
utils.savefig(plt.gcf(), figure_dir, 'correlation')

plt.show()

In [None]:
# Create Bland-Altman plots for the different models (one panel per model,
# PSD method only); `dim` is the grid layout defined for the correlation grid
_, axs = plt.subplots(dim[0], dim[1], figsize=(20, 10))

for idx, model in enumerate(model_selection):
    is_psd_of_model = (frequencies['model'] == model) & (frequencies['method'] == 'psd')
    data = frequencies[is_psd_of_model]

    # Transform the values from Hz to a rate per minute
    preds = np.array(data['prediction'].values) * 60
    gts = np.array(data['ground_truth'].values) * 60

    # Row-major position of the panel in the grid
    x, y = divmod(idx, dim[1])
    panel = axs[x, y]

    # Difference and mean of every prediction / ground truth pair
    diff = preds - gts
    mean = (preds + gts) / 2

    # Scatter plot
    model = rename.get(model, model)
    panel.scatter(mean, diff, label=model, s=15, alpha=0.01)
    panel.set_title(f'{model}')

    # Name the x- and y-axis
    panel.set_xlabel('Mean (bpm)')
    panel.set_ylabel('Difference (bpm)')

    # Horizontal line at the mean difference (bias)
    bias = diff.mean()
    panel.axhline(bias, color='red', linestyle='--')

    # 95% limits of agreement: bias +/- 1.96 standard deviations
    spread = 1.96 * diff.std()
    panel.axhline(bias + spread, color='green', linestyle='--')
    panel.axhline(bias - spread, color='green', linestyle='--')

    # Set the y-axis to -20 to 20 and the x-axis to 6 to 30
    panel.set_ylim(-20, 20)
    panel.set_xlim(6, 30)

plt.tight_layout()

# Save the figure
utils.savefig(plt.gcf(), figure_dir, 'bland_altman')

plt.show()

## Poincare-curve

In [None]:
plt.figure(figsize=(12, 6))

# One record per (model, tolerance) pair; exported by the next cell
poincare_meta = []

# Tolerances from 0 to 19.75 BPM in steps of 0.25 BPM
tolerances = [step / 4 for step in range(0, 80)]

for model in model_selection:
    data = frequencies[(frequencies['model'] == model) &
                       (frequencies['method'] == 'psd')]

    preds = np.array(data['prediction'].values) * 60
    gts = np.array(data['ground_truth'].values) * 60

    if len(preds) == 0:
        # The percentage is undefined without samples (it would be a division
        # by zero), so a model without PSD rows gets no curve and no records
        continue

    # The absolute error does not depend on the tolerance; compute it once
    abs_error = np.abs(preds - gts)

    plot_y = []

    for tolerance in tolerances:
        # Percentage of the predictions whose error is within the tolerance
        percentage = np.count_nonzero(abs_error <= tolerance) / len(preds) * 100

        poincare_meta.append({
            'model': model,
            'tolerance': tolerance,
            'percentage': percentage,
        })
        plot_y.append(percentage)

    # The records keep the raw model id; only the legend uses the display name
    plt.plot(tolerances, plot_y, label=rename.get(model, model))

plt.xlabel('Tolerance (BPM)')
plt.ylabel('Percentage')

# Set the grid with a step of 1
plt.xticks(np.arange(0, 21, 1))
plt.yticks(np.arange(0, 105, 5))

# Set x range to 0-16
plt.xlim(0, 16)

plt.title('Poincare-curve')
plt.legend()
plt.show()

In [None]:
# Persist the tolerance-curve data (one row per model and tolerance with the
# columns 'model', 'tolerance' and 'percentage') next to the other analysis files
poincare_meta_df = pd.DataFrame(poincare_meta)
poincare_meta_df.to_csv(utils.join_paths(analysis_dir, 'poincare.csv'), index=False)