# Combine analysis results

In [None]:
import pandas as pd
import respiration.utils as utils

# Directory containing the per-method analysis CSV files that the cells below read.
analysis_dir = utils.dir_path('outputs', 'analysis')

## Harmonize the data

The respiration extraction methods all store their results in slightly different data structures, so the data must be harmonized before the models can be compared. Only the best-performing method for each model is kept.

In [None]:
# Load the unsupervised results and bring them into the shared layout:
# keep only the chest ROI rows (the most accurate ROI), drop the 'roi'
# column, and expose the method name under the common 'model' column.
unsupervised_path = utils.join_paths(analysis_dir, 'unsupervised_analysis.csv')

unsupervised = (
    pd.read_csv(unsupervised_path)
    .loc[lambda frame: frame['roi'] == 'chest']
    .drop(columns=['roi'])
    .rename(columns={'method': 'model'})
)

In [None]:
# Load the fine-tuned results, expose 'model_id' under the common 'model'
# column, and prefix every model name with 'fine_tuned_'.
fine_tuned_path = utils.join_paths(analysis_dir, 'fine_tuned_analysis.csv')

fine_tuned = pd.read_csv(fine_tuned_path).rename(columns={'model_id': 'model'})
fine_tuned = fine_tuned.assign(model='fine_tuned_' + fine_tuned['model'])

In [None]:
# Load the transformer results, expose 'model_id' under the common 'model'
# column, and prefix every model name with 'transformer_'.
transformer_path = utils.join_paths(analysis_dir, 'transformer_analysis.csv')

transformer = (
    pd.read_csv(transformer_path)
    .rename(columns={'model_id': 'model'})
    .assign(model=lambda frame: 'transformer_' + frame['model'])
)

In [None]:
# Load the RAFT results.
raft_path = utils.join_paths(analysis_dir, 'raft_analysis.csv')
raft = pd.read_csv(raft_path)

# Keep only the chest ROI (the most accurate one) combined with the
# 'signal_v' direction, then drop the two selector columns.
raft = raft[(raft['roi'] == 'chest') & (raft['signal_direction'] == 'signal_v')]
raft = raft.drop(columns=['roi', 'signal_direction'])

In [None]:
# Load the pretrained-model results; no filtering or renaming is applied to this file.
pretrained_path = utils.join_paths(analysis_dir, 'pretrained_analysis.csv')
pretrained = pd.read_csv(filepath_or_buffer=pretrained_path)

In [None]:
# Merge all dataframes into one long table. Each frame still carries the row
# labels of its own CSV, so the labels would repeat across frames; renumber
# the rows so every row of the combined table has a unique index label.
analysis = pd.concat(
    [fine_tuned, pretrained, unsupervised, raft, transformer],
    ignore_index=True,
)

## Compare all models

In [None]:
# Absolute deviation of every prediction from its ground-truth value.
analysis['error'] = analysis['prediction'].sub(analysis['ground_truth']).abs()

In [None]:
# Display the combined table, including the 'error' column.
analysis

In [None]:
# Distinct model names and distinct metric names found in the combined table.
models, metrics = (analysis[column].unique() for column in ('model', 'metric'))

In [None]:
import scipy.stats as stats

# For every (model, metric) pair, compute the Pearson correlation between
# prediction and ground truth (with its p-value) and the RMSE.
correlations = []

for model in models:
    model_data = analysis[analysis['model'] == model]

    for metric in metrics:
        metric_data = model_data[model_data['metric'] == metric]

        # pearsonr raises ValueError for fewer than two paired samples, so
        # pairs with zero or one row are skipped instead of aborting the cell.
        if len(metric_data) < 2:
            continue

        correlation, p_value = stats.pearsonr(metric_data['prediction'], metric_data['ground_truth'])
        rmse = ((metric_data['prediction'] - metric_data['ground_truth']) ** 2).mean() ** 0.5

        correlations.append({
            'model': model,
            'metric': metric,
            'correlation': correlation,
            'p_value': p_value,
            'rmse': rmse
        })

# One row per evaluated (model, metric) pair.
correlations = pd.DataFrame(correlations)
correlations

In [None]:
# Restrict the table to correlations with a p-value below 0.05.
correlations.loc[correlations['p_value'].lt(0.05)]

In [None]:
# Number of distinct models in the combined table.
len(models)

## Visualize the results

In [None]:
# Directory that the figures created below are saved to.
figure_dir = utils.dir_path('outputs', 'figures')

### Plot the prediction vs ground truth

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import respiration.utils as utils

# Metric whose values are plotted in this cell; the later plotting cells
# read the same variable.
metric = 'pk'

# One panel per model, five panels per row. Three rows are the minimum (the
# original 3 x 5 layout); further rows are added when there are more than
# 15 models so that indexing `axs` can never run past the grid.
n_cols = 5
n_rows = max(3, -(-len(models) // n_cols))
fig, axs = plt.subplots(n_rows, n_cols, figsize=(20, 4 * n_rows))

# Add some space between the plots
fig.tight_layout(pad=5.0)

for idx, model in enumerate(models):
    ax = axs[idx // n_cols, idx % n_cols]
    model_data = analysis[(analysis['model'] == model) & (analysis['metric'] == metric)]

    x = model_data['prediction']
    y = model_data['ground_truth']
    ax.scatter(x, y)

    # Add a regression line. A straight-line fit needs at least two points
    # and np.polyfit raises on empty input, so models with too little data
    # for this metric keep their (empty or single-point) panel without a line.
    if len(model_data) >= 2:
        m, b = np.polyfit(x, y, 1)
        ax.plot(x, m * x + b, color='red')

    ax.set_xlabel('Prediction')
    ax.set_ylabel('Ground truth')
    ax.set_title(model)

utils.savefig(fig, figure_dir, 'model_correlations')

### Bland-Altman plot

In [None]:
# One panel per model, five panels per row. Three rows are the minimum (the
# original 3 x 5 layout); further rows are added when there are more than
# 15 models so that indexing `axs` can never run past the grid.
n_cols = 5
n_rows = max(3, -(-len(models) // n_cols))
fig, axs = plt.subplots(n_rows, n_cols, figsize=(20, 4 * n_rows))

# Add some space between the plots
fig.tight_layout(pad=5.0)

# Bland-Altman plot: the signed difference between the two measurements is
# plotted against their mean. The horizontal lines from top to bottom are
# mean + 1.96 std., mean, and mean - 1.96 std. of the difference, respectively.
# The signed difference is used (not the absolute error) because the limits
# of agreement are only meaningful for differences that can be negative.

for idx, model in enumerate(models):
    ax = axs[idx // n_cols, idx % n_cols]
    model_data = analysis[(analysis['model'] == model) & (analysis['metric'] == metric)]

    difference = model_data['prediction'] - model_data['ground_truth']
    average = (model_data['prediction'] + model_data['ground_truth']) / 2

    mean = difference.mean()
    std = difference.std()

    ax.scatter(average, difference)
    ax.axhline(mean + 1.96 * std, color='red', linestyle='--')
    ax.axhline(mean, color='red')
    ax.axhline(mean - 1.96 * std, color='red', linestyle='--')

    # Fixed, symmetric y range so that all panels share one scale and
    # over- and under-estimation get the same amount of room.
    ax.set_ylim(-12, 12)

    ax.set_xlabel('Mean of prediction and ground truth')
    ax.set_ylabel('Prediction - ground truth')
    ax.set_title(model)

utils.savefig(fig, figure_dir, 'bland_altman')

### Plot the RMSE

In [None]:
# Bar chart of the RMSE of every model for the selected metric.
fig, ax = plt.subplots(figsize=(20, 6))

for model in models:
    model_rmse = correlations.loc[
        (correlations['model'] == model) & (correlations['metric'] == metric),
        'rmse',
    ]

    # A model has no row in `correlations` when it had no usable data for
    # this metric; skip it instead of failing on an empty selection.
    if model_rmse.empty:
        continue

    ax.bar(model, model_rmse.iloc[0])

ax.set_xlabel('Model')
ax.set_ylabel('RMSE')
ax.set_title('RMSE of the models')

# Rotate the x labels (set on this axes explicitly rather than through
# pyplot's implicit "current axes").
ax.tick_params(axis='x', labelrotation=45)

utils.savefig(fig, figure_dir, 'rmse')