# Analysis of unsupervised models

In [None]:
import numpy as np
import pandas as pd
import respiration.utils as utils

signals_dir = utils.dir_path('outputs', 'signals')

# Read both tables first; their array-valued columns are decoded below.
ground_truth_file = utils.join_paths(signals_dir, 'respiration_gt.csv')
predictions_file = utils.join_paths(signals_dir, 'raft_predictions.csv')
ground_truth = pd.read_csv(ground_truth_file)
predictions = pd.read_csv(predictions_file)

# The listed columns hold text in the CSV; each cell is evaluated as a Python
# expression and the result is wrapped in a numpy array.
# NOTE(review): `eval` executes whatever expression the CSV cell contains, so
# only load files produced by this project. `ast.literal_eval` may be a safer
# replacement if the cells are plain list literals — confirm before switching.
for frame, array_columns in (
        (ground_truth, ('signal',)),
        (predictions, ('signal', 'signal_std')),
):
    for column in array_columns:
        frame[column] = frame[column].apply(eval).apply(np.array)

# Preview the first prediction rows as the cell output
predictions.head()

## Compare the predictions to the ground truth

In [None]:
import os
from tqdm.auto import tqdm
import respiration.analysis as analysis

analysis_dir = utils.dir_path('outputs', 'analysis', mkdir=True)
analysis_file = utils.join_paths(analysis_dir, 'raft_analysis.csv')

if os.path.exists(analysis_file):
    # A previous run already stored the results: reuse them instead of
    # recomputing. Delete the file to force a fresh analysis.
    analysis_results = pd.read_csv(analysis_file)
else:
    # Index the ground truth signals once by (subject, setting) instead of
    # re-filtering the whole table for every prediction row. `setdefault`
    # keeps the first match when a pair occurs more than once.
    gt_signals = {}
    for gt_subject, gt_setting, gt_values in zip(
            ground_truth['subject'],
            ground_truth['setting'],
            ground_truth['signal'],
    ):
        gt_signals.setdefault((gt_subject, gt_setting), gt_values)

    analysis_results = []

    for _, row in tqdm(predictions.iterrows(), total=len(predictions)):
        subject, setting = row['subject'], row['setting']

        # Fail with a message naming the missing recording rather than an
        # opaque index error.
        if (subject, setting) not in gt_signals:
            raise LookupError(
                f'No ground truth signal for subject={subject!r}, '
                f'setting={setting!r}'
            )
        gt_signal = gt_signals[(subject, setting)]

        prediction, sampling_rate = row['signal'], row['sampling_rate']

        # Compare the predicted signal against the ground truth signal
        compare = analysis.SignalCompare(
            prediction,
            gt_signal,
            sampling_rate,
        )

        # One output record per metric returned by the comparison
        for metric, result in compare.compare_all().items():
            analysis_results.append({
                'subject': subject,
                'setting': setting,
                'model': row['model'],
                'roi': row['roi'],
                'metric': metric,
                'result': result,
            })

    # Persist the results so later runs take the cached branch above
    analysis_results = pd.DataFrame(analysis_results)
    analysis_results.to_csv(analysis_file, index=False)

In [None]:
# Show the table of metric results (one row per prediction and metric) as the cell output
analysis_results

## Get the mean and standard deviation of the results

In [None]:
# Every model and metric that occurs anywhere in the results gets a summary row,
# even if no chest measurement exists for the pair (mean/std are then NaN).
metrics = analysis_results['metric'].unique()
models = analysis_results['model'].unique()

# Only the chest region of interest is summarised
chest_results = analysis_results[analysis_results['roi'] == 'chest']

method_results = []
for model in models:
    model_rows = chest_results[chest_results['model'] == model]
    for metric in metrics:
        values = model_rows.loc[model_rows['metric'] == metric, 'result']
        method_results.append(dict(
            model=model,
            metric=metric,
            mean=values.mean(),
            std=values.std(),
        ))

method_results = pd.DataFrame(method_results)

In [None]:
# Show the per-model mean and standard deviation of each metric as the cell output
method_results

In [None]:
# Plot, for each metric, how well each method performs
import matplotlib.pyplot as plt

# Metrics to visualise, one subplot each, in alphabetical order
metrics = sorted([
    'pk_error',
    'cp_error',
    'nfcp_error',
    'psd_error',
    'distance_mse',
    'distance_pearson',
    'distance_dtw',
])

# Three plots per row; derive the row count so the grid always fits the list
# of metrics (ceil division). `squeeze=False` keeps `axes` two-dimensional
# even when a single row is enough.
n_cols = 3
n_rows = max(1, -(-len(metrics) // n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 7), squeeze=False)
flat_axes = axes.ravel()

# Add some space between the plots
# NOTE(review): tight_layout runs before the axes are populated; consider
# calling it after plotting if titles or labels end up overlapping.
fig.tight_layout(pad=5.0)

fig.suptitle('Comparison of unsupervised methods')

for ax, metric in zip(flat_axes, metrics):
    data = method_results[method_results['metric'] == metric]

    # Bar height is the mean, the error bar is the standard deviation
    ax.bar(data['model'], data['mean'], yerr=data['std'], capsize=5)

    ax.set_title(metric)
    ax.set_ylabel('Mean distance')
    ax.set_xlabel('Model')

    # Keep the x-axis labels horizontal
    ax.tick_params(axis='x', labelrotation=0)

# Hide the grid cells that have no metric so the saved figure does not
# contain empty frames
for unused_ax in flat_axes[len(metrics):]:
    unused_ax.set_visible(False)

# Save the figure through the project helper (the original comment states it
# is stored as svg — the format is decided inside utils.savefig)
figure_dir = utils.dir_path('outputs', 'figures', 'raft', mkdir=True)
utils.savefig(fig, figure_dir, 'error_model_comparison')