# Analyse

This notebook analyses the predictions of the models. The analysis includes the calculation of the respiration frequency from the predicted and ground truth signals. The calculated frequencies are then used to compute the mean absolute error (MAE), root mean square error (RMSE), and Pearson correlation coefficient (PCC) between the predicted and ground truth frequencies.

In [None]:
# Sampling rate shared by all signals (samples per second)
sample_rate = 30

# Cut-off frequencies handed to the Butterworth band-pass filter and to the PSD
# frequency search: `lowpass` is the lower edge, `highpass` the upper edge
lowpass, highpass = 0.1, 0.5

# Toggle: band-pass filter the signals during preprocessing
filter_signal = True

# Toggle: normalize the signals during preprocessing
normalize = True

# Hop between consecutive sliding windows, in samples (one second worth of samples)
stride = sample_rate * 1

In [None]:
import numpy as np
from respiration.analysis import (
    butterworth_filter,
    normalize_signal,
)


def preprocess(prediction: np.ndarray,
               ground_truth: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
    """
    Apply the notebook-wide preprocessing to a prediction / ground truth pair. When `filter_signal` is set, both
    signals are passed through the Butterworth filter (using `sample_rate`, `lowpass` and `highpass`); when
    `normalize` is set, both are normalized afterwards. Both signals always receive the same treatment.
    :param prediction: The predicted signal.
    :param ground_truth: The ground truth signal. Must have the same shape as the prediction.
    :return: The preprocessed prediction and ground truth signals, in that order.
    """

    assert prediction.shape == ground_truth.shape, \
        (f'Prediction and ground truth signals must have the same shape. Got prediction shape: {prediction.shape}, '
         f'ground truth shape: {ground_truth.shape}')

    # Keep the pair together so that every enabled step is applied to both signals alike
    signals = [prediction, ground_truth]

    if filter_signal:
        signals = [
            butterworth_filter(signal, sample_rate, lowpass, highpass)
            for signal in signals
        ]

    if normalize:
        signals = [normalize_signal(signal) for signal in signals]

    prediction, ground_truth = signals
    return prediction, ground_truth

In [None]:
import respiration.analysis as analysis


def frequency_from_peaks(data: np.ndarray) -> float:
    """
    Compute the respiration frequency from the peaks of the signal. Delegates to `analysis.frequency_from_peaks`
    with the notebook-wide `sample_rate`, passing `lowpass` as the minimum frequency.
    :param data: The signal.
    :return: The respiration frequency.
    """
    return analysis.frequency_from_peaks(data, sample_rate, min_frequency=lowpass)


def frequency_from_psd(data: np.ndarray) -> float:
    """
    Compute the respiration frequency with the PSD-based estimator (presumably "power spectral density" - confirm
    in `respiration.analysis`). Delegates to `analysis.frequency_from_psd` with the notebook-wide `sample_rate`,
    passing `lowpass` and `highpass` as the minimum and maximum frequency.
    :param data: The signal.
    :return: The respiration frequency.
    """
    return analysis.frequency_from_psd(data, sample_rate, min_freq=lowpass, max_freq=highpass)


def frequency_from_crossing_point(data: np.ndarray) -> float:
    """
    Compute the respiration frequency with the crossing-point estimator. Delegates to
    `analysis.frequency_from_crossing_point` with the notebook-wide `sample_rate`.
    :param data: The signal.
    :return: The respiration frequency.
    """
    return analysis.frequency_from_crossing_point(data, sample_rate)


def frequency_from_nfcp(data: np.ndarray) -> float:
    """
    Compute the respiration frequency with the NFCP estimator. Delegates to `analysis.frequency_from_nfcp` with
    the notebook-wide `sample_rate`; see `respiration.analysis` for the definition of the method.
    :param data: The signal.
    :return: The respiration frequency.
    """
    return analysis.frequency_from_nfcp(data, sample_rate)

In [None]:
from tqdm.auto import tqdm
from respiration.dataset import VitalCamSet
from respiration.analysis import PredictionsReader

dataset = VitalCamSet()

predictions = PredictionsReader().read_all()

# Frequency estimators applied to every window, keyed by the short name stored in the 'method' column.
# The mapping is the same for every prediction, so it is built once instead of once per row.
methods = {
    'cp': frequency_from_crossing_point,
    'nfcp': frequency_from_nfcp,
    'pk': frequency_from_peaks,
    'psd': frequency_from_psd,
}

# Sliding-window lengths in seconds
window_seconds = [30, 40, 50, 60, 70, 80, 90, 100, 110]

# One entry per (prediction row, window size, window start, method)
records = []

for _, row in tqdm(predictions.iterrows(), total=len(predictions)):
    subject, setting = row['subject'], row['setting']
    prediction_signal = row['signal']
    model = row['model']

    gt_signal = dataset.get_breathing_signal(subject, setting)

    # Cut the gt_signal to have the same length as the prediction
    gt_signal = gt_signal[:len(prediction_signal)]

    prediction_signal, gt_signal = preprocess(prediction_signal, gt_signal)

    for size in window_seconds:
        # Window length in samples; this is the value stored in the 'window_size' column
        window_size = size * sample_rate

        # The stop value is exclusive, hence the `+ 1`: without it the last window that still fits
        # completely is skipped, and a signal exactly one window long yields no window at all.
        last_start = len(prediction_signal) - window_size

        for inx in range(0, last_start + 1, stride):
            prediction_window = prediction_signal[inx:inx + window_size]
            ground_truth_window = gt_signal[inx:inx + window_size]

            for key, metric in methods.items():
                records.append({
                    'subject': subject,
                    'setting': setting,
                    'model': model,
                    'index': inx,
                    'window_size': window_size,
                    'method': key,
                    'prediction': metric(prediction_window),
                    'ground_truth': metric(ground_truth_window),
                })

In [None]:
import pandas as pd
import respiration.utils as utils

# Location of the analysis artefacts (`mkdir=True` presumably creates the directory when it is missing)
analysis_dir = utils.dir_path('outputs', 'analysis', mkdir=True)
analysis_file = utils.join_paths(analysis_dir, 'frequencies.csv')

# Turn the collected window records into a table and persist it without the pandas index column
df = pd.DataFrame(records)
df.to_csv(analysis_file, index=False)

## Calculate metrics

In [None]:
import pandas as pd
import respiration.utils as utils

# Rebuild the path of the frequency table so that this part of the notebook can be run on its own,
# without recomputing the frequencies
analysis_dir = utils.dir_path('outputs', 'analysis', mkdir=True)
analysis_file = utils.join_paths(analysis_dir, 'frequencies.csv')

# One row per evaluated window and method, with the columns: subject, setting, model, index,
# window_size, method, prediction, ground_truth
frequencies = pd.read_csv(analysis_file)

In [None]:
import numpy as np

from tqdm.auto import tqdm
from respiration.analysis import (
    pearson_correlation,
)

# Aggregate the per-window frequencies into MAE, RMSE and PCC for every (model, method) combination
models = frequencies['model'].unique()
methods = frequencies['method'].unique()

records = []

for model in tqdm(models):
    for method in methods:
        # Rows that belong to the current model and frequency estimation method
        selected = (frequencies['model'] == model) & (frequencies['method'] == method)
        data = frequencies[selected]

        # Scale the frequencies by 60 to express them per minute (assuming they are stored in Hz)
        predictions = data['prediction'].to_numpy() * 60
        ground_truth = data['ground_truth'].to_numpy() * 60

        # Both error metrics are derived from the same element-wise difference
        errors = predictions - ground_truth
        mae = np.mean(np.abs(errors))
        rmse = np.sqrt(np.mean(np.square(errors)))

        pcc, p_pcc = pearson_correlation(predictions, ground_truth)

        # Every score is stored rounded to three decimals
        scores = {
            'MAE': mae,
            'RMSE': rmse,
            'PCC': pcc,
            'PCC-p-value': p_pcc,
        }
        records.append({
            'model': model,
            'method': method,
            **{name: round(value, 3) for name, value in scores.items()},
        })

In [None]:
# Build the results table from the collected records; NaN entries are stored as 0.
# NOTE(review): a 0 written in place of a NaN MAE, RMSE or p-value cannot be told apart from a genuine 0
# (a perfect score / a highly significant correlation) - confirm that this is intended.
experiment_analysis = pd.DataFrame(records).fillna(0)

# Persist the table in the analysis directory without the pandas index column
metrics_file = utils.join_paths(analysis_dir, 'metrics.csv')
experiment_analysis.to_csv(metrics_file, index=False)