In [None]:
# Collect the configuration stored in each model's manifest.json under the training data directory
import os

import numpy as np
import pandas as pd
import respiration.utils as utils

# Resolve the directory whose entries are scanned for a `manifest.json`
training_data_dir = utils.dir_path('models', 'rhythm_former_v2')

# Loss-term weights copied verbatim from the manifest's `loss_fn_config`
loss_weight_keys = [
    'frequency_weight',
    'mse_weight',
    'norm_weight',
    'pearson_weight',
    'spectral_convergence_weight',
    'spectral_magnitude_weight',
]

# Column layout of the resulting table. Passing it to the DataFrame constructor
# keeps the columns present even when no manifest is found, so sorting by
# 'model' below cannot raise a KeyError on an empty result.
config_columns = [
    'model',
    'setting',
    'image_dimension',
    'best_epoch',
    *loss_weight_keys,
    'split',
]

# One record (dict) per model directory that contains a manifest
config_records = []

for model_dir in os.listdir(training_data_dir):
    manifest_path = utils.join_paths(training_data_dir, model_dir, 'manifest.json')

    # Entries without a manifest are skipped
    if not os.path.exists(manifest_path):
        continue

    manifest = utils.read_json(manifest_path)
    loss_fn_config = manifest['loss_fn_config']

    config_records.append({
        # The 'RF_' prefix is what the plotting cell filters on
        'model': 'RF_' + manifest['timestamp'],
        # Second element of the first testing scenario
        'setting': manifest['testing_scenarios'][0][1],
        # First element of the image size
        'image_dimension': manifest['image_size'][0],
        # Epoch of the last entry in the manifest's model list.
        # NOTE(review): presumably the best checkpoint is stored last - confirm
        'best_epoch': manifest['models'][-1]['epoch'],
        **{key: loss_fn_config[key] for key in loss_weight_keys},
        'split': manifest['split'],
    })

# Build the table once from all records and order it by model name
configs = pd.DataFrame(config_records, columns=config_columns).sort_values(by='model')

configs

In [None]:
# Directory that holds the analysis CSV files read below
analysis_dir = utils.dir_path('outputs', 'analysis')

# Resolve both CSV paths first ...
metrics_file = utils.join_paths(analysis_dir, 'metrics.csv')
metrics_avg_file = utils.join_paths(analysis_dir, 'metrics_average.csv')

# ... then load them in the same order
metrics, metrics_average = (
    pd.read_csv(csv_path) for csv_path in (metrics_file, metrics_avg_file)
)

In [None]:
# Directory for generated figures.
# NOTE(review): `mkdir=True` presumably creates the directory when it is missing - confirm in utils.dir_path
figure_dir = utils.dir_path(
    'outputs',
    'figures',
    mkdir=True,
)

In [None]:
# Plot the MAE and PCC for the psd method
import seaborn as sns
import matplotlib.pyplot as plt

# One figure with a single axes for the MAE-vs-correlation scatter
fig, ax = plt.subplots(figsize=(12, 6))

# Restrict the averaged metrics to rows whose model name starts with "RF_"
is_rf_model = metrics_average['model'].str.startswith('RF_')
points = metrics_average[is_rf_model]

# One marker per row: MAE on the x-axis, PCC on the y-axis; both the colour
# and the marker shape encode the model
sns.scatterplot(
    data=points,
    x='mae',
    y='pcc',
    hue='model',
    style='model',
    s=250,
    ax=ax,
)

ax.set_xlabel('MAE (BPM)')
ax.set_ylabel('Correlation')
ax.set_title('MAE and Pearson Correlation for the different models')
fig.tight_layout()

# No axis limits are set here, so matplotlib's autoscaling applies

# Anchor the legend's left-centre to the right edge of the axes
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.show()

In [None]:
# Join each model's configuration with its averaged metrics; the default inner
# join keeps only models present in both frames
data = configs.merge(points, on='model')

# Persist the joined table next to the other analysis outputs (no index column)
loss_function_csv = utils.join_paths(analysis_dir, 'loss_function.csv')
data.to_csv(loss_function_csv, index=False)

data

In [None]:
# Loss-term weight columns whose relation to the MAE is reported below
loss_components = {
    'frequency_weight',
    'mse_weight',
    'norm_weight',
    'pearson_weight',
    'spectral_convergence_weight',
    'spectral_magnitude_weight',
    # 'image_dimension',
}

# Only the natural-lighting scenario at an image dimension of 128 is analysed
natural_setting = data[(data['setting'] == '101_natural_lighting') & (data['image_dimension'] == 128)]

# Iterate in sorted order: a set of strings has no stable iteration order
# across interpreter runs, which would shuffle the report on every restart.
for component in sorted(loss_components):
    # Mean / standard deviation / number of models per distinct weight value,
    # computed in a single pass. groupby sorts the weight values ascending, so
    # row 0 holds the smallest weight.
    mae_by_weight = (
        natural_setting
        .groupby(component)['mae']
        .agg(['mean', 'std', 'count'])
        .rename(columns={'mean': 'mae'})
        .reset_index()
    )

    print("################################################################")

    if len(mae_by_weight) < 2:
        # Nothing to compare against: the component takes fewer than two
        # distinct values in the filtered data.
        print(component, 'has fewer than two distinct values; no difference computed')
    else:
        # Difference between the mean MAE of the first two weight values
        # (lowest weight minus next-higher weight); further values, if any,
        # are only shown in the table.
        # NOTE(review): these were previously named `enabled` / `disabled`,
        # but with ascending sort row 0 is the lowest weight - confirm the
        # intended sign convention.
        first_mae = mae_by_weight.iloc[0]['mae']
        second_mae = mae_by_weight.iloc[1]['mae']
        print(component, first_mae - second_mae)

    print(mae_by_weight)
    print("\n")