#Global Features

##Multilabel

In [None]:
import wfdb
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
import shap
from collections import defaultdict

# Update with your folder path
folder_path = '/Users/roupenminassian/physionet.org/files/pulse-transit-time-ppg/1.1.0/Data/'

# Sampling frequency
fs = 500

# Two-level mapping subject -> activity -> loaded record; missing entries are
# created on first access.
subject_data = defaultdict(lambda: defaultdict(list))

# Running totals accumulated over all subjects for the final aggregated metrics
total_weighted_precision = total_weighted_recall = total_weighted_f1 = 0
total_support = total_accuracy = total_subjects = 0

# Load and organize data by subject and activity.
# File names are parsed as "<subject>_<activity>.dat"; each record is read with
# wfdb (by its name without extension) and stored as
# subject_data[subject][activity].
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the subject/activity processing order reproducible between runs and
# machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".dat"):
        continue
    subject, activity = filename.split('_')[0], filename.split('_')[1].split('.')[0]
    record_name = os.path.join(folder_path, filename.split('.')[0])
    record = wfdb.rdrecord(record_name)
    subject_data[subject][activity] = record

# Initialize a dictionary to hold models for each subject
subject_models = {}

# Class order shared by the confusion matrix and the heatmap tick labels.
# Without an explicit `labels=` argument, confusion_matrix() orders classes by
# sorted label value ('run', 'sit', 'walk'), which would not line up with the
# tick labels drawn on the heatmap.
class_labels = ['sit', 'walk', 'run']

# Process data for each subject: every activity recording contributes exactly
# one training sample (first 80% of the signal) and one test sample (last 20%).
# A separate scaler and random forest are fitted per subject.
for subject, activity_records in subject_data.items():
    train_features, test_features = [], []
    train_labels, test_labels = [], []

    print(f"Processing subject: {subject}")

    for activity, record in activity_records.items():
        # Extract ECG and PPG signals
        ecg_index = record.sig_name.index('ecg')
        ecg_signal = record.p_signal[:, ecg_index]
        pleth_2_index = record.sig_name.index('pleth_2')
        pleth_2_signal = record.p_signal[:, pleth_2_index]

        # Perform 80/20 split (head of the recording for training, tail for testing)
        train_size = int(0.8 * len(ecg_signal))
        ecg_train, ecg_test = ecg_signal[:train_size], ecg_signal[train_size:]
        pleth_2_train, pleth_2_test = pleth_2_signal[:train_size], pleth_2_signal[train_size:]

        # Extract global features and prefix the feature names.
        # NOTE(review): global_features is defined outside this section; it is
        # used here as returning a dict of feature name -> value.
        ecg_train_features_raw = global_features(ecg_train, fs, is_ecg=True)
        pleth_2_train_features_raw = global_features(pleth_2_train, fs, is_ecg=False)
        ecg_test_features_raw = global_features(ecg_test, fs, is_ecg=True)
        pleth_2_test_features_raw = global_features(pleth_2_test, fs, is_ecg=False)

        # The prefixes keep ECG and PPG features with the same name apart when
        # the two dicts are merged below.
        ecg_train_features = {"ECG_" + k: v for k, v in ecg_train_features_raw.items()}
        pleth_2_train_features = {"PPG_" + k: v for k, v in pleth_2_train_features_raw.items()}
        ecg_test_features = {"ECG_" + k: v for k, v in ecg_test_features_raw.items()}
        pleth_2_test_features = {"PPG_" + k: v for k, v in pleth_2_test_features_raw.items()}

        # Combine features and append to training/testing sets
        train_features.append({**ecg_train_features, **pleth_2_train_features})
        test_features.append({**ecg_test_features, **pleth_2_test_features})

        train_labels.append(activity)
        test_labels.append(activity)

    # Convert features and labels to NumPy arrays
    X_train = np.array([list(f.values()) for f in train_features])
    y_train = np.array(train_labels)
    X_test = np.array([list(f.values()) for f in test_features])
    y_test = np.array(test_labels)

    # Normalize the data (scaler statistics come from the training rows only)
    scaler = StandardScaler()
    X_train_normalized = scaler.fit_transform(X_train)
    X_test_normalized = scaler.transform(X_test)

    # Train the classifier
    clf = RandomForestClassifier(n_estimators=1000, random_state=42)
    clf.fit(X_train_normalized, y_train)

    # Store the model for this subject
    subject_models[subject] = clf

    # Predict on the test set
    y_pred = clf.predict(X_test_normalized)

    # Calculate and print the accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy for {subject}: {accuracy * 100:.2f}%")

    # Aggregate metrics: per-class scores are averaged with the class support
    # as weight, then accumulated (re-multiplied by the subject's support) so
    # the final figures are support-weighted over all subjects.
    metrics = precision_recall_fscore_support(y_test, y_pred, average=None)
    precision_values, recall_values, f1_values, support_values = metrics

    total_support_subject = sum(support_values)
    weighted_precision = sum(p * s for p, s in zip(precision_values, support_values)) / total_support_subject
    weighted_recall = sum(r * s for r, s in zip(recall_values, support_values)) / total_support_subject
    weighted_f1 = sum(f * s for f, s in zip(f1_values, support_values)) / total_support_subject

    total_weighted_precision += weighted_precision * total_support_subject
    total_weighted_recall += weighted_recall * total_support_subject
    total_weighted_f1 += weighted_f1 * total_support_subject
    total_support += total_support_subject
    total_accuracy += accuracy
    total_subjects += 1

    # Calculate additional metrics (Recall, F1, Precision)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Visualize Confusion Matrix; `labels=` pins the row/column order to the
    # same list that is used for the tick labels.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
    plt.figure(figsize=(10, 7))
    sns.heatmap(conf_matrix, annot=True, cmap="YlGnBu", xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()
    plt.close()

    # Calculate and plot SHAP values
    explainer = shap.TreeExplainer(clf)
    shap_values = explainer.shap_values(X_test_normalized)

    # Feature names in column order (every feature dict is built the same way,
    # so the first one is representative)
    feature_names = list(train_features[0].keys())

    # Plot SHAP values for each class
    for i, label in enumerate(clf.classes_):
        # Older SHAP releases return a list with one (n_samples, n_features)
        # array per class; newer releases return a single array with the class
        # on the last axis. Support both layouts.
        if isinstance(shap_values, list):
            class_shap_values = shap_values[i]
        else:
            class_shap_values = shap_values[:, :, i]

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Summary for class {label}")

        # Display SHAP summary plot (feature values shown are the unscaled X_test)
        shap.summary_plot(class_shap_values, X_test, feature_names=feature_names, show=False)
        # Render and release the figure so open figures do not pile up across
        # classes and subjects.
        plt.show()
        plt.close()

        # Create Explanation object for bar plot
        expected_value = explainer.expected_value[i]
        explanation = shap.Explanation(values=class_shap_values,
                                       base_values=expected_value,
                                       data=X_test,
                                       feature_names=feature_names)

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Bar for class {label}")

        # Display SHAP bar plot
        shap.plots.bar(explanation, max_display=20, show=False)
        plt.show()
        plt.close()

# Calculate final aggregated metrics.
# Precision, recall and F1 are the accumulated weighted sums divided by the
# total support; accuracy is the mean of the per-subject accuracies.
final_precision, final_recall, final_f1 = (
    weighted_sum / total_support
    for weighted_sum in (total_weighted_precision, total_weighted_recall, total_weighted_f1)
)
final_accuracy = total_accuracy / total_subjects

final_metrics = dict(
    zip(
        ("Precision", "Recall", "F1-Score", "Support", "Accuracy"),
        (final_precision, final_recall, final_f1, total_support, final_accuracy),
    )
)

print("Final Aggregated Metrics:", final_metrics)

##Binary

In [None]:
import wfdb
import os
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_recall_fscore_support
from sklearn.preprocessing import StandardScaler
import seaborn as sns
import matplotlib.pyplot as plt
import shap
from collections import defaultdict

# Update with your folder path
folder_path = '/Users/roupenminassian/physionet.org/files/pulse-transit-time-ppg/1.1.0/Data/'

# Sampling frequency
fs = 500

# Two-level mapping subject -> activity -> loaded record; missing entries are
# created on first access.
subject_data = defaultdict(lambda: defaultdict(list))

# Running totals accumulated over all subjects for the final aggregated metrics
total_weighted_precision = total_weighted_recall = total_weighted_f1 = 0
total_support = total_accuracy = total_subjects = 0

# Load and organize data by subject and activity.
# File names are parsed as "<subject>_<activity>.dat"; each record is read with
# wfdb (by its name without extension) and stored as
# subject_data[subject][activity].
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the subject/activity processing order reproducible between runs and
# machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".dat"):
        continue
    subject, activity = filename.split('_')[0], filename.split('_')[1].split('.')[0]
    record_name = os.path.join(folder_path, filename.split('.')[0])
    record = wfdb.rdrecord(record_name)
    subject_data[subject][activity] = record

# Initialize a dictionary to hold models for each subject
subject_models = {}

# Class order shared by the confusion matrix and the heatmap tick labels, so
# the matrix rows/columns are guaranteed to match the ticks drawn on it.
class_labels = ['movement', 'rest']

# Process data for each subject: every activity recording contributes exactly
# one training sample (first 80% of the signal) and one test sample (last 20%),
# labelled with the binary class derived from the activity.
for subject, activity_records in subject_data.items():
    train_features, test_features = [], []
    train_labels, test_labels = [], []

    print(f"Processing subject: {subject}")

    for activity, record in activity_records.items():
        # Extract ECG and PPG signals
        ecg_index = record.sig_name.index('ecg')
        ecg_signal = record.p_signal[:, ecg_index]
        pleth_2_index = record.sig_name.index('pleth_2')
        pleth_2_signal = record.p_signal[:, pleth_2_index]

        # Perform 80/20 split (head of the recording for training, tail for testing)
        train_size = int(0.8 * len(ecg_signal))
        ecg_train, ecg_test = ecg_signal[:train_size], ecg_signal[train_size:]
        pleth_2_train, pleth_2_test = pleth_2_signal[:train_size], pleth_2_signal[train_size:]

        # Extract global features and prefix the feature names.
        # NOTE(review): global_features is defined outside this section; it is
        # used here as returning a dict of feature name -> value.
        ecg_train_features_raw = global_features(ecg_train, fs, is_ecg=True)
        pleth_2_train_features_raw = global_features(pleth_2_train, fs, is_ecg=False)
        ecg_test_features_raw = global_features(ecg_test, fs, is_ecg=True)
        pleth_2_test_features_raw = global_features(pleth_2_test, fs, is_ecg=False)

        # The prefixes keep ECG and PPG features with the same name apart when
        # the two dicts are merged below.
        ecg_train_features = {"ECG_" + k: v for k, v in ecg_train_features_raw.items()}
        pleth_2_train_features = {"PPG_" + k: v for k, v in pleth_2_train_features_raw.items()}
        ecg_test_features = {"ECG_" + k: v for k, v in ecg_test_features_raw.items()}
        pleth_2_test_features = {"PPG_" + k: v for k, v in pleth_2_test_features_raw.items()}

        # Convert 'run' and 'walk' labels to 'movement', 'sit' to 'rest';
        # any other activity name is passed through unchanged.
        converted_activity = activity
        if activity in ['run', 'walk']:
            converted_activity = 'movement'
        elif activity == 'sit':
            converted_activity = 'rest'

        # Combine features and append to training/testing sets
        train_features.append({**ecg_train_features, **pleth_2_train_features})
        test_features.append({**ecg_test_features, **pleth_2_test_features})

        train_labels.append(converted_activity)
        test_labels.append(converted_activity)

    # Convert features and labels to NumPy arrays
    X_train = np.array([list(f.values()) for f in train_features])
    y_train = np.array(train_labels)
    X_test = np.array([list(f.values()) for f in test_features])
    y_test = np.array(test_labels)

    # Normalize the data (scaler statistics come from the training rows only)
    scaler = StandardScaler()
    X_train_normalized = scaler.fit_transform(X_train)
    X_test_normalized = scaler.transform(X_test)

    # Train the classifier
    clf = RandomForestClassifier(n_estimators=1000, random_state=42)
    clf.fit(X_train_normalized, y_train)

    # Store the model for this subject
    subject_models[subject] = clf

    # Predict on the test set
    y_pred = clf.predict(X_test_normalized)

    # Calculate and print the accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy for {subject}: {accuracy * 100:.2f}%")

    # Aggregate metrics: per-class scores are averaged with the class support
    # as weight, then accumulated (re-multiplied by the subject's support) so
    # the final figures are support-weighted over all subjects.
    metrics = precision_recall_fscore_support(y_test, y_pred, average=None)
    precision_values, recall_values, f1_values, support_values = metrics

    total_support_subject = sum(support_values)
    weighted_precision = sum(p * s for p, s in zip(precision_values, support_values)) / total_support_subject
    weighted_recall = sum(r * s for r, s in zip(recall_values, support_values)) / total_support_subject
    weighted_f1 = sum(f * s for f, s in zip(f1_values, support_values)) / total_support_subject

    total_weighted_precision += weighted_precision * total_support_subject
    total_weighted_recall += weighted_recall * total_support_subject
    total_weighted_f1 += weighted_f1 * total_support_subject
    total_support += total_support_subject
    total_accuracy += accuracy
    total_subjects += 1

    # Calculate additional metrics (Recall, F1, Precision)
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Visualize Confusion Matrix; `labels=` pins the row/column order to the
    # same list that is used for the tick labels.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
    plt.figure(figsize=(10, 7))
    sns.heatmap(conf_matrix, annot=True, cmap="YlGnBu", xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()
    plt.close()

    # Calculate and plot SHAP values
    explainer = shap.TreeExplainer(clf)
    shap_values = explainer.shap_values(X_test_normalized)

    # Feature names in column order (every feature dict is built the same way,
    # so the first one is representative)
    feature_names = list(train_features[0].keys())

    # Plot SHAP values for each class
    for i, label in enumerate(clf.classes_):
        # Older SHAP releases return a list with one (n_samples, n_features)
        # array per class; newer releases return a single array with the class
        # on the last axis. Support both layouts.
        if isinstance(shap_values, list):
            class_shap_values = shap_values[i]
        else:
            class_shap_values = shap_values[:, :, i]

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Summary for class {label}")

        # Display SHAP summary plot (feature values shown are the unscaled X_test)
        shap.summary_plot(class_shap_values, X_test, feature_names=feature_names, show=False)
        # Render and release the figure so open figures do not pile up across
        # classes and subjects.
        plt.show()
        plt.close()

        # Create Explanation object for bar plot
        expected_value = explainer.expected_value[i]
        explanation = shap.Explanation(values=class_shap_values,
                                       base_values=expected_value,
                                       data=X_test,
                                       feature_names=feature_names)

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Bar for class {label}")

        # Display SHAP bar plot
        shap.plots.bar(explanation, max_display=20, show=False)
        plt.show()
        plt.close()

# Calculate final aggregated metrics.
# Precision, recall and F1 are the accumulated weighted sums divided by the
# total support; accuracy is the mean of the per-subject accuracies.
final_precision, final_recall, final_f1 = (
    weighted_sum / total_support
    for weighted_sum in (total_weighted_precision, total_weighted_recall, total_weighted_f1)
)
final_accuracy = total_accuracy / total_subjects

final_metrics = dict(
    zip(
        ("Precision", "Recall", "F1-Score", "Support", "Accuracy"),
        (final_precision, final_recall, final_f1, total_support, final_accuracy),
    )
)

print("Final Aggregated Metrics:", final_metrics)

#Local Features

##Multilabel

In [None]:
# Import required modules
import os
import wfdb
import shap
import numpy as np
from collections import defaultdict
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns

# Define folder path
folder_path = '/Users/roupenminassian/physionet.org/files/pulse-transit-time-ppg/1.1.0/Data/'  # Update with your path

fs = 500  # Sampling frequency

# Feature keys for the PPG and ECG signals. The ECG list is the PPG list
# followed by nine ECG-only keys.
selected_keys_ppg = [
    'mean', 'median', 'variance', 'std_dev', 'skewness', 'kurtosis',
    'num_peaks', 'num_valleys', 'spectral_entropy', 'dom_freq',
]
selected_keys_ecg = selected_keys_ppg + [
    'mean_nni', 'sdnn', 'sdsd', 'pnn20', 'pnn50', 'rmssd', 'sd1', 'sd2',
    'respiratory_rate',
]

# Two-level mapping subject -> activity -> loaded record; missing entries are
# created on first access.
subject_data = defaultdict(lambda: defaultdict(list))

# Fitted classifier per subject
subject_models = {}

# Running totals accumulated over all subjects for the final aggregated metrics
total_weighted_precision = total_weighted_recall = total_weighted_f1 = 0
total_support = total_accuracy = total_subjects = 0

# Load and organize data by subject and activity.
# File names are parsed as "<subject>_<activity>.dat"; each record is read with
# wfdb (by its name without extension) and stored as
# subject_data[subject][activity].
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the subject/activity processing order reproducible between runs and
# machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".dat"):
        continue
    subject, activity = filename.split('_')[0], filename.split('_')[1].split('.')[0]
    record_name = os.path.join(folder_path, filename.split('.')[0])
    record = wfdb.rdrecord(record_name)
    subject_data[subject][activity] = record

def _merge_window_features(ecg_feat, ppg_feat):
    """Merge one window's ECG and PPG feature dicts under prefixed keys.

    Keys are prefixed with "ECG_" / "PPG_" before merging. Merging the raw
    dicts would let a PPG feature overwrite any ECG feature stored under the
    same key (selected_keys_ecg and selected_keys_ppg share ten names).
    """
    merged = {"ECG_" + key: value for key, value in ecg_feat.items()}
    merged.update({"PPG_" + key: value for key, value in ppg_feat.items()})
    return merged


# Class order shared by the confusion matrix and the heatmap tick labels.
# Without an explicit `labels=` argument, confusion_matrix() orders classes by
# sorted label value ('run', 'sit', 'walk'), which would not line up with the
# tick labels drawn on the heatmap.
class_labels = ['sit', 'walk', 'run']

# Process data for each subject: per activity, window-level features are split
# 5/1 in window order and each part is averaged into a single sample.
for subject, activity_records in subject_data.items():

    print(f"Processing subject: {subject}")

    train_features, test_features = [], []
    train_labels, test_labels = [], []

    # Column names of X_train / X_test, taken from the merged feature dicts
    feature_names = None

    for activity, record in activity_records.items():

        print(f"Processing activity: {activity}")

        # Extract ECG and PPG signals
        ecg_index = record.sig_name.index('ecg')
        ecg_signal = record.p_signal[:, ecg_index]
        pleth_2_index = record.sig_name.index('pleth_2')
        pleth_2_signal = record.p_signal[:, pleth_2_index]

        # Extract local features.
        # NOTE(review): local_features is defined outside this section; it is
        # used here as returning a sequence with one feature dict per window.
        ecg_local_features = local_features(ecg_signal, fs, is_ecg=True)
        pleth_2_local_features = local_features(pleth_2_signal, fs, is_ecg=False)

        # Pair up ECG and PPG windows and merge their (prefixed) features
        window_features = [
            _merge_window_features(ecg_feat, ppg_feat)
            for ecg_feat, ppg_feat in zip(ecg_local_features, pleth_2_local_features)
        ]

        # Split local windows 5/1
        num_windows = len(ecg_local_features)
        train_size = int(5/6 * num_windows)
        train_windows = window_features[:train_size]
        test_windows = window_features[train_size:]
        if not train_windows or not test_windows:
            # Averaging an empty split would silently produce NaN features.
            raise ValueError(
                f"Not enough feature windows to split subject {subject}, "
                f"activity {activity} (got {len(window_features)})"
            )

        if feature_names is None:
            feature_names = list(train_windows[0].keys())

        # Aggregate the features across the windows of each split
        train_aggregated_features = np.mean([list(w.values()) for w in train_windows], axis=0)
        test_aggregated_features = np.mean([list(w.values()) for w in test_windows], axis=0)

        # Append the aggregated features as a single sample
        train_features.append(train_aggregated_features)
        test_features.append(test_aggregated_features)

        # Append the label only once per activity
        train_labels.append(activity)
        test_labels.append(activity)

    # Convert to NumPy arrays
    X_train = np.array(train_features)
    y_train = np.array(train_labels)
    X_test = np.array(test_features)
    y_test = np.array(test_labels)

    # Normalize the data (scaler statistics come from the training rows only)
    scaler = StandardScaler()
    X_train_normalized = scaler.fit_transform(X_train)
    X_test_normalized = scaler.transform(X_test)

    # Train the classifier
    clf = RandomForestClassifier(n_estimators=1000, random_state=42)
    clf.fit(X_train_normalized, y_train)

    # Store the model for this subject
    subject_models[subject] = clf

    # Predict on the test set
    y_pred = clf.predict(X_test_normalized)

    # Calculate and print the accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy for {subject}: {accuracy * 100:.2f}%")

    # Aggregate metrics: per-class scores are averaged with the class support
    # as weight, then accumulated (re-multiplied by the subject's support) so
    # the final figures are support-weighted over all subjects.
    metrics = precision_recall_fscore_support(y_test, y_pred, average=None)
    precision_values, recall_values, f1_values, support_values = metrics

    total_support_subject = sum(support_values)
    weighted_precision = sum(p * s for p, s in zip(precision_values, support_values)) / total_support_subject
    weighted_recall = sum(r * s for r, s in zip(recall_values, support_values)) / total_support_subject
    weighted_f1 = sum(f * s for f, s in zip(f1_values, support_values)) / total_support_subject

    total_weighted_precision += weighted_precision * total_support_subject
    total_weighted_recall += weighted_recall * total_support_subject
    total_weighted_f1 += weighted_f1 * total_support_subject
    total_support += total_support_subject
    total_accuracy += accuracy
    total_subjects += 1

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Confusion Matrix; `labels=` pins the row/column order to the same list
    # that is used for the tick labels.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
    plt.figure(figsize=(10, 7))
    sns.heatmap(conf_matrix, annot=True, cmap="YlGnBu", xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()
    plt.close()

    # SHAP Value Calculation and Plotting
    explainer = shap.TreeExplainer(clf)
    shap_values = explainer.shap_values(X_test_normalized)

    # Plot SHAP values for each class
    for i, label in enumerate(clf.classes_):
        # Older SHAP releases return a list with one (n_samples, n_features)
        # array per class; newer releases return a single array with the class
        # on the last axis. Support both layouts.
        if isinstance(shap_values, list):
            class_shap_values = shap_values[i]
        else:
            class_shap_values = shap_values[:, :, i]

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Summary for class {label}")

        # Display SHAP summary plot (feature values shown are the unscaled X_test).
        # feature_names has one entry per column because the windows were
        # averaged, not concatenated.
        shap.summary_plot(class_shap_values, X_test, feature_names=feature_names, show=False)
        # Render and release the figure so open figures do not pile up across
        # classes and subjects.
        plt.show()
        plt.close()

        # Create Explanation object for bar plot
        expected_value = explainer.expected_value[i]
        explanation = shap.Explanation(values=class_shap_values,
                                       base_values=expected_value,
                                       data=X_test,
                                       feature_names=feature_names)

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Bar for class {label}")

        # Display SHAP bar plot
        shap.plots.bar(explanation, max_display=20, show=False)
        plt.show()
        plt.close()

# Calculate final aggregated metrics.
# Precision, recall and F1 are the accumulated weighted sums divided by the
# total support; accuracy is the mean of the per-subject accuracies.
final_precision, final_recall, final_f1 = (
    weighted_sum / total_support
    for weighted_sum in (total_weighted_precision, total_weighted_recall, total_weighted_f1)
)
final_accuracy = total_accuracy / total_subjects

final_metrics = dict(
    zip(
        ("Precision", "Recall", "F1-Score", "Support", "Accuracy"),
        (final_precision, final_recall, final_f1, total_support, final_accuracy),
    )
)

print("Final Aggregated Metrics:", final_metrics)

##Binary

In [None]:
# Import required modules
import os
import wfdb
import shap
import numpy as np
from collections import defaultdict
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_fscore_support
import matplotlib.pyplot as plt
import seaborn as sns

# Define folder path
folder_path = '/Users/roupenminassian/physionet.org/files/pulse-transit-time-ppg/1.1.0/Data/'  # Update with your path

fs = 500  # Sampling frequency

# Feature keys for the PPG and ECG signals. The ECG list is the PPG list
# followed by nine ECG-only keys.
selected_keys_ppg = [
    'mean', 'median', 'variance', 'std_dev', 'skewness', 'kurtosis',
    'num_peaks', 'num_valleys', 'spectral_entropy', 'dom_freq',
]
selected_keys_ecg = selected_keys_ppg + [
    'mean_nni', 'sdnn', 'sdsd', 'pnn20', 'pnn50', 'rmssd', 'sd1', 'sd2',
    'respiratory_rate',
]

# Two-level mapping subject -> activity -> loaded record; missing entries are
# created on first access.
subject_data = defaultdict(lambda: defaultdict(list))

# Fitted classifier per subject
subject_models = {}

# Running totals accumulated over all subjects for the final aggregated metrics
total_weighted_precision = total_weighted_recall = total_weighted_f1 = 0
total_support = total_accuracy = total_subjects = 0

# Load and organize data by subject and activity.
# File names are parsed as "<subject>_<activity>.dat"; each record is read with
# wfdb (by its name without extension) and stored as
# subject_data[subject][activity].
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# keep the subject/activity processing order reproducible between runs and
# machines.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".dat"):
        continue
    subject, activity = filename.split('_')[0], filename.split('_')[1].split('.')[0]
    record_name = os.path.join(folder_path, filename.split('.')[0])
    record = wfdb.rdrecord(record_name)
    subject_data[subject][activity] = record

def _merge_window_features(ecg_feat, ppg_feat):
    """Merge one window's ECG and PPG feature dicts under prefixed keys.

    Keys are prefixed with "ECG_" / "PPG_" before merging. Merging the raw
    dicts would let a PPG feature overwrite any ECG feature stored under the
    same key (selected_keys_ecg and selected_keys_ppg share ten names).
    """
    merged = {"ECG_" + key: value for key, value in ecg_feat.items()}
    merged.update({"PPG_" + key: value for key, value in ppg_feat.items()})
    return merged


# Class order shared by the confusion matrix and the heatmap tick labels, so
# the matrix rows/columns are guaranteed to match the ticks drawn on it.
class_labels = ['movement', 'rest']

# Process data for each subject: per activity, window-level features are split
# 5/1 in window order and each part is averaged into a single sample, labelled
# with the binary class derived from the activity.
for subject, activity_records in subject_data.items():

    print(f"Processing subject: {subject}")

    train_features, test_features = [], []
    train_labels, test_labels = [], []

    # Column names of X_train / X_test, taken from the merged feature dicts
    feature_names = None

    for activity, record in activity_records.items():

        print(f"Processing activity: {activity}")

        # Convert to binary classes ('movement' and 'rest')
        binary_activity = 'movement' if activity in ['run', 'walk'] else 'rest'

        # Extract ECG and PPG signals
        ecg_index = record.sig_name.index('ecg')
        ecg_signal = record.p_signal[:, ecg_index]
        pleth_2_index = record.sig_name.index('pleth_2')
        pleth_2_signal = record.p_signal[:, pleth_2_index]

        # Extract local features.
        # NOTE(review): local_features is defined outside this section; it is
        # used here as returning a sequence with one feature dict per window.
        ecg_local_features = local_features(ecg_signal, fs, is_ecg=True)
        pleth_2_local_features = local_features(pleth_2_signal, fs, is_ecg=False)

        # Pair up ECG and PPG windows and merge their (prefixed) features
        window_features = [
            _merge_window_features(ecg_feat, ppg_feat)
            for ecg_feat, ppg_feat in zip(ecg_local_features, pleth_2_local_features)
        ]

        # Split local windows 5/1
        num_windows = len(ecg_local_features)
        train_size = int(5/6 * num_windows)
        train_windows = window_features[:train_size]
        test_windows = window_features[train_size:]
        if not train_windows or not test_windows:
            # Averaging an empty split would silently produce NaN features.
            raise ValueError(
                f"Not enough feature windows to split subject {subject}, "
                f"activity {activity} (got {len(window_features)})"
            )

        if feature_names is None:
            feature_names = list(train_windows[0].keys())

        # Aggregate the features across the windows of each split
        train_aggregated_features = np.mean([list(w.values()) for w in train_windows], axis=0)
        test_aggregated_features = np.mean([list(w.values()) for w in test_windows], axis=0)

        # Append the aggregated features as a single sample
        train_features.append(train_aggregated_features)
        test_features.append(test_aggregated_features)

        # Append the label only once per activity
        train_labels.append(binary_activity)
        test_labels.append(binary_activity)

    # Convert to NumPy arrays
    X_train = np.array(train_features)
    y_train = np.array(train_labels)
    X_test = np.array(test_features)
    y_test = np.array(test_labels)

    # Normalize the data (scaler statistics come from the training rows only)
    scaler = StandardScaler()
    X_train_normalized = scaler.fit_transform(X_train)
    X_test_normalized = scaler.transform(X_test)

    # Train the classifier
    clf = RandomForestClassifier(n_estimators=1000, random_state=42)
    clf.fit(X_train_normalized, y_train)

    # Store the model for this subject
    subject_models[subject] = clf

    # Predict on the test set
    y_pred = clf.predict(X_test_normalized)

    # Calculate and print the accuracy
    accuracy = accuracy_score(y_test, y_pred)
    print(f"Test Accuracy for {subject}: {accuracy * 100:.2f}%")

    # Aggregate metrics: per-class scores are averaged with the class support
    # as weight, then accumulated (re-multiplied by the subject's support) so
    # the final figures are support-weighted over all subjects.
    metrics = precision_recall_fscore_support(y_test, y_pred, average=None)
    precision_values, recall_values, f1_values, support_values = metrics

    total_support_subject = sum(support_values)
    weighted_precision = sum(p * s for p, s in zip(precision_values, support_values)) / total_support_subject
    weighted_recall = sum(r * s for r, s in zip(recall_values, support_values)) / total_support_subject
    weighted_f1 = sum(f * s for f, s in zip(f1_values, support_values)) / total_support_subject

    total_weighted_precision += weighted_precision * total_support_subject
    total_weighted_recall += weighted_recall * total_support_subject
    total_weighted_f1 += weighted_f1 * total_support_subject
    total_support += total_support_subject
    total_accuracy += accuracy
    total_subjects += 1

    # Classification Report
    print("Classification Report:")
    print(classification_report(y_test, y_pred))

    # Confusion Matrix; `labels=` pins the row/column order to the same list
    # that is used for the tick labels.
    conf_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
    plt.figure(figsize=(10, 7))
    sns.heatmap(conf_matrix, annot=True, cmap="YlGnBu", xticklabels=class_labels, yticklabels=class_labels)
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()
    plt.close()

    # SHAP Value Calculation and Plotting
    explainer = shap.TreeExplainer(clf)
    shap_values = explainer.shap_values(X_test_normalized)

    # Plot SHAP values for each class
    for i, label in enumerate(clf.classes_):
        # Older SHAP releases return a list with one (n_samples, n_features)
        # array per class; newer releases return a single array with the class
        # on the last axis. Support both layouts.
        if isinstance(shap_values, list):
            class_shap_values = shap_values[i]
        else:
            class_shap_values = shap_values[:, :, i]

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Summary for class {label}")

        # Display SHAP summary plot (feature values shown are the unscaled X_test).
        # feature_names has one entry per column because the windows were
        # averaged, not concatenated.
        shap.summary_plot(class_shap_values, X_test, feature_names=feature_names, show=False)
        # Render and release the figure so open figures do not pile up across
        # classes and subjects.
        plt.show()
        plt.close()

        # Create Explanation object for bar plot
        expected_value = explainer.expected_value[i]
        explanation = shap.Explanation(values=class_shap_values,
                                       base_values=expected_value,
                                       data=X_test,
                                       feature_names=feature_names)

        plt.figure(figsize=(16, 10))
        plt.title(f"SHAP Bar for class {label}")

        # Display SHAP bar plot
        shap.plots.bar(explanation, max_display=20, show=False)
        plt.show()
        plt.close()

# Calculate final aggregated metrics.
# Precision, recall and F1 are the accumulated weighted sums divided by the
# total support; accuracy is the mean of the per-subject accuracies.
final_precision, final_recall, final_f1 = (
    weighted_sum / total_support
    for weighted_sum in (total_weighted_precision, total_weighted_recall, total_weighted_f1)
)
final_accuracy = total_accuracy / total_subjects

final_metrics = dict(
    zip(
        ("Precision", "Recall", "F1-Score", "Support", "Accuracy"),
        (final_precision, final_recall, final_f1, total_support, final_accuracy),
    )
)

print("Final Aggregated Metrics:", final_metrics)