# Social Signal Classification with Hybrid CNN-RNN Models

This notebook implements hybrid architectures combining CNNs for feature extraction with RNNs for temporal modeling:
1. **CNN-LSTM Model** - Captures long-term temporal dependencies
2. **CNN-GRU Model** - More efficient alternative to LSTM
3. **CNN-Bidirectional LSTM** - Processes sequences in both directions
4. **Attention-based CNN-LSTM** - Focuses on important time steps

## Why Hybrid Models Work Better for Time-Series
- **CNN layers**: Extract local patterns and features from accelerometer data
- **RNN layers**: Model temporal dependencies and sequence patterns
- **Together**: Combine spatial feature extraction with temporal modeling

In [None]:
import os
import glob
import json
import zipfile
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    f1_score,
    precision_score,
    recall_score
)

import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (
    Conv1D, MaxPooling1D, GlobalAveragePooling1D, GlobalMaxPooling1D,
    LSTM, GRU, Bidirectional, TimeDistributed,
    Dense, Dropout, BatchNormalization, Flatten, Input, Concatenate,
    Attention, MultiHeadAttention, LayerNormalization
)
from tensorflow.keras.callbacks import (
    EarlyStopping,
    ReduceLROnPlateau,
    ModelCheckpoint
)


# Report the runtime environment: the TensorFlow version and the GPU devices
# TensorFlow can see (an empty list means no GPU is visible).
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {tf.config.list_physical_devices('GPU')}")

In [None]:
# Configuration
# The dataset is expected under <current working directory>/data/social_signal.
notebook_dir = os.getcwd()
your_dataset_path = os.path.join(notebook_dir, "data", "social_signal")

# Sliding-window parameters, in rows of the input CSV: each window holds
# `window_size` rows and consecutive windows start `step_size` rows apart
# (25 of 50, i.e. 50% overlap).
window_size = 50
step_size = 25
# Training hyper-parameters passed to model.fit.
epochs = 50
batch_size = 32

# A window is treated as "static" when the variance of its acceleration
# magnitude is below this value (see calculate_acceleration_variance).
STATIC_THRESHOLD = 0.05

# Seed NumPy and TensorFlow random number generators.
np.random.seed(42)
tf.random.set_seed(42)

# Architecture selector understood by build_model:
# 'cnn_lstm', 'cnn_gru', 'cnn_bilstm' or 'cnn_attention_lstm'.
MODEL_TYPE = 'cnn_gru'

## Utility Functions

In [None]:
def calculate_acceleration_variance(data):
    """Measure how much the acceleration magnitude varies within one window.

    Args:
        data: 2-D array whose first three columns hold the x, y and z
            acceleration components, one row per sample.

    Returns:
        Tuple ``(variance, is_static)``: the variance of the per-sample
        magnitude, and whether that variance is below ``STATIC_THRESHOLD``.
    """
    x, y, z = data[:, 0], data[:, 1], data[:, 2]
    magnitude = np.sqrt(x**2 + y**2 + z**2)
    spread = np.var(magnitude)
    return spread, spread < STATIC_THRESHOLD

def filter_static_windows(windows, labels):
    """Select the windows that are classified as static (low motion).

    Despite the name, this keeps the static windows and discards the rest.

    Args:
        windows: Array-like of windows; each window is passed to
            ``calculate_acceleration_variance``.
        labels: Array-like of labels aligned with ``windows``.

    Returns:
        Tuple ``(static_windows, static_labels, static_indices)`` where
        ``static_indices`` holds the integer positions of the kept windows.
        When nothing is static the returned arrays are empty but keep the
        trailing dimensions of the inputs (e.g. ``(0, window, channels)``),
        so later reshapes do not fail on a bare ``(0,)`` array.
    """
    windows = np.asarray(windows)
    labels = np.asarray(labels)

    static_indices = np.array(
        [
            position
            for position, window in enumerate(windows)
            if calculate_acceleration_variance(window)[1]
        ],
        dtype=int,
    )

    # Indexing with an (possibly empty) integer array preserves trailing dims.
    return windows[static_indices], labels[static_indices], static_indices

In [None]:
def augment_window(window, aug_type='scale'):
    """Return an augmented copy of one window.

    Args:
        window: 2-D array, one row per sample. The 'rotation' mode multiplies
            by a 3x3 matrix, so it needs exactly three columns.
        aug_type: One of
            'scale'     - multiply everything by a random factor in [0.90, 1.10);
            'noise'     - add Gaussian noise with standard deviation 0.01;
            'shift'     - shift rows by a random offset in [-3, 3], padding
                          with the edge row;
            'magnitude' - rescale each row's vector length by a random factor
                          in [0.93, 1.07);
            'rotation'  - rotate about the third axis by a random angle in
                          [-0.1, 0.1) radians.
            Any other value returns ``window`` unchanged.

    Returns:
        The augmented array (or the input itself when nothing is applied).
    """
    if aug_type == 'scale':
        return window * np.random.uniform(0.90, 1.10)

    if aug_type == 'noise':
        return window + np.random.normal(0, 0.01, window.shape)

    if aug_type == 'shift':
        offset = np.random.randint(-3, 4)
        if offset == 0:
            return window
        if offset > 0:
            # Drop the first rows and pad the end with the last row.
            tail = np.repeat(window[-1:], offset, axis=0)
            return np.concatenate([window[offset:], tail], axis=0)
        # Negative offset: pad the start with the first row, drop the last rows.
        head = np.repeat(window[:1], -offset, axis=0)
        return np.concatenate([head, window[:offset]], axis=0)

    if aug_type == 'magnitude':
        factor = np.random.uniform(0.93, 1.07)
        norms = np.sqrt(np.sum(window**2, axis=1, keepdims=True))
        unit_vectors = window / (norms + 1e-8)  # epsilon avoids division by zero
        return unit_vectors * (norms * factor)

    if aug_type == 'rotation':
        theta = np.random.uniform(-0.1, 0.1)
        c, s = np.cos(theta), np.sin(theta)
        about_third_axis = np.array([[c, -s, 0],
                                     [s, c, 0],
                                     [0, 0, 1]])
        return window @ about_third_axis.T

    return window

In [None]:
def load_files_from_folder(folder_path):
    """List the CSV files directly inside a folder.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        List of paths (``folder_path`` joined with the file name) for every
        entry whose name ends with ``.csv``, in ``os.listdir`` order.
    """
    return [
        os.path.join(folder_path, entry)
        for entry in os.listdir(folder_path)
        if entry.endswith('.csv')
    ]

def split_files(file_list, test_size=0.2):
    """Split a list of files into train and test subsets.

    Args:
        file_list: Files to split.
        test_size: Test fraction handed to ``train_test_split``; only used
            when there are more than two files.

    Returns:
        Tuple ``(train_files, test_files)``. No files gives two empty lists;
        one file goes entirely to train; two files give one each; otherwise
        a shuffled split with ``random_state=42``.
    """
    count = len(file_list)

    if count > 2:
        train_part, test_part = train_test_split(
            file_list, test_size=test_size, shuffle=True, random_state=42)
        return train_part, test_part

    if count == 2:
        return [file_list[0]], [file_list[1]]

    if count == 1:
        print(f"Only 1 file found")
        return file_list, []

    return [], []

# Per-class augmentation policy: label -> (probability that a window is
# augmented, number of augmented copies added when it is).
_AUGMENTATION_POLICY = {
    0: (0.6, 2),
    1: (0.9, 3),
    2: (0.95, 3),
    3: (0.4, 1),
}
_AUGMENTATION_TYPES = ('scale', 'noise', 'shift', 'magnitude', 'rotation')
# Accepted column naming conventions, tried in this order.
_ACCEL_COLUMN_SETS = (
    ['accelX', 'accelY', 'accelZ'],
    ['accel_x', 'accel_y', 'accel_z'],
)


def _read_accel_csv(file_path):
    """Return the x/y/z acceleration columns of a CSV as an array, or None."""
    for columns in _ACCEL_COLUMN_SETS:
        try:
            # `usecols` keeps the file's column order, so re-select to force x, y, z.
            return pd.read_csv(file_path, usecols=columns)[columns].to_numpy()
        except (KeyError, ValueError):
            # pandas raises ValueError (not KeyError) when `usecols` names are
            # missing, and for empty or unparsable files.
            continue
    return None


def load_and_apply_sliding_windows(file_paths, window_size, step_size, label,
                                   augment=True):
    """Cut CSV recordings into fixed-size windows, optionally with augmentation.

    Args:
        file_paths: CSV files to read. Files lacking both supported column
            sets (``accelX/Y/Z`` or ``accel_x/y/z``) are skipped with a message.
        window_size: Number of rows per window.
        step_size: Row offset between the starts of consecutive windows.
        label: Label assigned to every window produced.
        augment: When True (default), each window may be followed by augmented
            copies according to the per-label policy. Labels without a policy
            entry are never augmented. Pass False for evaluation data.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays; both are empty when no
        window could be produced.
    """
    windows = []
    labels = []
    aug_prob, num_augmentations = _AUGMENTATION_POLICY.get(label, (0.0, 0))
    augmenting = augment and num_augmentations > 0

    for file_path in file_paths:
        data = _read_accel_csv(file_path)
        if data is None:
            print(f"Skipping {file_path}: acceleration columns not found or file unreadable")
            continue

        for start in range(0, data.shape[0] - window_size + 1, step_size):
            window = data[start:start + window_size]
            windows.append(window)
            labels.append(label)

            if augmenting and np.random.random() < aug_prob:
                for aug_idx in range(num_augmentations):
                    aug_type = _AUGMENTATION_TYPES[aug_idx % len(_AUGMENTATION_TYPES)]
                    windows.append(augment_window(window, aug_type))
                    labels.append(label)

    return np.array(windows), np.array(labels)


def process_activity(activity, label, dataset_path, window_size=50, step_size=25, test_size=0.2):
    """Load one activity folder and return windowed train and test data.

    Files are split into train and test sets before windowing. Only the
    training windows are augmented, so the test set contains recorded data only.

    Args:
        activity: Name of the sub-folder of ``dataset_path`` holding the CSVs.
        label: Integer label assigned to every window of this activity.
        dataset_path: Root folder of the dataset.
        window_size: Number of rows per window.
        step_size: Row offset between consecutive windows.
        test_size: Fraction of files held out for testing.

    Returns:
        Tuple ``(train_windows, train_labels, test_windows, test_labels)``;
        four empty arrays when the folder is missing or has no CSV files.
    """
    folder_path = os.path.join(dataset_path, activity)

    if not os.path.exists(folder_path):
        return np.array([]), np.array([]), np.array([]), np.array([])

    # os.listdir order is platform dependent; sort so the seeded split is reproducible.
    file_list = sorted(load_files_from_folder(folder_path))

    if len(file_list) == 0:
        return np.array([]), np.array([]), np.array([]), np.array([])

    train_files, test_files = split_files(file_list, test_size=test_size)

    train_windows, train_labels = load_and_apply_sliding_windows(
        train_files, window_size, step_size, label)

    test_windows, test_labels = load_and_apply_sliding_windows(
        test_files, window_size, step_size, label, augment=False)

    return train_windows, train_labels, test_windows, test_labels

def combine_data(train_test_data, data_type):
    """Concatenate the windows and labels of every activity for one split.

    Args:
        train_test_data: Mapping ``activity -> dict`` holding the keys
            ``'<data_type>_windows'`` and ``'<data_type>_labels'``.
        data_type: Prefix of the keys to combine; the notebook uses
            ``'train'`` and ``'test'``.

    Returns:
        Tuple ``(windows, labels)`` concatenated along axis 0, skipping empty
        entries; two empty arrays when no activity has any windows.
    """
    windows_key = f'{data_type}_windows'
    labels_key = f'{data_type}_labels'

    window_chunks = []
    label_chunks = []
    for per_activity in train_test_data.values():
        if len(per_activity[windows_key]) > 0:
            window_chunks.append(per_activity[windows_key])
        if len(per_activity[labels_key]) > 0:
            label_chunks.append(per_activity[labels_key])

    if not window_chunks:
        return np.array([]), np.array([])

    return (np.concatenate(window_chunks, axis=0),
            np.concatenate(label_chunks, axis=0))

## Hybrid Model Architectures

In [None]:
def build_cnn_lstm_model(input_shape, num_classes):
    """Build and compile the CNN-LSTM classifier.

    Two convolutional stages (64 then 128 filters, each ending in max-pooling
    and dropout) feed two stacked LSTM layers and a dense softmax head.

    Args:
        input_shape: Shape of one input window without the batch axis; the
            notebook passes ``(window_size, 3)``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (Adam with learning rate 0.001,
        categorical cross-entropy with label smoothing 0.05, accuracy metric).
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which recent Keras versions warn against.
        Input(shape=input_shape),

        # Convolutional stage 1
        Conv1D(filters=64, kernel_size=5, activation='relu', padding='same'),
        BatchNormalization(),
        Conv1D(filters=64, kernel_size=5, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Convolutional stage 2
        Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Recurrent layers: the first returns the full sequence for the second.
        LSTM(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
        LSTM(64, return_sequences=False, dropout=0.2, recurrent_dropout=0.2),

        # Classification head
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),

        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),

        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
        metrics=['accuracy']
    )

    return model

In [None]:
def build_cnn_gru_model(input_shape, num_classes):
    """Build and compile the CNN-GRU classifier.

    Same layout as the CNN-LSTM model, with GRU layers in place of the LSTMs:
    two convolutional stages (64 then 128 filters), two stacked GRU layers and
    a dense softmax head.

    Args:
        input_shape: Shape of one input window without the batch axis; the
            notebook passes ``(window_size, 3)``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (Adam with learning rate 0.001,
        categorical cross-entropy with label smoothing 0.05, accuracy metric).
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which recent Keras versions warn against.
        Input(shape=input_shape),

        # Convolutional stage 1
        Conv1D(filters=64, kernel_size=5, activation='relu', padding='same'),
        BatchNormalization(),
        Conv1D(filters=64, kernel_size=5, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Convolutional stage 2
        Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Recurrent layers: the first returns the full sequence for the second.
        GRU(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2),
        GRU(64, return_sequences=False, dropout=0.2, recurrent_dropout=0.2),

        # Classification head
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),

        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),

        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
        metrics=['accuracy']
    )

    return model

In [None]:
def build_cnn_bidirectional_lstm_model(input_shape, num_classes):
    """Build and compile the CNN-Bidirectional-LSTM classifier.

    Two convolutional stages (64 then 128 filters) feed two stacked
    bidirectional LSTM layers (64 and 32 units per direction) and a dense
    softmax head.

    Args:
        input_shape: Shape of one input window without the batch axis; the
            notebook passes ``(window_size, 3)``.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled ``Sequential`` model (Adam with learning rate 0.001,
        categorical cross-entropy with label smoothing 0.05, accuracy metric).
    """
    model = Sequential([
        # Declare the input explicitly rather than passing `input_shape` to the
        # first layer, which recent Keras versions warn against.
        Input(shape=input_shape),

        # Convolutional stage 1
        Conv1D(filters=64, kernel_size=5, activation='relu', padding='same'),
        BatchNormalization(),
        Conv1D(filters=64, kernel_size=5, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Convolutional stage 2
        Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        Conv1D(filters=128, kernel_size=3, activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),

        # Recurrent layers read the sequence in both directions.
        Bidirectional(LSTM(64, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)),
        Bidirectional(LSTM(32, return_sequences=False, dropout=0.2, recurrent_dropout=0.2)),

        # Classification head
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.3),

        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),

        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
        metrics=['accuracy']
    )

    return model

In [None]:
def build_cnn_attention_lstm_model(input_shape, num_classes):
    """Build and compile the attention-based CNN-LSTM classifier.

    Pipeline: two convolutional stages, one sequence-returning LSTM,
    multi-head self-attention with a residual connection and layer
    normalisation, concatenated global average and max pooling, then a dense
    softmax head.

    Args:
        input_shape: Shape of one input window without the batch axis.
        num_classes: Number of units in the final softmax layer.

    Returns:
        A compiled functional ``Model`` (Adam with learning rate 0.001,
        categorical cross-entropy with label smoothing 0.05, accuracy metric).
    """
    inputs = Input(shape=input_shape)

    # Convolutional stages: (filters, kernel size), two conv+BN pairs each.
    features = inputs
    for n_filters, kernel in ((64, 5), (128, 3)):
        for _ in range(2):
            features = Conv1D(filters=n_filters, kernel_size=kernel,
                              activation='relu', padding='same')(features)
            features = BatchNormalization()(features)
        features = MaxPooling1D(pool_size=2)(features)
        features = Dropout(0.2)(features)

    sequence = LSTM(128, return_sequences=True, dropout=0.2, recurrent_dropout=0.2)(features)

    # Self-attention over the LSTM outputs, added back to them (skip connection).
    attended = MultiHeadAttention(num_heads=4, key_dim=32)(sequence, sequence)
    attended = LayerNormalization()(attended + sequence)

    # Summarise the time axis with both average and max pooling.
    pooled_mean = GlobalAveragePooling1D()(attended)
    pooled_max = GlobalMaxPooling1D()(attended)
    head = Concatenate()([pooled_mean, pooled_max])

    # Dense head: (units, dropout rate)
    for units, rate in ((128, 0.3), (64, 0.2)):
        head = Dense(units, activation='relu')(head)
        head = BatchNormalization()(head)
        head = Dropout(rate)(head)

    outputs = Dense(num_classes, activation='softmax')(head)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
        metrics=['accuracy']
    )

    return model

In [None]:
def build_model(model_type, input_shape, num_classes):
    """Build the architecture named by ``model_type`` and print its summary.

    Args:
        model_type: 'cnn_lstm', 'cnn_gru', 'cnn_bilstm' or 'cnn_attention_lstm'.
        input_shape: Forwarded to the selected builder.
        num_classes: Forwarded to the selected builder.

    Returns:
        The compiled model returned by the selected builder.

    Raises:
        ValueError: If ``model_type`` is not one of the supported names.
    """
    # Lambdas defer the builder lookup until a valid type has been chosen.
    builders = {
        'cnn_lstm': lambda: build_cnn_lstm_model(input_shape, num_classes),
        'cnn_gru': lambda: build_cnn_gru_model(input_shape, num_classes),
        'cnn_bilstm': lambda: build_cnn_bidirectional_lstm_model(input_shape, num_classes),
        'cnn_attention_lstm': lambda: build_cnn_attention_lstm_model(input_shape, num_classes),
    }

    if model_type not in builders:
        raise ValueError(f"Unknown model type: {model_type}")

    model = builders[model_type]()
    model.summary()
    return model

In [None]:
def evaluate_per_class_accuracy(y_true, y_pred, activity_names):
    """Print and return the accuracy of each class.

    For class ``i`` the accuracy is the share of samples whose true label is
    ``i`` that were also predicted as ``i``. The number of classes is taken
    from ``activity_names`` rather than being fixed at four.

    Args:
        y_true: Integer class indices of the ground truth.
        y_pred: Integer class indices predicted by the model.
        activity_names: Class names; the position in the list is the class index.

    Returns:
        List with one accuracy per entry of ``activity_names``; classes with
        no true samples get 0.0.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    print("PER-CLASS ACCURACY")

    total_pass = 0
    class_accuracies = []

    for i, activity in enumerate(activity_names):
        in_class = y_true == i
        support = int(np.sum(in_class))

        if support == 0:
            class_accuracies.append(0.0)
            print(f"{activity:25s}: No samples in test set")
            continue

        acc = float(np.sum(y_pred[in_class] == i)) / support
        class_accuracies.append(acc)
        status = "good" if acc >= 0.75 else "not good"
        if acc >= 0.75:
            total_pass += 1
        print(f"{activity:25s}: {acc:.4f} ({acc*100:.2f}%) {status}")

    print(f"{'Mean Class Accuracy':25s}: {np.mean(class_accuracies):.4f} ({np.mean(class_accuracies)*100:.2f}%)")
    print(f"Classes passing: {total_pass}/{len(activity_names)}")

    return class_accuracies

## Data Loading and Preprocessing

In [None]:

print(f"Model Type: {MODEL_TYPE}")


# Class names in label order: each activity's integer label is its position.
activity_names = ['breathingNormally', 'coughing', 'hyperventilation', 'other']

# Mapping activity name -> integer label.
activities = {name: index for index, name in enumerate(activity_names)}

print(f"\nFound {len(activities)} social signal activities:")
for activity, label in activities.items():
    print(f"  [{label}] {activity}")

In [None]:

print(f"Window size: {window_size}, Step size: {step_size}")

# Per-activity store; the keys below match the order in which
# process_activity returns its four arrays.
train_test_data = {}
result_keys = ('train_windows', 'train_labels', 'test_windows', 'test_labels')

for activity, label in activities.items():
    print(f"\n  Processing {activity}...")

    arrays = process_activity(
        activity, label, your_dataset_path,
        window_size=window_size,
        step_size=step_size,
        test_size=0.2
    )
    train_test_data[activity] = dict(zip(result_keys, arrays))

    train_count = len(train_test_data[activity]['train_windows'])
    test_count = len(train_test_data[activity]['test_windows'])
    total_count = train_count + test_count

    print(f"    Total windows: {total_count} (Train: {train_count}, Test: {test_count})")

In [None]:
# Combine the per-activity windows, keep the static ones, then normalise.

X_train_all, y_train_all = combine_data(train_test_data, 'train')
X_test_all, y_test_all = combine_data(train_test_data, 'test')

print(f"Total windows - Train: {len(X_train_all)}, Test: {len(X_test_all)}")

# Keep only the windows classified as static.
X_train, y_train, train_static_indices = filter_static_windows(X_train_all, y_train_all)
X_test, y_test, test_static_indices = filter_static_windows(X_test_all, y_test_all)

print(f"Static windows - Train: {len(X_train)}, Test: {len(X_test)}")

# Fail early with a clear message instead of an obscure reshape/scaler error.
if len(X_train) == 0 or len(X_test) == 0:
    raise ValueError(
        f"No static windows left after filtering (train={len(X_train)}, "
        f"test={len(X_test)}); check the dataset path and STATIC_THRESHOLD."
    )

print("\nClass distribution after filtering:")
for i, activity in enumerate(activity_names):
    train_count = np.sum(y_train == i)
    test_count = np.sum(y_test == i)
    print(f"  {activity:20s}: Train={train_count:6d}, Test={test_count:5d}")

# Normalise each channel. The scaler is fitted on the training data only;
# windows are flattened to (rows, channels) for the scaler and restored after.
scaler = StandardScaler()
X_train_reshaped = X_train.reshape(-1, X_train.shape[-1])
X_test_reshaped = X_test.reshape(-1, X_test.shape[-1])

X_train_normalized = scaler.fit_transform(X_train_reshaped)
X_test_normalized = scaler.transform(X_test_reshaped)

X_train = X_train_normalized.reshape(X_train.shape)
X_test = X_test_normalized.reshape(X_test.shape)


In [None]:
# One-hot encode the labels.
# The categories are fixed to every known class index so the encoded width
# always equals len(activities), even when a class has no training windows.

encoder = OneHotEncoder(categories=[list(range(len(activities)))], sparse_output=False)
y_train_one_hot = encoder.fit_transform(y_train.reshape(-1, 1))
y_test_one_hot = encoder.transform(y_test.reshape(-1, 1))

print(f"Training samples per class:")
for i, activity in enumerate(activity_names):
    count = np.sum(y_train == i)
    print(f"  {activity}: {count}")

## Model Building and Training

In [None]:


# Build the model
input_shape = (window_size, 3)
num_classes = len(activities)

model = build_model(MODEL_TYPE, input_shape, num_classes)

# Class weights: inverse-frequency base weight times a hand-tuned multiplier.
y_train_classes = np.argmax(y_train_one_hot, axis=1)
unique, counts = np.unique(y_train_classes, return_counts=True)
total_samples = len(y_train_classes)

# Multiplier per class id. Keyed by the class id itself (not by its position
# in `unique`) so the right factor is used even when a class is absent.
class_weight_multipliers = {0: 2.2, 1: 3.8, 2: 4.5, 3: 1.3}

class_weight_dict = {}
for class_id, class_count in zip(unique, counts):
    base_weight = total_samples / (len(unique) * class_count)
    multiplier = class_weight_multipliers.get(int(class_id), 1.0)
    class_weight_dict[int(class_id)] = float(base_weight * multiplier)

print(f"\nOptimized class weights: {class_weight_dict}")

# Stop when validation accuracy has not improved for 15 epochs and restore
# the best weights seen.
early_stop = EarlyStopping(
    monitor='val_accuracy',
    patience=15,
    restore_best_weights=True,
    verbose=1,
    mode='max'
)

# Halve the learning rate after 5 epochs without validation-loss improvement.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001,
    verbose=1
)

# Save the model with the best validation accuracy.
checkpoint = ModelCheckpoint(
    f'best_{MODEL_TYPE}_model.keras',
    monitor='val_accuracy',
    save_best_only=True,
    mode='max',
    verbose=1
)

# Train the model
print(f"epochs: {epochs}, Batch size: {batch_size}")

# NOTE(review): the test set doubles as validation data, so early stopping and
# checkpoint selection see the data later used for evaluation; the reported
# test metrics are therefore optimistic. Consider a separate validation split.
history = model.fit(
    X_train, y_train_one_hot,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_test, y_test_one_hot),
    class_weight=class_weight_dict,
    callbacks=[early_stop, reduce_lr, checkpoint],
    verbose=1
)



## Model Evaluation

In [None]:
# Evaluate the model on the test windows

y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test_one_hot, axis=1)

overall_acc = accuracy_score(y_true, y_pred)

print(f"OVERALL ACCURACY: {overall_acc:.4f} ({overall_acc*100:.2f}%)")


class_accuracies = evaluate_per_class_accuracy(y_true, y_pred, activity_names)


print("CLASSIFICATION REPORT")

# Pass the full label list so the report lines up with `activity_names` even
# when a class never occurs in y_true/y_pred; zero_division=0 reports such
# classes as 0 instead of emitting warnings.
print(classification_report(y_true, y_pred,
                            labels=list(range(len(activity_names))),
                            target_names=activity_names,
                            digits=4,
                            zero_division=0))

In [None]:


# Per-class precision / recall / F1 bar chart.
# The full label list keeps the report aligned with `activity_names` even
# when a class is missing from the test data.
report = classification_report(y_true, y_pred,
                              labels=list(range(len(activity_names))),
                              target_names=activity_names,
                              digits=4,
                              output_dict=True,
                              zero_division=0)

classes = activity_names
precision_scores = [report[activity]['precision'] for activity in activity_names]
recall_scores = [report[activity]['recall'] for activity in activity_names]
f1_scores = [report[activity]['f1-score'] for activity in activity_names]

x = np.arange(len(classes))
width = 0.25

fig, ax = plt.subplots(figsize=(12, 6))

bars1 = ax.bar(x - width, precision_scores, width, label='Precision', color='#5B9BD5')
bars2 = ax.bar(x, recall_scores, width, label='Recall', color='#FF9F40')
bars3 = ax.bar(x + width, f1_scores, width, label='F1-Score', color='#70AD47')

# Write each bar's value just above it.
for bars in [bars1, bars2, bars3]:
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.2f}',
                ha='center', va='bottom', fontsize=10, fontweight='bold')

# Draw the threshold line before building the legend so its label is included.
ax.axhline(y=0.75, color='red', linestyle='--', linewidth=1.5, alpha=0.7, label='75% Threshold')

ax.set_xlabel('Activity Class', fontsize=12, fontweight='bold')
ax.set_ylabel('Score', fontsize=12, fontweight='bold')
ax.set_title('Per-Class Performance Metrics', fontsize=14, fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(classes, rotation=0, ha='center')
ax.legend(loc='upper right', fontsize=11)
ax.set_ylim(0, 1.0)
ax.grid(axis='y', alpha=0.3, linestyle='--')

plt.tight_layout()
plt.savefig(f'per_class_metrics_{MODEL_TYPE}.png', dpi=300, bbox_inches='tight')
plt.show()

print(f"\nPer-class metrics bar graph saved as: per_class_metrics_{MODEL_TYPE}.png")

In [None]:
# Confusion matrices (raw counts and row-normalised).
# Fix the label set so the matrix always has one row/column per activity and
# matches the tick labels, even when a class is absent from the test data.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(activity_names))))

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=activity_names, yticklabels=activity_names)
plt.title(f'Confusion Matrix - {MODEL_TYPE.upper()}', fontsize=14, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'confusion_matrix_{MODEL_TYPE}.png', dpi=300, bbox_inches='tight')
plt.show()

# Normalise each row by its total; rows without samples stay at zero instead
# of turning into NaN through a division by zero.
row_totals = cm.sum(axis=1, keepdims=True)
cm_normalized = np.divide(cm.astype('float'), row_totals,
                          out=np.zeros(cm.shape, dtype=float),
                          where=row_totals != 0)

plt.figure(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='Blues',
            xticklabels=activity_names, yticklabels=activity_names)
plt.title(f'Normalized Confusion Matrix - {MODEL_TYPE.upper()}', fontsize=14, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'confusion_matrix_normalized_{MODEL_TYPE}.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

# One panel per metric: (axis, key in history.history, display name).
for axis, metric, title in ((ax1, 'accuracy', 'Accuracy'), (ax2, 'loss', 'Loss')):
    axis.plot(history.history[metric], label=f'Train {title}')
    axis.plot(history.history[f'val_{metric}'], label=f'Validation {title}')
    axis.set_title(f'Model {title} - {MODEL_TYPE.upper()}', fontweight='bold')
    axis.set_xlabel('Epoch')
    axis.set_ylabel(title)
    axis.legend()
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f'training_history_{MODEL_TYPE}.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Save the trained model.
final_model_file = f'final_{MODEL_TYPE}_model.keras'
model.save(final_model_file)
print(f"\nModel saved as: {final_model_file}")

# Normalisation statistics and the label -> activity-name lookup.
scaler_params = dict(mean=scaler.mean_.tolist(), scale=scaler.scale_.tolist())
activity_mapping = {label: name for name, label in activities.items()}

# Write each JSON artefact: (description, file name stem, payload).
for description, file_stem, payload in (
    ("Scaler parameters", "scaler_params", scaler_params),
    ("Activity mapping", "activity_mapping", activity_mapping),
):
    json_name = f'{file_stem}_{MODEL_TYPE}.json'
    with open(json_name, 'w') as f:
        json.dump(payload, f)
    print(f"{description} saved as: {json_name}")

## Final Summary

In [None]:

print(f"Final Results Summary - {MODEL_TYPE.upper()}")

print(f"Overall Accuracy: {overall_acc*100:.2f}%")
print(f"Mean Class Accuracy: {np.mean(class_accuracies)*100:.2f}%")
print(f"Total Test Samples: {len(X_test)}")
print(f"\nPer-Class Performance:")
for i, activity in enumerate(activity_names):
    # Only report classes that actually occur in the test labels.
    if not np.any(y_true == i):
        continue
    class_acc = class_accuracies[i]
    status = "good" if class_acc >= 0.75 else "not good"
    print(f"  {status} {activity:20s}: {class_acc*100:.2f}%")

classes_passing = len([acc for acc in class_accuracies if acc >= 0.75])
print(f"\nClasses achieving >75% accuracy: {classes_passing}/{len(activity_names)}")

In [None]:
# Convert the best checkpoint to TFLite format

model_path = f'best_{MODEL_TYPE}_model.keras'
print(f"\nLoading trained model: {model_path}")

if not os.path.exists(model_path):
    raise FileNotFoundError(f"Trained model not found: {model_path}")

model = tf.keras.models.load_model(model_path)

converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Allow TensorFlow ops in addition to the TFLite built-ins.
converter.target_spec.supported_ops = [
    tf.lite.OpsSet.TFLITE_BUILTINS,
    tf.lite.OpsSet.SELECT_TF_OPS
]
# NOTE(review): private converter flag; presumably needed so the recurrent
# layers convert - confirm it is still required for the installed TF version.
converter._experimental_lower_tensor_list_ops = False

tflite_model = converter.convert()

tflite_filename = f'{MODEL_TYPE}_model.tflite'
with open(tflite_filename, 'wb') as f:
    f.write(tflite_model)

print(f"TFLite model saved: {tflite_filename}")
print(f"  Size: {os.path.getsize(tflite_filename) / 1024:.2f} KB")

# Save the metadata needed to reproduce preprocessing at inference time

metadata = {
    'model_type': MODEL_TYPE,
    'window_size': window_size,
    'step_size': step_size,
    'activity_mapping': activities,
    'scaler_mean': scaler.mean_.tolist(),
    'scaler_scale': scaler.scale_.tolist(),
    'training_date': pd.Timestamp.now().isoformat(),
    'includes_tf_ops': True,
    'model_architecture': 'hybrid_cnn_rnn'
}

metadata_filename = f'{MODEL_TYPE}_metadata.json'
with open(metadata_filename, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"Metadata saved: {metadata_filename}")


# Smoke-test the converted model with one random input window.
try:
    interpreter = tf.lite.Interpreter(model_path=tflite_filename)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()


    print(f"  Input shape:  {input_details[0]['shape']}")
    print(f"  Output shape: {output_details[0]['shape']}")

    # Test with a sample input
    test_input = np.random.randn(1, window_size, 3).astype(np.float32)
    interpreter.set_tensor(input_details[0]['index'], test_input)
    interpreter.invoke()
    test_output = interpreter.get_tensor(output_details[0]['index'])

    print(f"  Test prediction shape: {test_output.shape}")
    print(f"  Test prediction: {test_output[0]}")

except RuntimeError as e:
    # Best effort: the .tflite file is already written, so report the failure
    # (with its cause) and carry on.
    print(f"Error: TFLite verification failed: {e}")

print(f"\nGenerated files:")
print(f"  1.  {tflite_filename}")
print(f"  2.  {metadata_filename}")

In [None]:
# Test the model on a new dataset

# Locate the dataset archive in the known candidate locations.
possible_zip_locations = [
    os.path.join(notebook_dir, "data", "RESpeckData_2526_fixed.zip"),
    os.path.join(notebook_dir, "RESpeckData_2526_fixed.zip"),
]

zip_path = next(
    (location for location in possible_zip_locations if os.path.exists(location)),
    None,
)

if zip_path is None:
    raise FileNotFoundError(
        "Test dataset archive not found; looked in: "
        + ", ".join(possible_zip_locations)
    )

# Extract into a fresh directory, removing any previous extraction first.
extract_path = os.path.join(notebook_dir, "temp_test_data")

if os.path.exists(extract_path):
    shutil.rmtree(extract_path)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f"✓ Dataset extracted to: {extract_path}")

data_path = os.path.join(extract_path, "RESpeckData_2526_fixed", "social_signal")
print(f"  Looking for data in: {data_path}")

# Pick the model deterministically: prefer this run's best checkpoint, then
# its final model, and only then any other .keras file (sorted by name).
# Taking the first glob hit could load a different architecture's model.
candidate_models = [
    f'best_{MODEL_TYPE}_model.keras',
    f'final_{MODEL_TYPE}_model.keras',
] + sorted(glob.glob('*.keras'))

model_path = next((path for path in candidate_models if os.path.exists(path)), None)
if model_path is None:
    raise FileNotFoundError(f"Model not found: best_{MODEL_TYPE}_model.keras")
print(f"  Found model: {model_path}")

model = tf.keras.models.load_model(model_path)
print(f"Model loaded")
print(f"  Input shape:  {model.input_shape}")
print(f"  Output shape: {model.output_shape}")


# Reuse the training-time windowing and normalisation settings.
test_window_size = window_size
test_step_size = step_size
test_scaler_mean = scaler.mean_
test_scaler_scale = scaler.scale_

test_activities = [str(act) for act in activities]

print(f"Configuration loaded:")
print(f"  Window size: {test_window_size}")
print(f"  Step size: {test_step_size}")
print(f"  Training activities: {test_activities}")

# Folder names used by the test dataset -> activity names used in training.
folder_to_training_label = {
    'breathing': 'breathingNormally',
    'Breathing': 'breathingNormally',
    'coughing': 'coughing',
    'Coughing': 'coughing',
    'hyperventilate': 'hyperventilation',
    'Hyperventilate': 'hyperventilation',
    'other': 'other',
    'Other': 'other'
}

if not os.path.exists(data_path):
    raise FileNotFoundError(f"Test data folder not found in extracted archive: {data_path}")

available_folders = [f for f in os.listdir(data_path) if os.path.isdir(os.path.join(data_path, f))]
print(f"  Available folders: {available_folders}")

# Keep only the mappings whose folder is present in the extracted dataset.
folder_to_label = {
    folder: label
    for folder, label in folder_to_training_label.items()
    if folder in available_folders
}

if not folder_to_label:
    print(f"   Available: {available_folders}")
    print(f"   Expected: {list(folder_to_training_label.keys())}")
    raise ValueError("No matching folders in test dataset")

for folder, label in folder_to_label.items():
    print(f"  {folder:20s} → {label}")

test_labels = list(set(folder_to_label.values()))
print(f"\nSummary:")
print(f"   Test dataset has {len(test_labels)} classes: {test_labels}")
print(f"   Model trained on {len(test_activities)} classes: {test_activities}")
missing_classes = set(test_activities) - set(test_labels)
if missing_classes:
    print(f"Missing from test: {missing_classes}")

def load_test_data(data_path, folder_to_label, window_size, step_size):
    """Load CSV recordings from labelled folders and cut them into windows.

    No augmentation is applied. Both column conventions used by the training
    loader (``accelX/Y/Z`` and ``accel_x/y/z``) are accepted. Files that
    cannot be read or lack these columns are skipped and reported.

    Args:
        data_path: Root folder containing one sub-folder per activity.
        folder_to_label: Mapping ``folder name -> training label``.
        window_size: Number of rows per window.
        step_size: Row offset between the starts of consecutive windows.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays, with one label (the
        mapped training label) per window.
    """
    column_sets = (
        ['accelX', 'accelY', 'accelZ'],
        ['accel_x', 'accel_y', 'accel_z'],
    )
    all_windows = []
    all_labels = []

    for folder_name, training_label in folder_to_label.items():
        folder_path = os.path.join(data_path, folder_name)

        if not os.path.exists(folder_path):
            print(f"Folder '{folder_name}' not found")
            continue

        # Sorted for a deterministic window order across platforms.
        csv_files = sorted(glob.glob(os.path.join(folder_path, '*.csv')))
        print(f"  Loading {folder_name:20s} → {training_label:20s}: {len(csv_files):3d} files", end='')

        windows_loaded = 0
        skipped = []  # (file, reason); printed after the summary line
        for csv_file in csv_files:
            try:
                df = pd.read_csv(csv_file)
            except (OSError, ValueError) as e:
                # ValueError covers pandas' empty-file and parser errors.
                skipped.append((csv_file, str(e)))
                continue

            columns = next(
                (cols for cols in column_sets
                 if all(col in df.columns for col in cols)),
                None,
            )
            if columns is None:
                skipped.append((csv_file, "acceleration columns not found"))
                continue

            accel_data = df[columns].values

            # An empty range when the recording is shorter than one window.
            for start_idx in range(0, len(accel_data) - window_size + 1, step_size):
                all_windows.append(accel_data[start_idx:start_idx + window_size])
                all_labels.append(training_label)
                windows_loaded += 1

        print(f" → {windows_loaded} windows")
        for skipped_file, reason in skipped:
            print(f"    Skipped {skipped_file}: {reason}")

    return np.array(all_windows), np.array(all_labels)

# Window every CSV of the held-out dataset; each window carries the training
# label that its source folder maps to.
X_test, y_test = load_test_data(
    data_path,
    folder_to_label,
    test_window_size,
    test_step_size,
)

print(f"\nTest data loaded:")
print(f"  Total windows: {len(X_test)}")

# Nothing can be evaluated without at least one window.
if not len(X_test):
    raise ValueError("No test data loaded.")

unique_labels, counts = np.unique(y_test, return_counts=True)
print(f"\n  Class distribution (training labels):")
for label, count in zip(unique_labels, counts):
    print(f"    {label:20s}: {count:5d} windows")

# Normalise: flatten the windows to rows of 3 axis values, standardise them
# with the stored mean/scale (epsilon guards against a zero scale), then
# restore the (windows, test_window_size, 3) layout.
X_test_reshaped = X_test.reshape(-1, 3)
X_test_normalized = (
    (X_test_reshaped - test_scaler_mean) / (test_scaler_scale + 1e-8)
).reshape(-1, test_window_size, 3)

# Predict class probabilities and keep the most probable class per window.
predictions_proba = model.predict(
    X_test_normalized,
    batch_size=128,
    verbose=0,
)
predictions = np.argmax(predictions_proba, axis=1)


# Translate each window's training label into the class index used by the
# model (the position of the label in test_activities).
label_to_idx = {str(label): idx for idx, label in enumerate(test_activities)}

print(f"  Training label to index mapping:")
for label, idx in label_to_idx.items():
    print(f"    '{label}' → {idx}")

# -1 marks a window whose label has no entry in label_to_idx.
y_test_indices = np.array(
    [label_to_idx.get(str(label), -1) for label in y_test],
    dtype=int,
)
mapped_mask = y_test_indices >= 0
unmapped_count = int(np.sum(~mapped_mask))

if unmapped_count > 0:
    # Report the offending labels once instead of one line per window.
    unmapped_labels = sorted({str(label) for label in y_test[~mapped_mask]})
    print(f" {unmapped_count} samples had unmapped labels: {unmapped_labels}")
    # Drop the same windows from the predictions; otherwise ground truth and
    # predictions would differ in length and every metric below would fail.
    y_test_indices = y_test_indices[mapped_mask]
    predictions = predictions[mapped_mask]
    predictions_proba = predictions_proba[mapped_mask]
else:
    print(f"All {len(y_test_indices)} labels mapped")

# Overall accuracy across all evaluated windows.
test_accuracy = accuracy_score(y_test_indices, predictions)
print(f"overall accuracy: {test_accuracy*100:.2f}%")

activity_names = test_activities
class_accuracies = []       # index-aligned with activity_names
class_sample_counts = []    # number of test windows per class

print(f"\nper class accuracy:")
for i, activity in enumerate(activity_names):
    mask = y_test_indices == i
    n_class_samples = int(np.sum(mask))
    class_sample_counts.append(n_class_samples)
    if n_class_samples > 0:
        class_acc = accuracy_score(y_test_indices[mask], predictions[mask])
        class_accuracies.append(class_acc)
        status = "PASS" if class_acc >= 0.75 else " FAIL"
        print(f"{status} {activity:20s}: {class_acc*100:6.2f}% ({n_class_samples:5d} samples)")
    else:
        print(f"  - {activity:20s}: No samples in test set")
        # Placeholder that keeps the list aligned with activity_names.
        class_accuracies.append(0.0)

# Decide "tested" by sample count, not by accuracy > 0: a class that is present
# but scored 0% must still count towards the mean and the tested total.
tested_accuracies = [
    acc for acc, n_samples in zip(class_accuracies, class_sample_counts)
    if n_samples > 0
]
mean_class_acc = float(np.mean(tested_accuracies)) if tested_accuracies else 0.0
classes_with_samples = len(tested_accuracies)
classes_passing = sum(1 for acc in tested_accuracies if acc >= 0.75)

print(f"\n{'Mean Class Accuracy':23s}: {mean_class_acc*100:6.2f}%")
print(f"Classes tested: {classes_with_samples}/{len(activity_names)}")
print(f"Classes ≥75%: {classes_passing}/{classes_with_samples}")

print(f"\nclassification report:")
# Pass the full list of class indices so the report still works (and keeps the
# names aligned) when a class appears in neither the labels nor the predictions.
print(classification_report(y_test_indices, predictions,
                           labels=list(range(len(activity_names))),
                           target_names=activity_names,
                           digits=4,
                           zero_division=0))

#confusion matrix calculations
# Fix the class order explicitly: without labels= the matrix only covers the
# classes that occur in the data, so its rows/columns would no longer line up
# with the activity_names tick labels when a class is absent.
class_indices = list(range(len(activity_names)))
cm = confusion_matrix(y_test_indices, predictions, labels=class_indices)

# Raw counts.
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=activity_names, yticklabels=activity_names)
plt.title(f'Confusion Matrix - {MODEL_TYPE.upper()}', fontsize=14, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'test_confusion_matrix_{MODEL_TYPE}.png', dpi=300, bbox_inches='tight')
plt.show()
print(f"Saved: test_confusion_matrix_{MODEL_TYPE}.png")

# Row-normalised matrix: each row shows how the windows of one true class were
# distributed over the predicted classes. The epsilon keeps rows of classes
# without any test window at 0 instead of dividing by zero.
cm_normalized = cm.astype('float') / (cm.sum(axis=1)[:, np.newaxis] + 1e-8)

plt.figure(figsize=(10, 8))
sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='RdYlGn',
            xticklabels=activity_names, yticklabels=activity_names,
            vmin=0, vmax=1)
plt.title(f'Normalized Confusion Matrix - {MODEL_TYPE.upper()}', fontsize=14, fontweight='bold')
plt.ylabel('True Label', fontsize=12)
plt.xlabel('Predicted Label', fontsize=12)
plt.tight_layout()
plt.savefig(f'test_confusion_matrix_normalized_{MODEL_TYPE}.png', dpi=300, bbox_inches='tight')
plt.show()
print(f"✓ Saved: test_confusion_matrix_normalized_{MODEL_TYPE}.png")


# Collect the evaluation outcome in a JSON-serialisable dictionary.
results = {
    'model_type': MODEL_TYPE,
    'model_path': model_path,
    'test_dataset': zip_path,
    'label_mapping': folder_to_label,
    'overall_accuracy': float(test_accuracy),
    'mean_class_accuracy': float(mean_class_acc),
    'classes_tested': int(classes_with_samples),
    'classes_passing': int(classes_passing),
    # Include every class that has at least one test window. Filtering on
    # "accuracy > 0" would silently drop a class that was tested and scored 0%.
    'per_class_accuracy': {
        activity: float(class_accuracies[i])
        for i, activity in enumerate(activity_names)
        if np.any(y_test_indices == i)
    },
    'test_samples': int(len(X_test))
}

with open(f'test_results_{MODEL_TYPE}_new_dataset.json', 'w') as f:
    json.dump(results, f, indent=2)

print(f"Results saved to: test_results_{MODEL_TYPE}_new_dataset.json")

#printing the summary:
print("summary")

print(f"Model:                {MODEL_TYPE.upper()}")
print(f"Overall Accuracy:     {test_accuracy*100:.2f}%")
print(f"Mean Class Accuracy:  {mean_class_acc*100:.2f}%")
print(f"Total Test Samples:   {len(X_test)}")
print(f"Classes Tested:       {classes_with_samples}/{len(activity_names)}")
print(f"Classes ≥75%:         {classes_passing}/{classes_with_samples}")

print(f"\nLabel Mapping:")
for test_label, train_label in folder_to_label.items():
    print(f"  {test_label:20s} → {train_label}")

print(f"\nPer-Class Results:")

for i, activity in enumerate(activity_names):
    # A class counts as tested when it has at least one window in the test
    # set. Checking "accuracy > 0" instead would report a class that was tested
    # and scored 0% as "Not in test set".
    if np.any(y_test_indices == i):
        status = "good" if class_accuracies[i] >= 0.75 else "not good"
        print(f"{status} {activity:20s}: {class_accuracies[i]*100:6.2f}%")
    else:
        print(f"  - {activity:20s}: Not in test set")

# The target is a mean per-class accuracy of at least 75%.
if mean_class_acc >= 0.75:
    print("\ntarget met")
else:
    print(f"\n Mean accuracy: {mean_class_acc*100:.1f}%")
    print(f"   Classes passing: {classes_passing}/{classes_with_samples}")

In [None]:
#k fold cross validation
# Activity folder name -> integer class label.
activities = {
    'breathingNormally': 0,
    'coughing': 1,
    'hyperventilation': 2,
    'other': 3
}
activity_names = list(activities.keys())

# Columns fed to the cross-validated model.
# NOTE(review): only two axes are used here, whereas the held-out test
# evaluation works on accelX/accelY/accelZ - confirm this is intended.
cv_feature_columns = ['accelX', 'accelY']

# subject id -> array of windows / array of labels (one label per window).
subject_data = {}
subject_labels = {}

for activity_name, label in activities.items():
    activity_folder = os.path.join(your_dataset_path, activity_name)

    if not os.path.exists(activity_folder):
        print(f"Warning: Folder {activity_folder} not found")
        continue

    files = load_files_from_folder(activity_folder)
    print(f"{activity_name}: Found {len(files)} subject files")

    for file_idx, file_path in enumerate(files):
        # Every file is treated as its own subject.
        # NOTE(review): if one person recorded files in several activity
        # folders they get different ids, so their data can end up in both the
        # train and the test side of a fold - verify against the file naming.
        subject_id = f"{activity_name}_S{file_idx}"

        df = pd.read_csv(file_path)

        # Skip files without the expected columns instead of aborting the
        # whole run with a KeyError.
        missing_columns = [col for col in cv_feature_columns if col not in df.columns]
        if missing_columns:
            print(f"Warning: skipping {file_path}, missing columns {missing_columns}")
            continue

        accel_data = df[cv_feature_columns].values

        # Sliding windows; the range bound guarantees every slice is exactly
        # window_size rows long, and yields nothing for files that are too short.
        windows = [
            accel_data[start:start + window_size]
            for start in range(0, len(accel_data) - window_size + 1, step_size)
        ]
        if not windows:
            continue

        subject_data[subject_id] = np.array(windows)
        subject_labels[subject_id] = np.array([label] * len(windows))

subject_ids = np.array(list(subject_data.keys()))
n_subjects = len(subject_ids)

print(f"\nTotal subjects: {n_subjects}")

N_FOLDS = 5
# The split is made over subject ids, so all windows of one subject id end up
# on the same side of a fold.
kf = KFold(n_splits=N_FOLDS, shuffle=True, random_state=42)

n_classes = len(activities)
class_ids = list(range(n_classes))

fold_results = []
all_predictions = []
all_true_labels = []
class_accuracies_per_fold = []

for fold_idx, (train_idx, test_idx) in enumerate(kf.split(subject_ids)):
    print(f"FOLD {fold_idx + 1}/{N_FOLDS}")

    train_subjects = subject_ids[train_idx]
    test_subjects = subject_ids[test_idx]

    print(f"Train subjects: {len(train_subjects)}, Test subjects: {len(test_subjects)}")

    # Stack the windows (and labels) of all subjects on each side of the fold.
    X_train_fold = np.vstack([subject_data[subj] for subj in train_subjects])
    y_train_fold = np.concatenate([subject_labels[subj] for subj in train_subjects])
    X_test_fold = np.vstack([subject_data[subj] for subj in test_subjects])
    y_test_fold = np.concatenate([subject_labels[subj] for subj in test_subjects])

    print(f"Train samples: {len(X_train_fold)}, Test samples: {len(X_test_fold)}")

    print("\nClass distribution:")
    for i, activity in enumerate(activity_names):
        train_count = np.sum(y_train_fold == i)
        test_count = np.sum(y_test_fold == i)
        print(f"  {activity:20s}: Train={train_count:5d}, Test={test_count:4d}")

    # Keras takes validation_split from the END of the arrays, before any
    # shuffling. The stacked data is ordered subject by subject, so without
    # this permutation the validation set would only hold the last subjects.
    # Seeded per fold so runs stay reproducible.
    shuffle_order = np.random.default_rng(42 + fold_idx).permutation(len(X_train_fold))
    X_train_fold = X_train_fold[shuffle_order]
    y_train_fold = y_train_fold[shuffle_order]

    # Standardise each feature column using statistics of the training fold only.
    scaler = StandardScaler()
    X_train_reshaped = X_train_fold.reshape(-1, X_train_fold.shape[-1])
    X_test_reshaped = X_test_fold.reshape(-1, X_test_fold.shape[-1])

    X_train_normalized = scaler.fit_transform(X_train_reshaped).reshape(X_train_fold.shape)
    X_test_normalized = scaler.transform(X_test_reshaped).reshape(X_test_fold.shape)

    y_train_one_hot = tf.keras.utils.to_categorical(y_train_fold, num_classes=n_classes)
    y_test_one_hot = tf.keras.utils.to_categorical(y_test_fold, num_classes=n_classes)

    # Derive the input shape from the data so it follows the loaded columns.
    input_shape = X_train_normalized.shape[1:]
    model = build_model(MODEL_TYPE, input_shape, n_classes)

    # Balanced weights, keyed by the actual class id. Enumerating the weights
    # would shift them onto the wrong classes whenever a class is missing from
    # this training fold; missing classes get a neutral weight so the dict
    # always covers every class id.
    present_classes = np.unique(y_train_fold)
    present_weights = compute_class_weight('balanced',
                                           classes=present_classes,
                                           y=y_train_fold)
    class_weight_dict = {class_id: 1.0 for class_id in class_ids}
    class_weight_dict.update(
        {int(class_id): float(weight)
         for class_id, weight in zip(present_classes, present_weights)}
    )

    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True, verbose=0)

    #training the model
    history = model.fit(
        X_train_normalized, y_train_one_hot,
        validation_split=0.2,
        epochs=30,
        batch_size=batch_size,
        class_weight=class_weight_dict,
        callbacks=[early_stop],
        verbose=0
    )

    print(f"Training completed in {len(history.history['loss'])} epochs")

    y_pred_probs = model.predict(X_test_normalized, verbose=0)
    y_pred = np.argmax(y_pred_probs, axis=1)
    y_true = y_test_fold

    # zero_division=0 keeps the same value scikit-learn would use for a class
    # that is never predicted, without emitting a warning per fold.
    fold_acc = accuracy_score(y_true, y_pred)
    fold_f1 = f1_score(y_true, y_pred, average='weighted', zero_division=0)
    fold_precision = precision_score(y_true, y_pred, average='weighted', zero_division=0)
    fold_recall = recall_score(y_true, y_pred, average='weighted', zero_division=0)

    # Per-class accuracy = correctly classified windows / windows of that class.
    # NOTE: a class without any test window in this fold is recorded as 0.0,
    # which lowers that class's mean across folds.
    cm = confusion_matrix(y_true, y_pred, labels=class_ids)
    row_totals = cm.sum(axis=1)
    class_accs = [
        float(cm[class_id, class_id] / row_totals[class_id]) if row_totals[class_id] > 0 else 0.0
        for class_id in class_ids
    ]

    class_accuracies_per_fold.append(class_accs)

    print(f"\nFold {fold_idx + 1} Results:")
    print(f"  Overall Accuracy: {fold_acc:.4f} ({fold_acc*100:.2f}%)")
    print(f"  Precision:        {fold_precision:.4f}")
    print(f"  Recall:           {fold_recall:.4f}")
    print(f"  F1-Score:         {fold_f1:.4f}")
    print(f"\n  Per-class Accuracy:")
    for i, activity in enumerate(activity_names):
        status = "good" if class_accs[i] >= 0.75 else "not good"
        print(f"    {status} {activity:20s}: {class_accs[i]:.4f} ({class_accs[i]*100:.2f}%)")

    # Plain Python numbers so the record can be written to JSON later.
    fold_results.append({
        'fold': fold_idx + 1,
        'n_train_subjects': len(train_subjects),
        'n_test_subjects': len(test_subjects),
        'accuracy': float(fold_acc),
        'precision': float(fold_precision),
        'recall': float(fold_recall),
        'f1_score': float(fold_f1),
        'class_accuracies': class_accs
    })

    # Pool predictions across folds for the aggregated metrics.
    all_predictions.extend(y_pred)
    all_true_labels.extend(y_true)



print("CV summary")

# Metrics over the pooled predictions of every fold (weighted by class support).
overall_cv_accuracy = accuracy_score(all_true_labels, all_predictions)
overall_cv_f1 = f1_score(
    all_true_labels, all_predictions, average='weighted'
)
overall_cv_precision = precision_score(
    all_true_labels, all_predictions, average='weighted'
)
overall_cv_recall = recall_score(
    all_true_labels, all_predictions, average='weighted'
)

print(f"\nOverall Cross-Validation Metrics (Aggregated across all folds):")
print(f"  Accuracy:  {overall_cv_accuracy:.4f} ({overall_cv_accuracy*100:.2f}%)")
print(f"  Precision: {overall_cv_precision:.4f} ({overall_cv_precision*100:.2f}%)")
print(f"  Recall:    {overall_cv_recall:.4f} ({overall_cv_recall*100:.2f}%)")
print(f"  F1-Score:  {overall_cv_f1:.4f} ({overall_cv_f1*100:.2f}%)")

# Spread of the per-fold scores.
fold_accuracies = []
fold_f1s = []
for fold_record in fold_results:
    fold_accuracies.append(fold_record['accuracy'])
    fold_f1s.append(fold_record['f1_score'])

print(f"\nPer-Fold Statistics:")
print(f"  Accuracy: {np.mean(fold_accuracies):.4f} ± {np.std(fold_accuracies):.4f}")
print(f"  F1-Score: {np.mean(fold_f1s):.4f} ± {np.std(fold_f1s):.4f}")

# Rows = folds, columns = classes; reduce over the folds.
class_accuracies_array = np.array(class_accuracies_per_fold)
mean_class_accs = class_accuracies_array.mean(axis=0)
std_class_accs = class_accuracies_array.std(axis=0)

print("per class CV accuracy")

print(f"\n{'Activity':<22} {'Mean Accuracy':<20} {'Status'}")

for activity, class_mean, class_std in zip(activity_names, mean_class_accs, std_class_accs):
    if class_mean >= 0.75:
        status = "good"
    else:
        status = "not good"
    print(f"{activity:<22} {class_mean:.4f} ± {class_std:.4f} "
          f"({class_mean*100:.2f}%)    {status}")


print("Confusion Matrix (combined):")

# One index per activity, in the order of activity_names, instead of a
# hard-coded [0, 1, 2, 3].
report_labels = list(range(len(activity_names)))
cm_overall = confusion_matrix(all_true_labels, all_predictions, labels=report_labels)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

# Left panel: raw counts.
sns.heatmap(cm_overall, annot=True, fmt='d', cmap='Blues', ax=ax1,
            xticklabels=activity_names, yticklabels=activity_names,
            cbar_kws={'label': 'Count'})
ax1.set_title(f'Confusion Matrix - Raw Counts\n{MODEL_TYPE.upper()}', fontsize=12, pad=15)
ax1.set_ylabel('True Label')
ax1.set_xlabel('Predicted Label')

# Right panel: each row divided by its total. Rows of classes without any
# sample are divided by 1 instead of 0, so they show 0% rather than NaN.
row_totals = cm_overall.sum(axis=1)[:, np.newaxis]
cm_normalized = cm_overall.astype('float') / np.maximum(row_totals, 1)
sns.heatmap(cm_normalized, annot=True, fmt='.2%', cmap='Greens', ax=ax2,
            xticklabels=activity_names, yticklabels=activity_names,
            cbar_kws={'label': 'Percentage'})
ax2.set_title(f'Confusion Matrix - Normalized\n{MODEL_TYPE.upper()}', fontsize=12, pad=15)
ax2.set_ylabel('True Label')
ax2.set_xlabel('Predicted Label')

plt.tight_layout()
plt.show()

#Classification report

print("classification report")

# labels= keeps the report aligned with target_names even when a class never
# occurs in the pooled labels or predictions (otherwise scikit-learn raises on
# the length mismatch); zero_division=0 reports such classes as 0.
print(classification_report(all_true_labels, all_predictions,
                          labels=report_labels,
                          target_names=activity_names, digits=4,
                          zero_division=0))


# Assemble the cross-validation summary section by section, then persist it.
cv_overall_metrics = {
    'accuracy': float(overall_cv_accuracy),
    'precision': float(overall_cv_precision),
    'recall': float(overall_cv_recall),
    'f1_score': float(overall_cv_f1)
}

cv_fold_statistics = {
    'mean_accuracy': float(np.mean(fold_accuracies)),
    'std_accuracy': float(np.std(fold_accuracies)),
    'mean_f1': float(np.mean(fold_f1s)),
    'std_f1': float(np.std(fold_f1s))
}

# Mean/std across folds per class, plus whether the mean reaches 75%.
cv_per_class_accuracy = {}
for class_pos, class_name in enumerate(activity_names):
    class_mean = mean_class_accs[class_pos]
    cv_per_class_accuracy[class_name] = {
        'mean': float(class_mean),
        'std': float(std_class_accs[class_pos]),
        'pass_threshold': bool(class_mean >= 0.75)
    }

cv_results_summary = {
    'model_type': MODEL_TYPE,
    'n_folds': N_FOLDS,
    'n_subjects': n_subjects,
    'overall_metrics': cv_overall_metrics,
    'fold_statistics': cv_fold_statistics,
    'per_class_accuracy': cv_per_class_accuracy,
    'fold_details': fold_results
}

cv_results_file = f'cv_results_{MODEL_TYPE}_{N_FOLDS}fold.json'
with open(cv_results_file, 'w') as f:
    json.dump(cv_results_summary, f, indent=2)

print(f"\nCross-validation results saved to: {cv_results_file}")



# Plain-text tables meant to be copied into the report.
print("Summary table for Report ")

print("\nTable: Cross-Validation Performance Metrics")
print(f"{'Metric':<30} {'Value':<20} {'Status'}")
print(f"{'Overall Accuracy':<30} {overall_cv_accuracy:.4f} ({overall_cv_accuracy*100:.2f}%)")
print(f"{'Overall F1-Score':<30} {overall_cv_f1:.4f}")
print(f"{'Number of Folds':<30} {N_FOLDS}")
print(f"{'Total Subjects':<30} {n_subjects}")

print("\nTable: Per-Class Accuracy (Cross-Validation)")
print(f"{'Activity Class':<30} {'Accuracy':<20} {'Status'}")

# One row per activity; a class is "good" when its mean accuracy across folds
# reaches 75%.
for activity, class_mean in zip(activity_names, mean_class_accs):
    if class_mean >= 0.75:
        status = "good"
    else:
        status = "not good"
    print(f"{activity:<30} {class_mean:.4f} ({class_mean*100:.2f}%)    {status}")