In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, Dense, Dropout, BatchNormalization, Add
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.regularizers import l2
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
from scipy.interpolate import CubicSpline

# === Time-warping augmentation function ===
def time_warp(X, sigma=0.2):
    orig_steps = np.arange(X.shape[1])  # time steps, e.g. 0..9
    X_warped = np.zeros_like(X)
    for i in range(X.shape[0]):
        random_warp = np.random.normal(loc=1.0, scale=sigma, size=(X.shape[1],))
        warped_steps = np.cumsum(random_warp)
        # Normalize warped steps back to original scale (0 to seq_length-1)
        warped_steps = (warped_steps - warped_steps.min()) / (warped_steps.max() - warped_steps.min()) * (X.shape[1] - 1)

        cs = CubicSpline(orig_steps, X[i])
        X_warped[i] = cs(warped_steps)
    return X_warped

# === Jittering augmentation function ===
def add_jitter(X, sigma=0.05):
    return X + np.random.normal(loc=0.0, scale=sigma, size=X.shape)

# === Load and preprocess data ===
data = pd.read_csv(r"C:\Users\MSI\Desktop\Mitacs Project\Human Activity Recognition\HAR-WISDM\Data_WISDM\WISDM_cleaned.csv")

X = data.drop('class', axis=1).values
y = data['class'].values

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Reshape to 3D (samples, time_steps, features)
X_reshaped = X_scaled.reshape(-1, 10, 3)
y_reshaped = y_encoded.reshape(-1)

# === Build the CNN-LSTM Model ===
def build_cnn_lstm_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)

    # CNN Block 1
    x = Conv1D(64, kernel_size=5, padding='same', activation='relu', kernel_regularizer=l2(0.01))(inputs)
    x = BatchNormalization()(x)
    x = Dropout(0.25)(x)

    # Residual Connection
    conv1 = Conv1D(64, kernel_size=5, padding='same', activation='relu', kernel_regularizer=l2(0.01))(x)
    conv1 = BatchNormalization()(conv1)
    x = Add()([x, conv1])
    x = Dropout(0.25)(x)

    # CNN Block 2
    x = Conv1D(64, kernel_size=5, padding='same', activation='relu', kernel_regularizer=l2(0.01))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.3)(x)

    # LSTM Layer
    x = LSTM(64, return_sequences=False, kernel_regularizer=l2(0.01))(x)
    x = Dropout(0.3)(x)

    # Dense Layers
    x = Dense(64, activation='relu', kernel_regularizer=l2(0.01))(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# === Cross-validation with jitter + time-warp augmentation on training data ===
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fold = 1
f1_scores = []

for train_idx, val_idx in skf.split(X_reshaped, y_reshaped):
    print(f"\n📚 Training Fold {fold}")
    X_train, X_val = X_reshaped[train_idx], X_reshaped[val_idx]
    y_train, y_val = y_reshaped[train_idx], y_reshaped[val_idx]

    # Balance training data using jittering + time-warping
    X_aug, y_aug = [], []
    counter = Counter(y_train)
    max_count = max(counter.values())

    for cls in np.unique(y_train):
        X_cls = X_train[y_train == cls]
        y_cls = y_train[y_train == cls]

        n_to_add = max_count - len(X_cls)
        reps = n_to_add // len(X_cls) + 1

        # Generate augmentations
        X_jittered = np.concatenate([add_jitter(X_cls) for _ in range(reps)], axis=0)[:n_to_add]
        X_timewarped = np.concatenate([time_warp(X_cls) for _ in range(reps)], axis=0)[:n_to_add]

        # Combine augmented data
        X_augmented = np.concatenate([X_jittered, X_timewarped], axis=0)
        y_augmented = np.full(len(X_augmented), cls)

        # Print number of added samples per class
        print(f"Class {label_encoder.inverse_transform([cls])[0]}: original {len(X_cls)}, augmented {len(X_augmented)}")

        # Append original + augmented
        X_aug.append(np.concatenate([X_cls, X_augmented], axis=0))
        y_aug.append(np.concatenate([y_cls, y_augmented], axis=0))

    X_train_balanced = np.concatenate(X_aug, axis=0)
    y_train_balanced = np.concatenate(y_aug, axis=0)

    # Shuffle augmented training set
    indices = np.arange(len(X_train_balanced))
    np.random.shuffle(indices)
    X_train_balanced = X_train_balanced[indices]
    y_train_balanced = y_train_balanced[indices]

    # Compute class weights on balanced training set
    class_weights = compute_class_weight('balanced', classes=np.unique(y_train_balanced), y=y_train_balanced)
    class_weight_dict = dict(enumerate(class_weights))

    # Build and train model
    model = build_cnn_lstm_model(input_shape=(10, 3), num_classes=len(np.unique(y_reshaped)))

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

    history = model.fit(
        X_train_balanced, y_train_balanced,
        epochs=100,
        batch_size=32,
        class_weight=class_weight_dict,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    # Evaluate
    y_pred = model.predict(X_val)
    y_pred_classes = np.argmax(y_pred, axis=1)

    print(f"\n🧾 Fold {fold} Classification Report:")
    print(classification_report(y_val, y_pred_classes, target_names=label_encoder.classes_))

    report = classification_report(y_val, y_pred_classes, output_dict=True)
    f1_scores.append(report['macro avg']['f1-score'])
    fold += 1

# === Final F1-score ===
print(f"\n✅ Average Macro F1-Score across {skf.n_splits} folds: {np.mean(f1_scores):.4f} ± {np.std(f1_scores):.4f}")



📚 Training Fold 1
Class Downstairs: original 422, augmented 2484
Class Jogging: original 1300, augmented 728
Class Sitting: original 245, augmented 2838
Class Standing: original 197, augmented 2934
Class Upstairs: original 506, augmented 2316
Class Walking: original 1664, augmented 0
Epoch 1/100
[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 9ms/step - accuracy: 0.4369 - loss: 2.7477 - val_accuracy: 0.4548 - val_loss: 1.3981 - learning_rate: 0.0010
Epoch 2/100
[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5593 - loss: 1.2594 - val_accuracy: 0.5581 - val_loss: 1.2867 - learning_rate: 0.0010
Epoch 3/100
[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5696 - loss: 1.1752 - val_accuracy: 0.5065 - val_loss: 1.3391 - learning_rate: 0.0010
Epoch 4/100
[1m489/489[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 8ms/step - accuracy: 0.5955 - loss: 1.1384 - val_accuracy: 0.5747 - val_los

KeyboardInterrupt: 