In [12]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Names of the six duration groups to train on: three base durations, each
# with a plain and an "_overlap" variant (presumably window length in
# seconds, with/without overlapping windows -- confirm against the dataset).
durations = [
    f"{seconds}s{variant}"
    for seconds in (5, 10, 15)
    for variant in ("", "_overlap")
]
# Directory containing the duration CSVs
csv_dir = "final_dataset/CSVs"

# Model builder for tabular data
def build_model(input_shape, num_classes):
    """Build an uncompiled feed-forward classifier for tabular features.

    Architecture: Dense(128, relu) -> Dropout(0.5) -> Dense(64, relu)
    -> Dropout(0.3) -> Dense(num_classes, softmax).

    Args:
        input_shape: Number of feature columns per sample (a plain int; it
            is wrapped into a 1-tuple here).
        num_classes: Number of target classes, i.e. the width of the
            softmax output layer.

    Returns:
        A ``Sequential`` model; the caller is responsible for compiling it.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer rather than passing
        # input_shape= to the first Dense layer: current Keras versions emit
        # a UserWarning for the latter form in Sequential models.
        layers.Input(shape=(input_shape,)),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(num_classes, activation='softmax')
    ])
    return model

# Training hyper-parameters shared by every duration group
batch_size, epochs = 32, 20
# Per-group test metrics, keyed by group name
results = dict()

# Feature columns to use (modify as needed)
feature_columns = [
    'mean_flow',
    'std_flow',
    'edge_ratio',
    'keypoint_count',
    *(f'fft_peak{rank}' for rank in (1, 2, 3)),
]

# Train and evaluate one independent model per duration group. Each group is
# read from "<csv_dir>/<group>.csv"; groups that cannot be used are skipped
# with a message instead of aborting the whole run.
for group in durations:
    csv_path = os.path.join(csv_dir, f"{group}.csv")
    if not os.path.isfile(csv_path):
        print(f"Skipping {group}: CSV not found at {csv_path}")
        continue

    print(f"\n=== Training for group: {group} ===")

    # Drop Keras global state left behind by the previous group's model so
    # memory does not accumulate across the successive training runs.
    tf.keras.backend.clear_session()

    # Load data
    meta_df = pd.read_csv(csv_path)

    # Validate the schema up front: a CSV lacking a required column is
    # skipped (same policy as a missing file) rather than raising KeyError.
    required_columns = feature_columns + ['class']
    missing_columns = [col for col in required_columns if col not in meta_df.columns]
    if missing_columns:
        print(f"Skipping {group}: missing columns {missing_columns}")
        continue

    # Discard incomplete rows. Otherwise a missing label would be turned
    # into a class literally named 'nan' by astype(str) below, and missing
    # feature values would be fed to the network as NaN.
    rows_before = len(meta_df)
    meta_df = meta_df.dropna(subset=required_columns)
    rows_dropped = rows_before - len(meta_df)
    if rows_dropped:
        print(f"Dropped {rows_dropped} rows with missing values in {group}")
    if meta_df.empty:
        print(f"Skipping {group}: no usable rows in {csv_path}")
        continue

    # Extract features and labels
    X = meta_df[feature_columns].values.astype(np.float32)
    y = meta_df['class'].astype(str).values

    # Encode labels as integer indices in sorted label order
    label_names = sorted(set(y))
    label_to_index = {name: idx for idx, name in enumerate(label_names)}
    y_encoded = np.array([label_to_index[l] for l in y])

    # Split data FIRST so the scaler below never sees validation/test rows.
    # 20% is held out for test, then 25% of the remainder for validation,
    # giving a 60/20/20 train/val/test split, stratified by class.
    # NOTE(review): rows are split at random. If several rows come from the
    # same recording (likely for the "_overlap" groups, where neighbouring
    # windows would share samples), near-duplicates can land on both sides
    # of the split and inflate the scores -- consider a group-aware split.
    # TODO confirm how the CSV rows were generated.
    X_train_val, X_test, y_train_val, y_test = train_test_split(
        X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42
    )
    X_train, X_val, y_train, y_val = train_test_split(
        X_train_val, y_train_val, test_size=0.25, stratify=y_train_val, random_state=42
    )

    # Normalize data AFTER splitting
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)  # Fit only on training data
    X_val = scaler.transform(X_val)          # Transform validation
    X_test = scaler.transform(X_test)        # Transform test

    # Build TensorFlow datasets; only the training set is shuffled, with a
    # buffer covering the whole set.
    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)) \
        .shuffle(buffer_size=len(X_train)) \
        .batch(batch_size)

    val_ds = tf.data.Dataset.from_tensor_slices((X_val, y_val)) \
        .batch(batch_size)

    test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)) \
        .batch(batch_size)

    # Build, compile, and train model. The sparse loss matches the integer
    # (not one-hot) labels produced above.
    model = build_model(input_shape=X_train.shape[1], num_classes=len(label_names))
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Stop once val_loss has not improved for 3 epochs and roll back to the
    # best weights seen.
    early_stop = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=3, restore_best_weights=True
    )

    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[early_stop]
    )

    # Evaluate on test set; evaluate() yields the loss followed by the
    # single compiled metric (accuracy).
    test_loss, test_acc = model.evaluate(test_ds)
    results[group] = {'accuracy': test_acc, 'loss': test_loss}
    print(f"Group {group} — Test Accuracy: {test_acc:.4f}")

# Final report: one line of test metrics per group stored in `results`
print("\n=== Summary ===")
for group_name in results:
    metrics = results[group_name]
    print(f"{group_name}: Accuracy={metrics['accuracy']:.4f}, Loss={metrics['loss']:.4f}")


=== Training for group: 5s ===
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7574 - loss: 0.5577 - val_accuracy: 0.9451 - val_loss: 0.1068
Epoch 2/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9680 - loss: 0.1072 - val_accuracy: 1.0000 - val_loss: 0.0131
Epoch 3/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9933 - loss: 0.0326 - val_accuracy: 1.0000 - val_loss: 0.0026
Epoch 4/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9966 - loss: 0.0158 - val_accuracy: 1.0000 - val_loss: 0.0013
Epoch 5/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9987 - loss: 0.0091 - val_accuracy: 1.0000 - val_loss: 5.3853e-04
Epoch 6/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9991 - loss: 0.0052 - val_accuracy: 1.0000 - val_loss: 4.1434e-04
Epoch 7/20
[1m304/304[0m [32

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.7919 - loss: 0.4371 - val_accuracy: 1.0000 - val_loss: 0.0100
Epoch 2/20
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9936 - loss: 0.0303 - val_accuracy: 1.0000 - val_loss: 0.0013
Epoch 3/20
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9982 - loss: 0.0106 - val_accuracy: 1.0000 - val_loss: 2.3398e-04
Epoch 4/20
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9994 - loss: 0.0039 - val_accuracy: 1.0000 - val_loss: 1.9167e-04
Epoch 5/20
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9997 - loss: 0.0024 - val_accuracy: 1.0000 - val_loss: 1.0370e-04
Epoch 6/20
[1m608/608[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9998 - loss: 0.0019 - val_accuracy: 1.0000 - val_loss: 4.4454e-05
Epoch 7/20
[1m608/608

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7066 - loss: 0.6060 - val_accuracy: 1.0000 - val_loss: 0.0797
Epoch 2/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9790 - loss: 0.0885 - val_accuracy: 1.0000 - val_loss: 0.0061
Epoch 3/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9971 - loss: 0.0212 - val_accuracy: 1.0000 - val_loss: 0.0017
Epoch 4/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9974 - loss: 0.0126 - val_accuracy: 1.0000 - val_loss: 7.5774e-04
Epoch 5/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9995 - loss: 0.0058 - val_accuracy: 1.0000 - val_loss: 3.5268e-04
Epoch 6/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9996 - loss: 0.0038 - val_accuracy: 1.0000 - val_loss: 2.8933e-04
Epoch 7/20
[1m304/304[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8156 - loss: 0.4372 - val_accuracy: 1.0000 - val_loss: 0.0088
Epoch 2/20
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9963 - loss: 0.0233 - val_accuracy: 1.0000 - val_loss: 7.2544e-04
Epoch 3/20
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9986 - loss: 0.0081 - val_accuracy: 1.0000 - val_loss: 3.1612e-04
Epoch 4/20
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9991 - loss: 0.0034 - val_accuracy: 1.0000 - val_loss: 9.9501e-05
Epoch 5/20
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9996 - loss: 0.0027 - val_accuracy: 1.0000 - val_loss: 5.2860e-05
Epoch 6/20
[1m574/574[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9998 - loss: 0.0013 - val_accuracy: 1.0000 - val_loss: 2.8873e-05
Epoch 7/20
[1m574/

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7399 - loss: 0.5706 - val_accuracy: 0.9713 - val_loss: 0.0941
Epoch 2/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9681 - loss: 0.1022 - val_accuracy: 1.0000 - val_loss: 0.0080
Epoch 3/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9974 - loss: 0.0223 - val_accuracy: 1.0000 - val_loss: 0.0018
Epoch 4/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9982 - loss: 0.0100 - val_accuracy: 1.0000 - val_loss: 6.9480e-04
Epoch 5/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9992 - loss: 0.0057 - val_accuracy: 1.0000 - val_loss: 3.0786e-04
Epoch 6/20
[1m304/304[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9994 - loss: 0.0042 - val_accuracy: 1.0000 - val_loss: 2.3164e-04
Epoch 7/20
[1m304/304[0m 

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.8113 - loss: 0.4134 - val_accuracy: 1.0000 - val_loss: 0.0092
Epoch 2/20
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9956 - loss: 0.0259 - val_accuracy: 1.0000 - val_loss: 0.0011
Epoch 3/20
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9995 - loss: 0.0064 - val_accuracy: 1.0000 - val_loss: 2.2942e-04
Epoch 4/20
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9994 - loss: 0.0040 - val_accuracy: 1.0000 - val_loss: 1.6740e-04
Epoch 5/20
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9997 - loss: 0.0019 - val_accuracy: 1.0000 - val_loss: 9.9618e-05
Epoch 6/20
[1m557/557[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9998 - loss: 0.0021 - val_accuracy: 1.0000 - val_loss: 7.5186e-05
Epoch 7/20
[1m557/557