# CNN 2D:

In [None]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout, Input

**Pre-Processing Functions:**

In [None]:
#Pre-processing functions:
 
def preprocess_spectrogram(df):#if the entire column is NaNs, we drop it, otherwise we replace the NaNs with the mean of the column
    #first we drop the columns with only NaNs
    df = df.dropna(axis=1, how='all')
    # Convert to numeric array
    matrix = df.to_numpy(dtype=float)            
    freqs = df.columns.to_numpy(dtype=float) 
    
    # Replace any NaNs with mean of the column without the Nans
    if np.isnan(matrix).any():
      col_means = np.nanmean(matrix, axis=0)
      inds = np.where(np.isnan(matrix))
      matrix[np.where(np.isnan(matrix))] = np.take(np.nanmean(matrix, axis=0), inds[1])

    return matrix, freqs

# Fonctions modifiees:
# L'ancienne did padding for 1D spectograms
# This function pads all 2D spectrograms to the same shape (max_time, max_freq)
def pad_2d_arrays(spec_list, pad_value=0.0):
    # Find the maximum shape in (time, frequency)
    max_time = max(s.shape[0] for s in spec_list)
    max_freq = max(s.shape[1] for s in spec_list)
    
    padded_specs = []
    for s in spec_list:
        # Calculate padding widths for time (axis 0) and frequency (axis 1)
        pad_width = ((0, max_time - s.shape[0]), (0, max_freq - s.shape[1]))
        padded = np.pad(s, pad_width, mode='constant', constant_values=pad_value)
        padded_specs.append(padded)
        
    return np.array(padded_specs)


# We now load all spectrograms (matrix) into a single list X_all
# and all labels into `y_all`.

base_path='/Users/beno/Desktop/TU DELFT/ML for EE/WICOS' #A changer pour toi
people = ['Person A', 'Person B', 'Person C', 'Person D', 'Person E', 'Person F']  
gestures = ['click', 'pinch', 'swipe', 'wave'] 
X_all = []
y_all = []

print("Loading and preprocessing spectrograms...")
for person in people:
    for gesture in gestures:
        gesture_path = os.path.join(base_path, person, gesture)
        if os.path.exists(gesture_path):
            files = [f for f in os.listdir(gesture_path) if f.endswith('.csv')]
            for f in files:
                df = pd.read_csv(os.path.join(gesture_path, f), dtype=np.float32)
                
                # We get the 2D spectrogram matrix here [cite: 11, 12]
                matrix, freqs = preprocess_spectrogram(df)
                
                # Instead of extracting 1D features, we append the 2D matrix
                X_all.append(matrix)
                y_all.append(gesture)
        else:
            print(f"Warning: Data directory not found for {person}/{gesture}")

print(f"Loaded {len(X_all)} total samples.")

**Data Preparation for 2D CNN:**

In [None]:
# Pad all 2D spectrograms to be the same size
X_padded = pad_2d_arrays(X_all)

# Convert labels to one-hot encoding (similar to your 1D CNN code )
label_encoder = LabelEncoder()
y_int = label_encoder.fit_transform(y_all)
onehot_encoder = OneHotEncoder(sparse_output=False)
y_oh = onehot_encoder.fit_transform(y_int.reshape(-1, 1))

# Add a "channels" dimension for the 2D CNN (from 3D to 4D)
# Shape becomes (num_samples, max_time, max_freq, 1)
X_cnn_2d = np.expand_dims(X_padded, axis=3)

# Get input shape and number of classes
input_shape = X_cnn_2d.shape[1:]
num_classes = y_oh.shape[1]

# Create Train/Test Split (Verify if prof provided a test set on Github)
X_train, X_test, y_train, y_test = train_test_split(
    X_cnn_2d, y_oh, test_size=0.2, random_state=42, stratify=y_all
)

print(f"X_train shape for 2D CNN: {X_train.shape}")
print(f"y_train shape for 2D CNN: {y_train.shape}")
print(f"Input shape for 2D CNN: {input_shape}")
print(f"Number of classes: {num_classes}")

**Defining the CNN model:**

In [None]:
model_2d = Sequential()

# Input Layer - shape is (time, frequency, 1)
model_2d.add(Input(shape=input_shape))

# Convolutional Block 1
model_2d.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
model_2d.add(MaxPooling2D(pool_size=(2, 2)))

# Convolutional Block 2
model_2d.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model_2d.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the 2D feature maps to 1D
model_2d.add(Flatten())

# Dense (Fully Connected) Layers
model_2d.add(Dense(64, activation='relu'))
model_2d.add(Dropout(0.5))
model_2d.add(Dense(num_classes, activation='softmax')) # num_classes is 4

# Compile the model
model_2d.compile(loss='categorical_crossentropy',
                 optimizer='adam',
                 metrics=['accuracy'])

model_2d.summary()

**Model Training:**

In [1]:
# --- MODEL TRAINING ---
print("\nTraining 2D CNN model...")
history = model_2d.fit(X_train, y_train,
                       epochs=20,  # You can adjust the number of epochs
                       batch_size=32,
                       validation_data=(X_test, y_test))

# --- MODEL EVALUATION ---
print("\nEvaluating 2D CNN model...")
y_pred_probs = model_2d.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

accuracy = accuracy_score(y_test_labels, y_pred)
print(f"Test Accuracy: {accuracy * 100:.2f}%")
print("\nClassification Report:")
print(classification_report(y_test_labels, y_pred, target_names=label_encoder.classes_))

Loading and preprocessing spectrograms...
Loaded 960 total samples.
X_train shape for 2D CNN: (768, 799, 448, 1)
y_train shape for 2D CNN: (768, 4)
Input shape for 2D CNN: (799, 448, 1)
Number of classes: 4



Training 2D CNN model...
Epoch 1/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 4s/step - accuracy: 0.3828 - loss: 178.5987 - val_accuracy: 0.4635 - val_loss: 1.1090
Epoch 2/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 5s/step - accuracy: 0.3724 - loss: 1.1644 - val_accuracy: 0.4375 - val_loss: 0.9408
Epoch 3/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 5s/step - accuracy: 0.3737 - loss: 1.1334 - val_accuracy: 0.4740 - val_loss: 1.4232
Epoch 4/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m111s[0m 5s/step - accuracy: 0.3893 - loss: 1.1493 - val_accuracy: 0.4219 - val_loss: 0.9736
Epoch 5/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m113s[0m 5s/step - accuracy: 0.3789 - loss: 1.1138 - val_accuracy: 0.4427 - val_loss: 0.9689
Epoch 6/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m117s[0m 5s/step - accuracy: 0.3633 - loss: 1.1140 - val_accuracy: 0.4479 - val_loss: 0.9680
Epoch 

KeyboardInterrupt: 