In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

# Load the data
train_data = pd.read_csv('../Files/extracted_features/mfcc/mfcc_features_train.csv')
test_data = pd.read_csv('../Files/extracted_features/mfcc/mfcc_features_test.csv')

train_features = train_data.iloc[:, :-1].values  # All columns except the last
train_labels = train_data.iloc[:, -1].values     # The last column

test_features = test_data.iloc[:, :-1].values   # All columns except the last
test_labels = test_data.iloc[:, -1].values      # The last column

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[np.isin(test_labels, train_labels)]
test_features_filtered = test_features[np.isin(test_labels, train_labels)]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# CNN model (TensorFlow)
train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, num_features, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, num_features, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(train_features_cnn.shape[1], 1)),
    Conv1D(128, kernel_size=3, activation='relu'),
    Conv1D(256, kernel_size=3, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# Confusion Matrix
conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
conf_matrix_df = pd.DataFrame(conf_matrix, index=label_encoder.classes_, columns=label_encoder.classes_)

# Save confusion matrix as CSV
conf_matrix_csv_path = "../Files/confusion_matrix/baseline/age.csv"
conf_matrix_df.to_csv(conf_matrix_csv_path)
print(f"Confusion matrix saved to {conf_matrix_csv_path}")


ValueError: Found array with 0 sample(s) (shape=(0, 25)) while a minimum of 1 is required by StandardScaler.