In [None]:
# PyTorch CNN Model
import torch
from torch.utils.data import DataLoader, TensorDataset

X_train_tensor = torch.tensor(train_features_cnn, dtype=torch.float32).permute(0, 2, 1)  # Shape: (num_samples, 1, 1024)
y_train_tensor = torch.tensor(train_labels_encoded, dtype=torch.long)
X_test_tensor = torch.tensor(test_features_cnn, dtype=torch.float32).permute(0, 2, 1)    # Shape: (num_samples, 1, 1024)
y_test_tensor = torch.tensor(test_labels_encoded, dtype=torch.long)

train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

import torch.nn as nn
import torch.optim as optim

class CNN1DModel(nn.Module):
    def __init__(self, input_channels, num_classes):
        super(CNN1DModel, self).__init__()
        self.conv1 = nn.Conv1d(input_channels, 32, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm1d(32)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(32, 64, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, 128, kernel_size=5, stride=1, padding=2)
        self.bn3 = nn.BatchNorm1d(128)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128 * 1024, 512)  # Updated to 1024 features
        self.fc2 = nn.Linear(512, num_classes)
        self.dropout = nn.Dropout(0.5)
    
    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.flatten(x)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.fc2(x)
        return x

input_channels = 1
num_classes = len(label_encoder.classes_)
model = CNN1DModel(input_channels, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# base 100h

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

# Load the data
train_data = np.load('../Files/extracted_features/layer_features_base_100/train_0.npz')
test_data = np.load('../Files/extracted_features/layer_features_base_100/test_0.npz')

train_features = train_data['features']  # Adjusted for 768 features
test_features = test_data['features']   # Adjusted for 768 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# CNN model (TensorFlow)
train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 768, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 768, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(768, 1)),  # Updated input shape to 768
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))


In [1]:
# hubert large

In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_0.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_0.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 0, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 0, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 0, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.7455
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.75      0.82        12
           2       0.71      0.62      0.67         8
           3       0.81      1.00      0.90        39
           4       1.00      0.83      0.91         6
           5       0.79      0.88      0.83        42
           6       1.00      0.38      0.55        16
           7       0.67      1.00      0.80         2
           8       1.00      0.50      0.67         2

    accuracy                           0.82       127
   macro avg       0.86      0.75      0.77       127
weighted avg       0.84      0.82      0.80       127



2025-01-04 12:42:10.047676: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-04 12:42:10.059107: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-04 12:42:10.062549: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-04 12:42:10.072745: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 470ms/step - accuracy: 0.2991 - loss: 4.5622 - val_accuracy: 0.2035 - val_loss: 2.1121
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.8010 - loss: 0.6068 - val_accuracy: 0.1221 - val_loss: 2.5511
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.9440 - loss: 0.2193 - val_accuracy: 0.2326 - val_loss: 2.5094
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 467ms/step - accuracy: 0.9699 - loss: 0.1066 - val_accuracy: 0.2151 - val_loss: 2.9579
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.9840 - loss: 0.0548 - val_accuracy: 0.1802 - val_loss: 5.2599
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.9787 - loss: 0.0821 - val_accuracy: 0.2267 - val_loss: 4.1916
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.2406 - loss: 2.8899 - val_accuracy: 0.0581 - val_loss: 2.3758
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4067 - loss: 1.8449 - val_accuracy: 0.0581 - val_loss: 2.0914
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5486 - loss: 1.4320 - val_accuracy: 0.2558 - val_loss: 1.9059
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5539 - loss: 1.3311 - val_accuracy: 0.2209 - val_loss: 2.3667
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6215 - loss: 1.1141 - val_accuracy: 0.1744 - val_loss: 2.3284
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6953 - loss: 0.9271 - val_accuracy: 0.2442 - val_loss: 2.4687
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_0.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_0.png


In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_1.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_1.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 1, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 1, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 1, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.7846
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.75      0.78        12
           2       0.86      0.75      0.80         8
           3       0.87      1.00      0.93        39
           4       0.83      0.83      0.83         6
           5       0.82      0.88      0.85        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.85       127
   macro avg       0.80      0.70      0.73       127
weighted avg       0.87      0.85      0.85       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 447ms/step - accuracy: 0.2989 - loss: 3.8327 - val_accuracy: 0.1860 - val_loss: 2.0369
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.7888 - loss: 0.7095 - val_accuracy: 0.1453 - val_loss: 2.7944
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.9215 - loss: 0.2357 - val_accuracy: 0.1279 - val_loss: 4.5243
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.9656 - loss: 0.0916 - val_accuracy: 0.2093 - val_loss: 4.2136
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.9848 - loss: 0.0598 - val_accuracy: 0.1977 - val_loss: 4.6558
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 462ms/step - accuracy: 0.9967 - loss: 0.0227 - val_accuracy: 0.2326 - val_loss: 4.6285
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2561 - loss: 3.0725 - val_accuracy: 0.0000e+00 - val_loss: 2.6631
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4357 - loss: 1.8771 - val_accuracy: 0.0581 - val_loss: 2.3074
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5181 - loss: 1.5554 - val_accuracy: 0.1744 - val_loss: 2.1847
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5907 - loss: 1.2534 - val_accuracy: 0.1337 - val_loss: 2.4954
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6553 - loss: 1.0559 - val_accuracy: 0.2384 - val_loss: 2.1372
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6723 - loss: 1.0013 - val_accuracy: 0.2965 - val_loss: 2.3316
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_1.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_1.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_1.png


In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_2.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_2.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 2, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 2, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 2, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8573
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.89      1.00      0.94         8
           3       0.86      0.97      0.92        39
           4       1.00      1.00      1.00         6
           5       0.86      0.90      0.88        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.89       127
   macro avg       0.85      0.76      0.79       127
weighted avg       0.91      0.89      0.89       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 469ms/step - accuracy: 0.4062 - loss: 4.4613 - val_accuracy: 0.3140 - val_loss: 1.8582
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.8568 - loss: 0.3735 - val_accuracy: 0.3372 - val_loss: 2.2956
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 469ms/step - accuracy: 0.9774 - loss: 0.0744 - val_accuracy: 0.2093 - val_loss: 4.5425
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.9949 - loss: 0.0270 - val_accuracy: 0.2558 - val_loss: 3.5512
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - accuracy: 1.0000 - loss: 0.0058 - val_accuracy: 0.2151 - val_loss: 5.2051
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.9980 - loss: 0.0186 - val_accuracy: 0.2791 - val_loss: 3.0024
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2362 - loss: 3.0058 - val_accuracy: 0.0523 - val_loss: 2.4543
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5109 - loss: 1.6124 - val_accuracy: 0.2151 - val_loss: 2.1285
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5898 - loss: 1.2607 - val_accuracy: 0.1628 - val_loss: 2.5366
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6338 - loss: 1.0868 - val_accuracy: 0.2733 - val_loss: 2.1450
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6893 - loss: 0.9198 - val_accuracy: 0.2849 - val_loss: 2.5612
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7430 - loss: 0.7576 - val_accuracy: 0.2500 - val_loss: 3.3271
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_2.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_2.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_2.png


In [6]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_3.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_3.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 3, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 3, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 3, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.9230
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.89      1.00      0.94         8
           3       0.87      1.00      0.93        39
           4       1.00      1.00      1.00         6
           5       0.88      0.90      0.89        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.91       127
   macro avg       0.95      0.92      0.93       127
weighted avg       0.91      0.91      0.90       127



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 480ms/step - accuracy: 0.3651 - loss: 4.2401 - val_accuracy: 0.2384 - val_loss: 2.0787
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 475ms/step - accuracy: 0.8953 - loss: 0.3120 - val_accuracy: 0.1105 - val_loss: 4.2248
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 473ms/step - accuracy: 0.9813 - loss: 0.0736 - val_accuracy: 0.1512 - val_loss: 4.0564
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.9978 - loss: 0.0179 - val_accuracy: 0.2965 - val_loss: 4.0899
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 479ms/step - accuracy: 0.9944 - loss: 0.0169 - val_accuracy: 0.2733 - val_loss: 3.3692
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 472ms/step - accuracy: 0.9979 - loss: 0.0119 - val_accuracy: 0.2209 - val_loss: 3.9625
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2080 - loss: 2.9726 - val_accuracy: 0.2035 - val_loss: 1.9918
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4659 - loss: 1.6226 - val_accuracy: 0.2326 - val_loss: 1.9643
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6185 - loss: 1.1654 - val_accuracy: 0.3256 - val_loss: 2.1455
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6576 - loss: 0.9818 - val_accuracy: 0.3953 - val_loss: 2.0084
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7635 - loss: 0.7212 - val_accuracy: 0.3547 - val_loss: 2.7975
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7885 - loss: 0.6064 - val_accuracy: 0.3430 - val_loss: 3.0283
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [7]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_4.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_4.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 4, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 4, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 4, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.9304
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        12
           2       0.89      1.00      0.94         8
           3       0.87      1.00      0.93        39
           4       0.86      1.00      0.92         6
           5       0.90      0.88      0.89        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.91       127
   macro avg       0.94      0.93      0.93       127
weighted avg       0.91      0.91      0.90       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 476ms/step - accuracy: 0.3499 - loss: 4.3497 - val_accuracy: 0.2907 - val_loss: 1.8809
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 466ms/step - accuracy: 0.8598 - loss: 0.4391 - val_accuracy: 0.2558 - val_loss: 2.9281
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 474ms/step - accuracy: 0.9850 - loss: 0.0671 - val_accuracy: 0.2035 - val_loss: 4.3845
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 472ms/step - accuracy: 0.9922 - loss: 0.0325 - val_accuracy: 0.1919 - val_loss: 4.9503
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.9956 - loss: 0.0162 - val_accuracy: 0.1802 - val_loss: 4.7397
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 474ms/step - accuracy: 0.9937 - loss: 0.0160 - val_accuracy: 0.2151 - val_loss: 4.5439
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.2715 - loss: 2.8221 - val_accuracy: 0.2384 - val_loss: 1.8544
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5099 - loss: 1.4960 - val_accuracy: 0.1919 - val_loss: 2.0139
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6204 - loss: 1.1771 - val_accuracy: 0.1686 - val_loss: 2.1947
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6999 - loss: 0.9677 - val_accuracy: 0.2674 - val_loss: 2.0874
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7422 - loss: 0.6909 - val_accuracy: 0.2674 - val_loss: 2.3887
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7909 - loss: 0.5701 - val_accuracy: 0.2035 - val_loss: 2.9772
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_4.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_4.png


In [8]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_5.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_5.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 5, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 5, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 5, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8371
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.92      0.92      0.92        12
           2       0.78      0.88      0.82         8
           3       0.89      1.00      0.94        39
           4       0.86      1.00      0.92         6
           5       0.88      0.90      0.89        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.88       127
   macro avg       0.81      0.74      0.76       127
weighted avg       0.90      0.88      0.88       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 467ms/step - accuracy: 0.3116 - loss: 4.5462 - val_accuracy: 0.0814 - val_loss: 2.3405
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.8602 - loss: 0.4925 - val_accuracy: 0.1570 - val_loss: 3.2293
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.9709 - loss: 0.0964 - val_accuracy: 0.3256 - val_loss: 3.2417
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.9857 - loss: 0.0635 - val_accuracy: 0.1221 - val_loss: 5.6192
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.9913 - loss: 0.0420 - val_accuracy: 0.2209 - val_loss: 4.5973
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.9924 - loss: 0.0162 - val_accuracy: 0.2267 - val_loss: 5.1752
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.3016 - loss: 2.7092 - val_accuracy: 0.1279 - val_loss: 2.0897
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4857 - loss: 1.6880 - val_accuracy: 0.0988 - val_loss: 2.0294
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6012 - loss: 1.3099 - val_accuracy: 0.1279 - val_loss: 2.2324
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6507 - loss: 1.0243 - val_accuracy: 0.2035 - val_loss: 2.4439
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6866 - loss: 0.9015 - val_accuracy: 0.2965 - val_loss: 2.1821
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7618 - loss: 0.7152 - val_accuracy: 0.2965 - val_loss: 2.4500
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_5.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_5.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_5.png


In [9]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_6.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_6.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 6, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 6, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 6, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8419
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.78      0.88      0.82         8
           3       0.87      1.00      0.93        39
           4       0.86      1.00      0.92         6
           5       0.90      0.88      0.89        42
           6       0.90      0.56      0.69        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.88       127
   macro avg       0.81      0.75      0.76       127
weighted avg       0.89      0.88      0.88       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 460ms/step - accuracy: 0.3723 - loss: 3.7190 - val_accuracy: 0.1047 - val_loss: 2.6201
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.8502 - loss: 0.4049 - val_accuracy: 0.2267 - val_loss: 2.4446
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 0.9722 - loss: 0.0860 - val_accuracy: 0.2907 - val_loss: 2.8707
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.9946 - loss: 0.0236 - val_accuracy: 0.2558 - val_loss: 3.4519
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.9956 - loss: 0.0160 - val_accuracy: 0.1860 - val_loss: 4.8563
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 1.0000 - loss: 0.0063 - val_accuracy: 0.2151 - val_loss: 5.2217
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2739 - loss: 2.7557 - val_accuracy: 0.1047 - val_loss: 2.2275
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4951 - loss: 1.6711 - val_accuracy: 0.0698 - val_loss: 2.4844
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6157 - loss: 1.1541 - val_accuracy: 0.1279 - val_loss: 2.4965
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6816 - loss: 0.9437 - val_accuracy: 0.1744 - val_loss: 2.5043
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7591 - loss: 0.7326 - val_accuracy: 0.2384 - val_loss: 2.6735
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7730 - loss: 0.6858 - val_accuracy: 0.2500 - val_loss: 2.7388
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_6.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_6.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_6.png


In [10]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_7.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_7.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 7, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 7, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 7, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.9200
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.80      1.00      0.89         8
           3       0.87      1.00      0.93        39
           4       0.86      1.00      0.92         6
           5       0.90      0.88      0.89        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.90       127
   macro avg       0.93      0.92      0.91       127
weighted avg       0.91      0.90      0.89       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 474ms/step - accuracy: 0.3882 - loss: 3.5724 - val_accuracy: 0.2326 - val_loss: 2.0940
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.8515 - loss: 0.4541 - val_accuracy: 0.1279 - val_loss: 3.5535
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9718 - loss: 0.0862 - val_accuracy: 0.2151 - val_loss: 3.2077
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - accuracy: 0.9946 - loss: 0.0339 - val_accuracy: 0.1512 - val_loss: 3.5463
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - accuracy: 0.9960 - loss: 0.0221 - val_accuracy: 0.2616 - val_loss: 3.6820
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 1.0000 - loss: 0.0111 - val_accuracy: 0.1686 - val_loss: 4.9793
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.3075 - loss: 2.7543 - val_accuracy: 0.0581 - val_loss: 2.3477
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5236 - loss: 1.5168 - val_accuracy: 0.2035 - val_loss: 1.9827
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5341 - loss: 1.4187 - val_accuracy: 0.1163 - val_loss: 2.4482
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6936 - loss: 0.8956 - val_accuracy: 0.2267 - val_loss: 2.3654
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7118 - loss: 0.7644 - val_accuracy: 0.3023 - val_loss: 2.3332
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7837 - loss: 0.6366 - val_accuracy: 0.3081 - val_loss: 2.3761
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_7.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_7.png


In [11]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_8.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_8.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 8, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 8, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 8, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8441
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.67      1.00      0.80         8
           3       0.87      1.00      0.93        39
           4       0.86      1.00      0.92         6
           5       0.92      0.86      0.89        42
           6       0.90      0.56      0.69        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.87       127
   macro avg       0.80      0.75      0.76       127
weighted avg       0.89      0.87      0.87       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 463ms/step - accuracy: 0.2957 - loss: 4.7224 - val_accuracy: 0.0930 - val_loss: 2.2600
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.7625 - loss: 0.6614 - val_accuracy: 0.3372 - val_loss: 2.2146
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.9547 - loss: 0.1493 - val_accuracy: 0.3198 - val_loss: 2.7326
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 462ms/step - accuracy: 0.9824 - loss: 0.0764 - val_accuracy: 0.1919 - val_loss: 3.5134
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 470ms/step - accuracy: 0.9835 - loss: 0.0487 - val_accuracy: 0.2791 - val_loss: 3.1822
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 472ms/step - accuracy: 0.9878 - loss: 0.0369 - val_accuracy: 0.2849 - val_loss: 3.7340
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2443 - loss: 2.8653 - val_accuracy: 0.0407 - val_loss: 2.5018
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4914 - loss: 1.6297 - val_accuracy: 0.0872 - val_loss: 2.4773
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5726 - loss: 1.2582 - val_accuracy: 0.1570 - val_loss: 2.2388
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6976 - loss: 0.9076 - val_accuracy: 0.1977 - val_loss: 2.2918
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7102 - loss: 0.8340 - val_accuracy: 0.2442 - val_loss: 2.7197
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8010 - loss: 0.5761 - val_accuracy: 0.2384 - val_loss: 2.7290
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_9.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_9.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 9, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 9, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 9, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


SVM Accuracy: 0.8575
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.80      1.00      0.89         8
           3       0.89      1.00      0.94        39
           4       0.86      1.00      0.92         6
           5       0.90      0.88      0.89        42
           6       0.90      0.56      0.69        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.89       127
   macro avg       0.82      0.76      0.77       127
weighted avg       0.90      0.89      0.89       127



2025-01-04 17:46:54.287059: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-04 17:46:54.339736: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-04 17:46:54.355286: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-04 17:46:54.462031: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 468ms/step - accuracy: 0.3322 - loss: 3.3791 - val_accuracy: 0.0756 - val_loss: 2.8092
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.8571 - loss: 0.4218 - val_accuracy: 0.2035 - val_loss: 2.5874
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - accuracy: 0.9637 - loss: 0.1091 - val_accuracy: 0.2093 - val_loss: 4.0627
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9650 - loss: 0.0767 - val_accuracy: 0.2209 - val_loss: 3.7885
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.9971 - loss: 0.0242 - val_accuracy: 0.2267 - val_loss: 3.5587
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.9988 - loss: 0.0117 - val_accuracy: 0.2267 - val_loss: 5.4160
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1715 - loss: 3.3638 - val_accuracy: 0.0000e+00 - val_loss: 2.8223
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3904 - loss: 1.8542 - val_accuracy: 0.0233 - val_loss: 2.3866
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5198 - loss: 1.4586 - val_accuracy: 0.1279 - val_loss: 2.2836
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6092 - loss: 1.1219 - val_accuracy: 0.1105 - val_loss: 2.5378
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6610 - loss: 1.0284 - val_accuracy: 0.1221 - val_loss: 2.8124
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7352 - loss: 0.7817 - val_accuracy: 0.1395 - val_loss: 2.6677
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_9.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_9.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_9.png


In [13]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_10.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_10.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 10, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 10, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 10, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8936
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.64      0.88      0.74         8
           3       0.85      1.00      0.92        39
           4       0.86      1.00      0.92         6
           5       0.92      0.86      0.89        42
           6       0.89      0.50      0.64        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.89      0.89      0.88       127
weighted avg       0.88      0.87      0.87       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 485ms/step - accuracy: 0.3144 - loss: 6.1909 - val_accuracy: 0.0581 - val_loss: 2.2309
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 466ms/step - accuracy: 0.7104 - loss: 0.8680 - val_accuracy: 0.2500 - val_loss: 2.4152
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 480ms/step - accuracy: 0.9210 - loss: 0.2480 - val_accuracy: 0.2558 - val_loss: 2.7844
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 483ms/step - accuracy: 0.9732 - loss: 0.0996 - val_accuracy: 0.2035 - val_loss: 4.7625
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 478ms/step - accuracy: 0.9788 - loss: 0.0594 - val_accuracy: 0.2151 - val_loss: 4.5998
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 487ms/step - accuracy: 0.9884 - loss: 0.0455 - val_accuracy: 0.2384 - val_loss: 3.6461
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2567 - loss: 2.9560 - val_accuracy: 0.0174 - val_loss: 2.6359
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4560 - loss: 1.8131 - val_accuracy: 0.0640 - val_loss: 2.2994
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5272 - loss: 1.4893 - val_accuracy: 0.0930 - val_loss: 2.4999
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6096 - loss: 1.2266 - val_accuracy: 0.1453 - val_loss: 2.3318
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6680 - loss: 0.9935 - val_accuracy: 0.1686 - val_loss: 2.6035
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7272 - loss: 0.7996 - val_accuracy: 0.2500 - val_loss: 2.4538
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_10.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_10.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_10.png


In [1]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_11.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_11.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 11, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 11, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 11, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8858
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.92      0.92      0.92        12
           2       0.60      0.75      0.67         8
           3       0.87      1.00      0.93        39
           4       1.00      1.00      1.00         6
           5       0.92      0.86      0.89        42
           6       0.82      0.56      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.89      0.89      0.88       127
weighted avg       0.88      0.87      0.87       127



2025-01-04 15:30:07.192642: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-04 15:30:07.247271: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-04 15:30:07.263474: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-04 15:30:07.371547: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 457ms/step - accuracy: 0.3350 - loss: 4.2402 - val_accuracy: 0.0930 - val_loss: 2.5535
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.7625 - loss: 0.7513 - val_accuracy: 0.2209 - val_loss: 2.6002
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.9262 - loss: 0.2229 - val_accuracy: 0.1453 - val_loss: 4.0894
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 467ms/step - accuracy: 0.9786 - loss: 0.0937 - val_accuracy: 0.1686 - val_loss: 4.4705
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 467ms/step - accuracy: 0.9757 - loss: 0.0984 - val_accuracy: 0.2500 - val_loss: 3.5094
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.9797 - loss: 0.1051 - val_accuracy: 0.3314 - val_loss: 3.2550
Epoch 7/50
[1m22/22[

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1891 - loss: 2.9981 - val_accuracy: 0.0930 - val_loss: 2.5477
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4246 - loss: 1.9445 - val_accuracy: 0.0872 - val_loss: 2.1351
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4845 - loss: 1.5108 - val_accuracy: 0.0872 - val_loss: 2.2742
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5664 - loss: 1.3635 - val_accuracy: 0.2442 - val_loss: 2.0349
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6302 - loss: 1.0557 - val_accuracy: 0.1919 - val_loss: 2.3793
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6470 - loss: 1.0676 - val_accuracy: 0.2267 - val_loss: 2.4446
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_11.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_11.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_11.png


In [2]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_12.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_12.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 12, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 12, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 12, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8996
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.64      0.88      0.74         8
           3       0.85      1.00      0.92        39
           4       1.00      1.00      1.00         6
           5       0.93      0.90      0.92        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.89       127
   macro avg       0.93      0.90      0.90       127
weighted avg       0.91      0.89      0.88       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 468ms/step - accuracy: 0.2880 - loss: 3.8602 - val_accuracy: 0.0058 - val_loss: 2.5904
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 473ms/step - accuracy: 0.6555 - loss: 1.0630 - val_accuracy: 0.2384 - val_loss: 2.1576
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 462ms/step - accuracy: 0.8357 - loss: 0.5182 - val_accuracy: 0.2267 - val_loss: 3.4863
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.9529 - loss: 0.1709 - val_accuracy: 0.3198 - val_loss: 2.5562
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.9728 - loss: 0.1128 - val_accuracy: 0.2267 - val_loss: 3.4669
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.9827 - loss: 0.0703 - val_accuracy: 0.2965 - val_loss: 3.3364
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2919 - loss: 2.6573 - val_accuracy: 0.0000e+00 - val_loss: 2.5040
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4057 - loss: 1.8576 - val_accuracy: 0.0523 - val_loss: 2.4181
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5086 - loss: 1.6360 - val_accuracy: 0.0233 - val_loss: 2.5528
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5063 - loss: 1.5193 - val_accuracy: 0.1047 - val_loss: 2.2744
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5996 - loss: 1.1617 - val_accuracy: 0.1512 - val_loss: 2.2244
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6152 - loss: 1.0734 - val_accuracy: 0.1163 - val_loss: 2.6890
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_12.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_12.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_12.png


In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_13.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_13.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 13, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 13, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 13, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8856
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.55      0.75      0.63         8
           3       0.83      0.97      0.89        39
           4       1.00      1.00      1.00         6
           5       0.93      0.88      0.90        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.91      0.89      0.89       127
weighted avg       0.89      0.87      0.87       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 464ms/step - accuracy: 0.3124 - loss: 4.0333 - val_accuracy: 0.0116 - val_loss: 2.4827
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.6499 - loss: 1.0771 - val_accuracy: 0.1628 - val_loss: 2.5142
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.8124 - loss: 0.5210 - val_accuracy: 0.1512 - val_loss: 3.2921
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9329 - loss: 0.1800 - val_accuracy: 0.3023 - val_loss: 3.1474
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.9820 - loss: 0.1218 - val_accuracy: 0.2267 - val_loss: 4.4159
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.9891 - loss: 0.0403 - val_accuracy: 0.1570 - val_loss: 5.7928
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2728 - loss: 2.8633 - val_accuracy: 0.0116 - val_loss: 2.7717
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3750 - loss: 2.0249 - val_accuracy: 0.0000e+00 - val_loss: 2.6662
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4772 - loss: 1.6253 - val_accuracy: 0.0756 - val_loss: 2.2729
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5037 - loss: 1.5159 - val_accuracy: 0.0523 - val_loss: 2.3650
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5211 - loss: 1.3791 - val_accuracy: 0.1047 - val_loss: 2.5073
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5680 - loss: 1.2350 - val_accuracy: 0.1512 - val_loss: 2.3688
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_13.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_13.png


In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_14.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_14.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 14, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 14, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 14, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8533
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.83      0.87        12
           2       0.50      0.62      0.56         8
           3       0.82      0.95      0.88        39
           4       0.75      1.00      0.86         6
           5       0.92      0.86      0.89        42
           6       0.90      0.56      0.69        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.84       127
   macro avg       0.85      0.85      0.84       127
weighted avg       0.86      0.84      0.84       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 467ms/step - accuracy: 0.2781 - loss: 3.5923 - val_accuracy: 0.0349 - val_loss: 2.5966
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.5690 - loss: 1.2489 - val_accuracy: 0.1337 - val_loss: 2.6786
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460ms/step - accuracy: 0.7431 - loss: 0.7990 - val_accuracy: 0.2267 - val_loss: 2.2437
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460ms/step - accuracy: 0.9204 - loss: 0.3355 - val_accuracy: 0.2093 - val_loss: 3.3523
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.9486 - loss: 0.1771 - val_accuracy: 0.2558 - val_loss: 3.3032
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - accuracy: 0.9798 - loss: 0.1025 - val_accuracy: 0.1977 - val_loss: 4.4478
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2163 - loss: 3.1613 - val_accuracy: 0.0465 - val_loss: 2.5386
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3766 - loss: 2.2041 - val_accuracy: 0.0174 - val_loss: 2.5423
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4404 - loss: 1.8125 - val_accuracy: 0.0349 - val_loss: 2.5845
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4544 - loss: 1.6696 - val_accuracy: 0.0233 - val_loss: 2.5416
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4686 - loss: 1.5333 - val_accuracy: 0.0872 - val_loss: 2.4197
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5272 - loss: 1.3239 - val_accuracy: 0.0465 - val_loss: 2.5757
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [5]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_15.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_15.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 15, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 15, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 15, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8385
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.75      0.82        12
           2       0.56      0.62      0.59         8
           3       0.78      1.00      0.88        39
           4       1.00      1.00      1.00         6
           5       0.90      0.83      0.86        42
           6       0.89      0.50      0.64        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.83       127
   macro avg       0.88      0.84      0.85       127
weighted avg       0.85      0.83      0.83       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 490ms/step - accuracy: 0.3264 - loss: 3.9621 - val_accuracy: 0.0058 - val_loss: 2.3202
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 482ms/step - accuracy: 0.5662 - loss: 1.2778 - val_accuracy: 0.0872 - val_loss: 2.6149
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 471ms/step - accuracy: 0.7779 - loss: 0.7215 - val_accuracy: 0.2500 - val_loss: 1.9725
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 476ms/step - accuracy: 0.8713 - loss: 0.4014 - val_accuracy: 0.1802 - val_loss: 3.7768
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 477ms/step - accuracy: 0.9097 - loss: 0.3023 - val_accuracy: 0.2035 - val_loss: 3.6477
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 484ms/step - accuracy: 0.9182 - loss: 0.2132 - val_accuracy: 0.2500 - val_loss: 3.0165
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2070 - loss: 3.1748 - val_accuracy: 0.0000e+00 - val_loss: 2.6527
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3313 - loss: 2.2598 - val_accuracy: 0.0116 - val_loss: 2.2728
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4017 - loss: 1.9602 - val_accuracy: 0.0000e+00 - val_loss: 2.2659
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3880 - loss: 1.9834 - val_accuracy: 0.0000e+00 - val_loss: 2.3581
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4226 - loss: 1.8178 - val_accuracy: 0.0058 - val_loss: 2.2563
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4985 - loss: 1.5623 - val_accuracy: 0.0233 - val_loss: 2.3716
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_15.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_15.png


In [6]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_16.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_16.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 16, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 16, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 16, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8013
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.83      0.83      0.83        12
           2       0.50      0.50      0.50         8
           3       0.80      0.95      0.87        39
           4       0.83      0.83      0.83         6
           5       0.86      0.86      0.86        42
           6       0.78      0.44      0.56        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.81       127
   macro avg       0.83      0.80      0.81       127
weighted avg       0.81      0.81      0.80       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 464ms/step - accuracy: 0.2907 - loss: 5.1409 - val_accuracy: 0.0058 - val_loss: 3.0377
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.5371 - loss: 1.4550 - val_accuracy: 0.0523 - val_loss: 2.9331
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.6029 - loss: 1.2307 - val_accuracy: 0.0640 - val_loss: 2.5586
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.7507 - loss: 0.7346 - val_accuracy: 0.0698 - val_loss: 3.3848
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.7857 - loss: 0.5670 - val_accuracy: 0.2209 - val_loss: 3.2868
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.8758 - loss: 0.3497 - val_accuracy: 0.1686 - val_loss: 3.6780
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2199 - loss: 3.1393 - val_accuracy: 0.0058 - val_loss: 2.9940
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3323 - loss: 2.2440 - val_accuracy: 0.0233 - val_loss: 2.3400
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4122 - loss: 1.9916 - val_accuracy: 0.0000e+00 - val_loss: 2.3077
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3767 - loss: 2.0069 - val_accuracy: 0.0058 - val_loss: 2.3514
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4134 - loss: 1.7657 - val_accuracy: 0.0116 - val_loss: 2.2899
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4826 - loss: 1.5787 - val_accuracy: 0.1047 - val_loss: 2.1091
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_16.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_16.png


In [7]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_17.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_17.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 17, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 17, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 17, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.7588
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.78      0.58      0.67        12
           2       0.38      0.38      0.38         8
           3       0.78      0.90      0.83        39
           4       0.62      0.83      0.71         6
           5       0.84      0.88      0.86        42
           6       0.89      0.50      0.64        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.78       127
   macro avg       0.79      0.76      0.76       127
weighted avg       0.79      0.78      0.77       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 452ms/step - accuracy: 0.2960 - loss: 4.1803 - val_accuracy: 0.0000e+00 - val_loss: 2.4850
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 445ms/step - accuracy: 0.4785 - loss: 1.5666 - val_accuracy: 0.0349 - val_loss: 2.4234
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.5298 - loss: 1.2905 - val_accuracy: 0.1686 - val_loss: 2.3538
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.6666 - loss: 0.9846 - val_accuracy: 0.1279 - val_loss: 2.3668
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.7206 - loss: 0.7927 - val_accuracy: 0.2442 - val_loss: 2.5100
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.7908 - loss: 0.5864 - val_accuracy: 0.2674 - val_loss: 2.6368
Epoch 7/50
[1m22/22[0m [32

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2507 - loss: 3.0361 - val_accuracy: 0.0116 - val_loss: 3.0643
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3521 - loss: 2.4180 - val_accuracy: 0.0174 - val_loss: 2.5044
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4001 - loss: 2.1246 - val_accuracy: 0.0000e+00 - val_loss: 2.5444
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3423 - loss: 2.0557 - val_accuracy: 0.0291 - val_loss: 2.6254
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4163 - loss: 1.8735 - val_accuracy: 0.0174 - val_loss: 2.6086
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4728 - loss: 1.7321 - val_accuracy: 0.0233 - val_loss: 2.5412
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_17.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_17.png


In [8]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_18.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_18.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 18, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 18, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 18, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.8006
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.83      0.87        12
           2       0.56      0.62      0.59         8
           3       0.82      0.79      0.81        39
           4       0.56      0.83      0.67         6
           5       0.79      0.88      0.83        42
           6       0.88      0.44      0.58        16
           7       0.67      1.00      0.80         2
           8       1.00      1.00      1.00         2

    accuracy                           0.78       127
   macro avg       0.77      0.80      0.77       127
weighted avg       0.79      0.78      0.77       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 433ms/step - accuracy: 0.2831 - loss: 4.6060 - val_accuracy: 0.0116 - val_loss: 2.5498
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 429ms/step - accuracy: 0.4775 - loss: 1.6195 - val_accuracy: 0.0058 - val_loss: 2.4060
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 429ms/step - accuracy: 0.5612 - loss: 1.3283 - val_accuracy: 0.0465 - val_loss: 2.7043
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 430ms/step - accuracy: 0.6615 - loss: 1.0247 - val_accuracy: 0.1919 - val_loss: 2.2852
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 431ms/step - accuracy: 0.7596 - loss: 0.7629 - val_accuracy: 0.1919 - val_loss: 2.3814
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 429ms/step - accuracy: 0.8207 - loss: 0.4831 - val_accuracy: 0.1686 - val_loss: 2.7568
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1941 - loss: 3.2875 - val_accuracy: 0.0000e+00 - val_loss: 2.6196
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2643 - loss: 2.5967 - val_accuracy: 0.0000e+00 - val_loss: 2.5869
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3828 - loss: 2.0673 - val_accuracy: 0.0058 - val_loss: 2.4582
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4045 - loss: 2.0003 - val_accuracy: 0.0233 - val_loss: 2.1288
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3817 - loss: 1.8794 - val_accuracy: 0.0349 - val_loss: 2.3268
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4216 - loss: 1.7849 - val_accuracy: 0.0349 - val_loss: 2.2337
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_18.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_18.png


In [9]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_19.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_19.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 19, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 19, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 19, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.6167
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.90      0.75      0.82        12
           2       0.55      0.75      0.63         8
           3       0.84      0.82      0.83        39
           4       0.62      0.83      0.71         6
           5       0.76      0.90      0.83        42
           6       0.86      0.38      0.52        16
           7       0.50      0.50      0.50         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.76       127
   macro avg       0.56      0.55      0.54       127
weighted avg       0.77      0.76      0.75       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 429ms/step - accuracy: 0.2929 - loss: 3.5448 - val_accuracy: 0.0000e+00 - val_loss: 3.0619
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 423ms/step - accuracy: 0.4975 - loss: 1.5647 - val_accuracy: 0.0000e+00 - val_loss: 2.5423
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 423ms/step - accuracy: 0.5344 - loss: 1.2396 - val_accuracy: 0.1279 - val_loss: 2.3658
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 425ms/step - accuracy: 0.6318 - loss: 1.0153 - val_accuracy: 0.1802 - val_loss: 2.5674
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 425ms/step - accuracy: 0.7278 - loss: 0.7624 - val_accuracy: 0.1919 - val_loss: 2.7150
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 427ms/step - accuracy: 0.8340 - loss: 0.5326 - val_accuracy: 0.1221 - val_loss: 3.7845
Epoch 7/50
[1m22/22[0m [32m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.2350 - loss: 3.1390 - val_accuracy: 0.0116 - val_loss: 2.6658
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3170 - loss: 2.4546 - val_accuracy: 0.0116 - val_loss: 2.3907
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3500 - loss: 2.2028 - val_accuracy: 0.0058 - val_loss: 2.4478
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4284 - loss: 1.9028 - val_accuracy: 0.0291 - val_loss: 2.2291
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3988 - loss: 1.9811 - val_accuracy: 0.0233 - val_loss: 2.3243
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4717 - loss: 1.7014 - val_accuracy: 0.0058 - val_loss: 2.4086
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_19.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_19.png


In [10]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_20.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_20.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 20, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 20, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 20, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.5815
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.73      0.67      0.70        12
           2       0.45      0.62      0.53         8
           3       0.79      0.79      0.79        39
           4       0.56      0.83      0.67         6
           5       0.80      0.86      0.83        42
           6       0.86      0.38      0.52        16
           7       0.50      0.50      0.50         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.72       127
   macro avg       0.52      0.52      0.50       127
weighted avg       0.75      0.72      0.72       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 440ms/step - accuracy: 0.2939 - loss: 4.2048 - val_accuracy: 0.0000e+00 - val_loss: 2.5312
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 436ms/step - accuracy: 0.5119 - loss: 1.5335 - val_accuracy: 0.0174 - val_loss: 3.5355
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 439ms/step - accuracy: 0.5369 - loss: 1.3074 - val_accuracy: 0.1105 - val_loss: 2.2955
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 438ms/step - accuracy: 0.6636 - loss: 0.9178 - val_accuracy: 0.2267 - val_loss: 2.1566
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.7383 - loss: 0.7641 - val_accuracy: 0.1628 - val_loss: 2.9970
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 439ms/step - accuracy: 0.8034 - loss: 0.5642 - val_accuracy: 0.2442 - val_loss: 3.1011
Epoch 7/50
[1m22/22[0m [32

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2160 - loss: 3.3541 - val_accuracy: 0.0058 - val_loss: 2.7978
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3368 - loss: 2.3734 - val_accuracy: 0.0000e+00 - val_loss: 2.4874
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3161 - loss: 2.1826 - val_accuracy: 0.0116 - val_loss: 2.5374
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3449 - loss: 2.0525 - val_accuracy: 0.0058 - val_loss: 2.4823
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3730 - loss: 1.9123 - val_accuracy: 0.0116 - val_loss: 2.5227
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4028 - loss: 1.7921 - val_accuracy: 0.0116 - val_loss: 2.5271
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_20.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_20.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_20.png


In [11]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_21.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_21.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 21, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 21, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 21, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.5737
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.80      0.67      0.73        12
           2       0.44      0.50      0.47         8
           3       0.82      0.79      0.81        39
           4       0.45      0.83      0.59         6
           5       0.75      0.86      0.80        42
           6       0.88      0.44      0.58        16
           7       1.00      0.50      0.67         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.72       127
   macro avg       0.57      0.51      0.52       127
weighted avg       0.75      0.72      0.72       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 459ms/step - accuracy: 0.2998 - loss: 4.2422 - val_accuracy: 0.0058 - val_loss: 2.3418
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.4735 - loss: 1.5424 - val_accuracy: 0.0174 - val_loss: 2.5467
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.5874 - loss: 1.2045 - val_accuracy: 0.1279 - val_loss: 2.4439
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.6647 - loss: 0.9029 - val_accuracy: 0.1686 - val_loss: 2.6862
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.7517 - loss: 0.6596 - val_accuracy: 0.1628 - val_loss: 3.1041
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.8076 - loss: 0.5477 - val_accuracy: 0.1802 - val_loss: 3.4091
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2214 - loss: 3.3472 - val_accuracy: 0.0058 - val_loss: 2.5699
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3231 - loss: 2.3539 - val_accuracy: 0.0116 - val_loss: 2.5390
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3526 - loss: 2.2511 - val_accuracy: 0.0174 - val_loss: 2.4392
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4027 - loss: 1.9188 - val_accuracy: 0.0174 - val_loss: 2.7205
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4038 - loss: 1.9163 - val_accuracy: 0.0523 - val_loss: 2.4922
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3960 - loss: 1.9438 - val_accuracy: 0.0000e+00 - val_loss: 2.5939
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_21.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_21.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_21.png


In [12]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_22.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_22.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 22, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 22, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 22, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.6230
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.70      0.58      0.64        12
           2       0.38      0.38      0.38         8
           3       0.80      0.90      0.84        39
           4       0.83      0.83      0.83         6
           5       0.80      0.86      0.83        42
           6       0.78      0.44      0.56        16
           7       0.67      1.00      0.80         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.75       127
   macro avg       0.55      0.55      0.54       127
weighted avg       0.75      0.75      0.74       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 466ms/step - accuracy: 0.3034 - loss: 3.7183 - val_accuracy: 0.0000e+00 - val_loss: 2.8494
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 431ms/step - accuracy: 0.4742 - loss: 1.6017 - val_accuracy: 0.0349 - val_loss: 2.8938
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 436ms/step - accuracy: 0.5592 - loss: 1.3162 - val_accuracy: 0.0756 - val_loss: 2.3892
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 437ms/step - accuracy: 0.6109 - loss: 1.1168 - val_accuracy: 0.1337 - val_loss: 2.2932
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 439ms/step - accuracy: 0.7040 - loss: 0.8557 - val_accuracy: 0.0988 - val_loss: 2.5618
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.7621 - loss: 0.6612 - val_accuracy: 0.2093 - val_loss: 2.3490
Epoch 7/50
[1m22/22[0m [32m

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2207 - loss: 3.2097 - val_accuracy: 0.0000e+00 - val_loss: 2.9457
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3029 - loss: 2.6075 - val_accuracy: 0.0407 - val_loss: 2.2437
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3374 - loss: 2.2519 - val_accuracy: 0.0000e+00 - val_loss: 2.5491
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3166 - loss: 2.1599 - val_accuracy: 0.0058 - val_loss: 2.4202
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4075 - loss: 1.9800 - val_accuracy: 0.0407 - val_loss: 2.1875
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4565 - loss: 1.7662 - val_accuracy: 0.0058 - val_loss: 2.3741
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_22.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_22.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_22.png


In [13]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_23.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_23.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 23, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 23, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 23, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.5806
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.70      0.58      0.64        12
           2       0.33      0.25      0.29         8
           3       0.83      0.90      0.86        39
           4       0.44      0.67      0.53         6
           5       0.76      0.81      0.78        42
           6       0.70      0.44      0.54        16
           7       0.67      1.00      0.80         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.72       127
   macro avg       0.49      0.52      0.49       127
weighted avg       0.71      0.72      0.71       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 463ms/step - accuracy: 0.3248 - loss: 4.9850 - val_accuracy: 0.0116 - val_loss: 2.3129
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.4578 - loss: 1.6526 - val_accuracy: 0.0407 - val_loss: 2.5083
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.5183 - loss: 1.4192 - val_accuracy: 0.0407 - val_loss: 2.4822
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 467ms/step - accuracy: 0.5819 - loss: 1.2008 - val_accuracy: 0.0581 - val_loss: 2.2403
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.6065 - loss: 1.1107 - val_accuracy: 0.1570 - val_loss: 2.0751
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.6793 - loss: 0.8948 - val_accuracy: 0.1047 - val_loss: 2.6759
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2782 - loss: 3.1997 - val_accuracy: 0.0407 - val_loss: 2.4385
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3417 - loss: 2.3663 - val_accuracy: 0.0058 - val_loss: 2.3543
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3146 - loss: 2.2009 - val_accuracy: 0.0291 - val_loss: 2.5032
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3357 - loss: 2.1668 - val_accuracy: 0.0349 - val_loss: 2.1550
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3784 - loss: 2.0644 - val_accuracy: 0.0407 - val_loss: 2.0920
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3889 - loss: 1.9763 - val_accuracy: 0.0523 - val_loss: 2.0744
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_23.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_23.png


In [14]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/hubert_large/train_24.npz')
test_data = np.load('../Files/extracted_features/hubert_large/test_24.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 24, "../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 24, "../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 24, "../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix")


SVM Accuracy: 0.4527
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.50      0.60        12
           2       0.33      0.38      0.35         8
           3       0.83      0.87      0.85        39
           4       0.40      0.67      0.50         6
           5       0.71      0.83      0.77        42
           6       0.75      0.38      0.50        16
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.69       127
   macro avg       0.42      0.40      0.40       127
weighted avg       0.70      0.69      0.68       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 468ms/step - accuracy: 0.3128 - loss: 4.3235 - val_accuracy: 0.0174 - val_loss: 2.5666
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.4578 - loss: 1.6197 - val_accuracy: 0.0116 - val_loss: 2.6297
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.5018 - loss: 1.3868 - val_accuracy: 0.0349 - val_loss: 2.3183
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.5399 - loss: 1.2545 - val_accuracy: 0.0814 - val_loss: 2.3908
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.6240 - loss: 0.9926 - val_accuracy: 0.1337 - val_loss: 2.8049
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.7084 - loss: 0.7977 - val_accuracy: 0.0756 - val_loss: 3.9333
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2092 - loss: 3.2384 - val_accuracy: 0.0000e+00 - val_loss: 3.4564
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2340 - loss: 3.0003 - val_accuracy: 0.0116 - val_loss: 2.5183
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3547 - loss: 2.2394 - val_accuracy: 0.0116 - val_loss: 2.4389
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3930 - loss: 2.1464 - val_accuracy: 0.0349 - val_loss: 2.4538
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3713 - loss: 2.0372 - val_accuracy: 0.0116 - val_loss: 2.5404
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4434 - loss: 1.8536 - val_accuracy: 0.0058 - val_loss: 2.4441
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_hubert/svm_conf_matrix/conf_matrix_layer_24.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_hubert/cnn_conf_matrix/conf_matrix_layer_24.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_hubert/ann_conf_matrix/conf_matrix_layer_24.png


In [2]:
#WavLM

In [3]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_0.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_0.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 0, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 0, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 0, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.7946
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.75      0.78        12
           2       0.75      0.75      0.75         8
           3       0.85      1.00      0.92        39
           4       1.00      1.00      1.00         6
           5       0.86      0.86      0.86        42
           6       0.80      0.50      0.62        16
           7       0.67      1.00      0.80         2
           8       1.00      0.50      0.67         2

    accuracy                           0.84       127
   macro avg       0.84      0.79      0.80       127
weighted avg       0.84      0.84      0.83       127



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 453ms/step - accuracy: 0.3732 - loss: 4.6940 - val_accuracy: 0.1395 - val_loss: 1.9700
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.7923 - loss: 0.6847 - val_accuracy: 0.2035 - val_loss: 2.3118
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.9443 - loss: 0.2000 - val_accuracy: 0.0872 - val_loss: 4.6218
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.9681 - loss: 0.1192 - val_accuracy: 0.2500 - val_loss: 3.8419
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.9839 - loss: 0.0478 - val_accuracy: 0.2151 - val_loss: 3.8217
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.9928 - loss: 0.0254 - val_accuracy: 0.1802 - val_loss: 4.9493
Epoch 7/50
[1m22/22[

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2480 - loss: 3.1369 - val_accuracy: 0.0465 - val_loss: 2.9666
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4418 - loss: 1.8263 - val_accuracy: 0.1453 - val_loss: 2.1610
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4972 - loss: 1.5577 - val_accuracy: 0.1977 - val_loss: 2.3632
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5941 - loss: 1.2366 - val_accuracy: 0.2500 - val_loss: 2.1845
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5985 - loss: 1.1015 - val_accuracy: 0.2907 - val_loss: 2.2682
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6546 - loss: 1.0274 - val_accuracy: 0.2500 - val_loss: 2.3238
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_0.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_0.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_0.png


In [5]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_1.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_1.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 1, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 1, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 1, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8237
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.83      0.87        12
           2       0.86      0.75      0.80         8
           3       0.85      1.00      0.92        39
           4       0.86      1.00      0.92         6
           5       0.88      0.88      0.88        42
           6       1.00      0.62      0.77        16
           7       0.67      1.00      0.80         2
           8       1.00      0.50      0.67         2

    accuracy                           0.87       127
   macro avg       0.88      0.82      0.83       127
weighted avg       0.88      0.87      0.87       127



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 439ms/step - accuracy: 0.3731 - loss: 2.9782 - val_accuracy: 0.2965 - val_loss: 1.8221
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 435ms/step - accuracy: 0.8458 - loss: 0.3960 - val_accuracy: 0.1977 - val_loss: 3.0916
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 436ms/step - accuracy: 0.9687 - loss: 0.0887 - val_accuracy: 0.3430 - val_loss: 3.2202
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 436ms/step - accuracy: 0.9845 - loss: 0.0567 - val_accuracy: 0.0872 - val_loss: 5.6940
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 437ms/step - accuracy: 0.9807 - loss: 0.0460 - val_accuracy: 0.2500 - val_loss: 3.5327
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.9960 - loss: 0.0185 - val_accuracy: 0.2558 - val_loss: 4.7386
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.3033 - loss: 2.6774 - val_accuracy: 0.0581 - val_loss: 2.2791
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4580 - loss: 1.8359 - val_accuracy: 0.0756 - val_loss: 2.2638
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5543 - loss: 1.3267 - val_accuracy: 0.0640 - val_loss: 2.3151
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6089 - loss: 1.1893 - val_accuracy: 0.0698 - val_loss: 2.2892
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6690 - loss: 1.0368 - val_accuracy: 0.1453 - val_loss: 2.4793
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6923 - loss: 0.8736 - val_accuracy: 0.2500 - val_loss: 2.2694
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_1.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_1.png


In [6]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_2.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_2.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 2, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 2, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 2, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8110
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.83      0.87        12
           2       0.75      0.75      0.75         8
           3       0.87      1.00      0.93        39
           4       1.00      1.00      1.00         6
           5       0.84      0.90      0.87        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.87       127
   macro avg       0.82      0.72      0.75       127
weighted avg       0.88      0.87      0.86       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 450ms/step - accuracy: 0.3628 - loss: 5.1580 - val_accuracy: 0.1105 - val_loss: 2.1124
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.8235 - loss: 0.5011 - val_accuracy: 0.2733 - val_loss: 2.5444
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.9721 - loss: 0.0884 - val_accuracy: 0.2500 - val_loss: 2.7523
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.9914 - loss: 0.0480 - val_accuracy: 0.2849 - val_loss: 3.5687
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.9916 - loss: 0.0374 - val_accuracy: 0.2965 - val_loss: 3.1051
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.9926 - loss: 0.0388 - val_accuracy: 0.2326 - val_loss: 4.1109
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2753 - loss: 2.8023 - val_accuracy: 0.0581 - val_loss: 2.9542
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4767 - loss: 1.6779 - val_accuracy: 0.1221 - val_loss: 2.8454
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6064 - loss: 1.2254 - val_accuracy: 0.1744 - val_loss: 2.5270
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6625 - loss: 1.0786 - val_accuracy: 0.2616 - val_loss: 2.4635
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6782 - loss: 0.9384 - val_accuracy: 0.3023 - val_loss: 2.2979
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7739 - loss: 0.6845 - val_accuracy: 0.1919 - val_loss: 2.9436
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_2.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_2.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_2.png


In [7]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_3.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_3.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 3, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 3, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 3, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8344
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.83      0.87        12
           2       0.70      0.88      0.78         8
           3       0.87      1.00      0.93        39
           4       1.00      1.00      1.00         6
           5       0.90      0.90      0.90        42
           6       1.00      0.56      0.72        16
           7       0.67      1.00      0.80         2
           8       1.00      0.50      0.67         2

    accuracy                           0.88       127
   macro avg       0.88      0.83      0.83       127
weighted avg       0.89      0.88      0.88       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 449ms/step - accuracy: 0.3260 - loss: 4.2077 - val_accuracy: 0.0988 - val_loss: 2.0432
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.8299 - loss: 0.6013 - val_accuracy: 0.1686 - val_loss: 2.6556
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.9706 - loss: 0.1286 - val_accuracy: 0.2965 - val_loss: 2.7618
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.9815 - loss: 0.0691 - val_accuracy: 0.2616 - val_loss: 2.8918
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 1.0000 - loss: 0.0172 - val_accuracy: 0.2791 - val_loss: 3.5561
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.9987 - loss: 0.0139 - val_accuracy: 0.2558 - val_loss: 4.1470
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.3418 - loss: 2.6131 - val_accuracy: 0.0407 - val_loss: 2.4244
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4594 - loss: 1.7204 - val_accuracy: 0.0930 - val_loss: 2.1943
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5946 - loss: 1.2331 - val_accuracy: 0.1628 - val_loss: 2.0882
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6406 - loss: 1.0952 - val_accuracy: 0.0814 - val_loss: 2.5286
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6952 - loss: 0.8986 - val_accuracy: 0.2151 - val_loss: 2.1296
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7500 - loss: 0.6898 - val_accuracy: 0.3023 - val_loss: 2.1686
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_3.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_3.png


In [8]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_4.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_4.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 4, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 4, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 4, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8344
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.91      0.83      0.87        12
           2       0.88      0.88      0.88         8
           3       0.89      1.00      0.94        39
           4       1.00      1.00      1.00         6
           5       0.84      0.90      0.87        42
           6       1.00      0.56      0.72        16
           7       0.67      1.00      0.80         2
           8       1.00      0.50      0.67         2

    accuracy                           0.88       127
   macro avg       0.90      0.83      0.84       127
weighted avg       0.89      0.88      0.88       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 461ms/step - accuracy: 0.3396 - loss: 3.9094 - val_accuracy: 0.1453 - val_loss: 2.4895
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.8634 - loss: 0.3979 - val_accuracy: 0.1453 - val_loss: 3.4931
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.9841 - loss: 0.0674 - val_accuracy: 0.2035 - val_loss: 4.7765
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.9841 - loss: 0.0431 - val_accuracy: 0.2035 - val_loss: 4.3604
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.9964 - loss: 0.0244 - val_accuracy: 0.2035 - val_loss: 5.8072
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.9970 - loss: 0.0110 - val_accuracy: 0.2500 - val_loss: 4.9135
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.2371 - loss: 2.8272 - val_accuracy: 0.0407 - val_loss: 2.2725
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4581 - loss: 1.8281 - val_accuracy: 0.1163 - val_loss: 2.0682
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5728 - loss: 1.2536 - val_accuracy: 0.1977 - val_loss: 2.1364
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6501 - loss: 1.0758 - val_accuracy: 0.2384 - val_loss: 2.0823
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6917 - loss: 0.9340 - val_accuracy: 0.2616 - val_loss: 2.2610
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7422 - loss: 0.7871 - val_accuracy: 0.2151 - val_loss: 2.6100
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_4.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_4.png


In [9]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_5.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_5.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 5, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 5, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 5, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8423
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.78      0.88      0.82         8
           3       0.87      1.00      0.93        39
           4       1.00      1.00      1.00         6
           5       0.90      0.90      0.90        42
           6       0.91      0.62      0.74        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.89       127
   macro avg       0.83      0.75      0.77       127
weighted avg       0.90      0.89      0.89       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 434ms/step - accuracy: 0.3060 - loss: 4.5992 - val_accuracy: 0.0988 - val_loss: 2.2256
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 430ms/step - accuracy: 0.8156 - loss: 0.5462 - val_accuracy: 0.1628 - val_loss: 2.8466
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.9514 - loss: 0.1328 - val_accuracy: 0.1105 - val_loss: 5.2068
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 439ms/step - accuracy: 0.9778 - loss: 0.0663 - val_accuracy: 0.1919 - val_loss: 4.3038
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.9921 - loss: 0.0484 - val_accuracy: 0.2674 - val_loss: 4.1441
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.9937 - loss: 0.0183 - val_accuracy: 0.1860 - val_loss: 5.4616
Epoch 7/50
[1m22/22[0m [32m━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2929 - loss: 2.9129 - val_accuracy: 0.2209 - val_loss: 1.8630
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4832 - loss: 1.6513 - val_accuracy: 0.2558 - val_loss: 1.8555
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5681 - loss: 1.3393 - val_accuracy: 0.1512 - val_loss: 2.1268
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6194 - loss: 1.1347 - val_accuracy: 0.2093 - val_loss: 2.3778
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7204 - loss: 0.8884 - val_accuracy: 0.2267 - val_loss: 2.3941
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7445 - loss: 0.7516 - val_accuracy: 0.2209 - val_loss: 2.5352
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_5.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_5.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_5.png


In [10]:
# 6
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_6.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_6.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 6, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 6, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 6, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8579
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.73      1.00      0.84         8
           3       0.89      1.00      0.94        39
           4       1.00      1.00      1.00         6
           5       0.90      0.90      0.90        42
           6       1.00      0.62      0.77        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.90       127
   macro avg       0.84      0.76      0.78       127
weighted avg       0.92      0.90      0.90       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 459ms/step - accuracy: 0.3038 - loss: 3.6550 - val_accuracy: 0.0988 - val_loss: 2.3305
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 0.7970 - loss: 0.6285 - val_accuracy: 0.0814 - val_loss: 4.1440
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 0.9691 - loss: 0.1397 - val_accuracy: 0.1395 - val_loss: 3.6764
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.9741 - loss: 0.0788 - val_accuracy: 0.1512 - val_loss: 4.2013
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460ms/step - accuracy: 0.9867 - loss: 0.0342 - val_accuracy: 0.1860 - val_loss: 4.1285
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.9924 - loss: 0.0336 - val_accuracy: 0.1628 - val_loss: 4.2781
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2412 - loss: 3.2476 - val_accuracy: 0.1512 - val_loss: 2.1390
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4591 - loss: 1.8693 - val_accuracy: 0.1802 - val_loss: 2.4342
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5826 - loss: 1.1909 - val_accuracy: 0.2035 - val_loss: 2.4303
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6226 - loss: 1.1086 - val_accuracy: 0.1977 - val_loss: 2.8832
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7231 - loss: 0.8221 - val_accuracy: 0.2442 - val_loss: 3.1785
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7559 - loss: 0.6737 - val_accuracy: 0.1919 - val_loss: 3.3451
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_6.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_6.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_6.png


In [11]:
# 7
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_7.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_7.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 7, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 7, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 7, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.9096
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.89      1.00      0.94         8
           3       0.85      1.00      0.92        39
           4       1.00      1.00      1.00         6
           5       0.86      0.88      0.87        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.89       127
   macro avg       0.95      0.91      0.92       127
weighted avg       0.90      0.89      0.88       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 463ms/step - accuracy: 0.3501 - loss: 4.1489 - val_accuracy: 0.1047 - val_loss: 3.2839
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460ms/step - accuracy: 0.8600 - loss: 0.4290 - val_accuracy: 0.1686 - val_loss: 3.1100
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 462ms/step - accuracy: 0.9679 - loss: 0.0804 - val_accuracy: 0.1977 - val_loss: 3.4269
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9927 - loss: 0.0368 - val_accuracy: 0.2151 - val_loss: 3.0538
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.9976 - loss: 0.0245 - val_accuracy: 0.1802 - val_loss: 4.1270
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 0.9910 - loss: 0.0298 - val_accuracy: 0.1570 - val_loss: 4.2776
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1990 - loss: 3.1427 - val_accuracy: 0.0872 - val_loss: 2.3518
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4960 - loss: 1.6678 - val_accuracy: 0.1570 - val_loss: 2.4429
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5953 - loss: 1.3282 - val_accuracy: 0.2209 - val_loss: 2.1681
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6177 - loss: 1.1168 - val_accuracy: 0.1686 - val_loss: 2.6943
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7067 - loss: 0.8899 - val_accuracy: 0.1221 - val_loss: 2.8135
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7564 - loss: 0.7374 - val_accuracy: 0.1512 - val_loss: 3.1294
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_7.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_7.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_7.png


In [12]:
# 8
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_8.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_8.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 8, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 8, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 8, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.9018
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.89      1.00      0.94         8
           3       0.87      1.00      0.93        39
           4       0.75      1.00      0.86         6
           5       0.86      0.88      0.87        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.88       127
   macro avg       0.92      0.90      0.90       127
weighted avg       0.89      0.88      0.87       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 472ms/step - accuracy: 0.3467 - loss: 4.5533 - val_accuracy: 0.1686 - val_loss: 1.9496
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 475ms/step - accuracy: 0.8007 - loss: 0.6824 - val_accuracy: 0.2849 - val_loss: 2.3908
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 470ms/step - accuracy: 0.9319 - loss: 0.2069 - val_accuracy: 0.1395 - val_loss: 3.9201
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 470ms/step - accuracy: 0.9799 - loss: 0.0718 - val_accuracy: 0.2442 - val_loss: 3.6248
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 476ms/step - accuracy: 0.9949 - loss: 0.0312 - val_accuracy: 0.2035 - val_loss: 5.1838
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 0.9983 - loss: 0.0151 - val_accuracy: 0.1802 - val_loss: 5.4739
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2653 - loss: 2.8411 - val_accuracy: 0.0465 - val_loss: 2.3826
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4645 - loss: 1.7207 - val_accuracy: 0.0523 - val_loss: 2.5937
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5518 - loss: 1.4076 - val_accuracy: 0.1337 - val_loss: 2.2877
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6510 - loss: 1.0856 - val_accuracy: 0.1395 - val_loss: 2.7709
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6649 - loss: 0.9313 - val_accuracy: 0.2151 - val_loss: 2.5312
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7384 - loss: 0.7930 - val_accuracy: 0.1279 - val_loss: 3.2172
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [13]:
# 9
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_9.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_9.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 9, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 9, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 9, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8988
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.80      1.00      0.89         8
           3       0.83      1.00      0.91        39
           4       1.00      1.00      1.00         6
           5       0.86      0.86      0.86        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.94      0.90      0.90       127
weighted avg       0.89      0.87      0.87       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 486ms/step - accuracy: 0.2734 - loss: 4.8092 - val_accuracy: 0.0058 - val_loss: 2.4143
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.6989 - loss: 0.8729 - val_accuracy: 0.1860 - val_loss: 2.7822
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.9446 - loss: 0.2093 - val_accuracy: 0.3314 - val_loss: 2.6120
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9662 - loss: 0.1052 - val_accuracy: 0.2558 - val_loss: 3.0600
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9744 - loss: 0.0720 - val_accuracy: 0.1628 - val_loss: 3.8445
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460ms/step - accuracy: 0.9888 - loss: 0.0661 - val_accuracy: 0.1860 - val_loss: 3.5412
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1987 - loss: 2.9994 - val_accuracy: 0.0407 - val_loss: 2.3043
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4033 - loss: 1.8221 - val_accuracy: 0.1163 - val_loss: 1.8675
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5482 - loss: 1.5599 - val_accuracy: 0.2151 - val_loss: 1.8124
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5449 - loss: 1.2832 - val_accuracy: 0.1860 - val_loss: 1.9269
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6170 - loss: 1.1233 - val_accuracy: 0.1919 - val_loss: 1.9873
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6467 - loss: 1.0480 - val_accuracy: 0.3372 - val_loss: 1.7894
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_9.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_9.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_9.png


In [1]:
# 10
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_10.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_10.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 10, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 10, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 10, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.9092
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.92      0.96        12
           2       0.73      1.00      0.84         8
           3       0.89      1.00      0.94        39
           4       0.86      1.00      0.92         6
           5       0.88      0.86      0.87        42
           6       0.89      0.50      0.64        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.88       127
   macro avg       0.90      0.91      0.90       127
weighted avg       0.89      0.88      0.87       127



2025-01-05 14:58:30.276363: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-05 14:58:30.347706: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-05 14:58:30.367735: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-05 14:58:30.530995: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-01-05 14:58:33.253465: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 430ms/step - accuracy: 0.3300 - loss: 4.0342 - val_accuracy: 0.0233 - val_loss: 2.6856
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 424ms/step - accuracy: 0.7587 - loss: 0.7747 - val_accuracy: 0.0988 - val_loss: 3.4926
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 426ms/step - accuracy: 0.9011 - loss: 0.2870 - val_accuracy: 0.2500 - val_loss: 3.3327
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 429ms/step - accuracy: 0.9796 - loss: 0.0876 - val_accuracy: 0.1105 - val_loss: 5.8023
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 430ms/step - accuracy: 0.9825 - loss: 0.0574 - val_accuracy: 0.2035 - val_loss: 3.3176
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 432ms/step - accuracy: 0.9888 - loss: 0.0508 - val_accuracy: 0.1860 - val_loss: 3.9103
Epoch 7/50
[1m22/22[0m [32m━━━━━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2225 - loss: 2.9134 - val_accuracy: 0.0756 - val_loss: 2.1636
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4062 - loss: 1.9107 - val_accuracy: 0.0756 - val_loss: 2.3551
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5298 - loss: 1.5364 - val_accuracy: 0.1512 - val_loss: 2.1451
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5399 - loss: 1.3578 - val_accuracy: 0.1802 - val_loss: 2.1561
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6186 - loss: 1.1059 - val_accuracy: 0.2267 - val_loss: 2.1492
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6652 - loss: 1.0198 - val_accuracy: 0.1802 - val_loss: 2.5849
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [2]:
# 11
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_11.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_11.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 11, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 11, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 11, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8936
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.92      0.92      0.92        12
           2       0.88      0.88      0.88         8
           3       0.89      1.00      0.94        39
           4       0.86      1.00      0.92         6
           5       0.84      0.86      0.85        42
           6       0.89      0.50      0.64        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.91      0.89      0.89       127
weighted avg       0.87      0.87      0.87       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 442ms/step - accuracy: 0.3583 - loss: 3.2646 - val_accuracy: 0.0174 - val_loss: 3.1182
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 437ms/step - accuracy: 0.7717 - loss: 0.6483 - val_accuracy: 0.3256 - val_loss: 2.3690
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.9263 - loss: 0.2316 - val_accuracy: 0.2035 - val_loss: 4.3205
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 437ms/step - accuracy: 0.9856 - loss: 0.0610 - val_accuracy: 0.2616 - val_loss: 3.1276
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 438ms/step - accuracy: 0.9844 - loss: 0.0477 - val_accuracy: 0.3314 - val_loss: 3.5721
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.9948 - loss: 0.0404 - val_accuracy: 0.0988 - val_loss: 4.8517
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2643 - loss: 2.9451 - val_accuracy: 0.0756 - val_loss: 2.3619
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4176 - loss: 2.0105 - val_accuracy: 0.0233 - val_loss: 2.6987
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5372 - loss: 1.4883 - val_accuracy: 0.1977 - val_loss: 2.1479
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5638 - loss: 1.2940 - val_accuracy: 0.1453 - val_loss: 2.5301
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6317 - loss: 1.1831 - val_accuracy: 0.1628 - val_loss: 2.8940
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6408 - loss: 1.0639 - val_accuracy: 0.1860 - val_loss: 2.8156
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [3]:
# 12
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_12.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_12.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 12, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 12, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 12, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8731
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.92      0.92      0.92        12
           2       0.86      0.75      0.80         8
           3       0.87      1.00      0.93        39
           4       0.86      1.00      0.92         6
           5       0.82      0.88      0.85        42
           6       1.00      0.44      0.61        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.91      0.87      0.88       127
weighted avg       0.88      0.87      0.86       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 453ms/step - accuracy: 0.3057 - loss: 3.7670 - val_accuracy: 0.0291 - val_loss: 2.3303
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.7056 - loss: 0.8003 - val_accuracy: 0.2674 - val_loss: 2.2492
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.9135 - loss: 0.3533 - val_accuracy: 0.3779 - val_loss: 2.2511
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.9650 - loss: 0.1118 - val_accuracy: 0.1919 - val_loss: 3.4007
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.9666 - loss: 0.1416 - val_accuracy: 0.0756 - val_loss: 4.1198
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.9836 - loss: 0.0587 - val_accuracy: 0.1512 - val_loss: 3.7946
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2631 - loss: 2.9184 - val_accuracy: 0.0233 - val_loss: 2.5339
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3882 - loss: 2.0321 - val_accuracy: 0.0698 - val_loss: 2.4564
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4781 - loss: 1.6028 - val_accuracy: 0.0930 - val_loss: 2.4906
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5198 - loss: 1.3717 - val_accuracy: 0.1453 - val_loss: 2.5058
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5786 - loss: 1.3278 - val_accuracy: 0.1337 - val_loss: 2.7468
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6403 - loss: 1.1459 - val_accuracy: 0.1105 - val_loss: 2.5442
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [4]:
# 13
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_13.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_13.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 13, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 13, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 13, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8702
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.85      0.92      0.88        12
           2       0.83      0.62      0.71         8
           3       0.85      1.00      0.92        39
           4       1.00      1.00      1.00         6
           5       0.84      0.86      0.85        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.92      0.87      0.88       127
weighted avg       0.87      0.87      0.86       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 448ms/step - accuracy: 0.3009 - loss: 4.4884 - val_accuracy: 0.0174 - val_loss: 2.2147
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 438ms/step - accuracy: 0.6058 - loss: 1.0729 - val_accuracy: 0.2965 - val_loss: 1.8975
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 436ms/step - accuracy: 0.8235 - loss: 0.5236 - val_accuracy: 0.2849 - val_loss: 2.2298
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 439ms/step - accuracy: 0.9289 - loss: 0.2424 - val_accuracy: 0.2151 - val_loss: 3.0099
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.9680 - loss: 0.1337 - val_accuracy: 0.2093 - val_loss: 4.1744
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.9633 - loss: 0.1276 - val_accuracy: 0.1512 - val_loss: 3.7089
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2352 - loss: 2.9191 - val_accuracy: 0.0116 - val_loss: 2.4663
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3909 - loss: 2.0150 - val_accuracy: 0.0465 - val_loss: 2.3589
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4530 - loss: 1.8016 - val_accuracy: 0.0756 - val_loss: 2.0895
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4547 - loss: 1.5603 - val_accuracy: 0.0930 - val_loss: 2.2368
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5642 - loss: 1.2786 - val_accuracy: 0.1163 - val_loss: 2.2786
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6151 - loss: 1.1928 - val_accuracy: 0.1686 - val_loss: 2.0931
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [6]:
# 14
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_14.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_14.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 14, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 14, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 14, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8234
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.75      0.78        12
           2       0.62      0.62      0.62         8
           3       0.83      0.97      0.89        39
           4       0.83      0.83      0.83         6
           5       0.86      0.90      0.88        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.84       127
   macro avg       0.87      0.82      0.84       127
weighted avg       0.85      0.84      0.83       127



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 455ms/step - accuracy: 0.3099 - loss: 3.2556 - val_accuracy: 0.0116 - val_loss: 2.2373
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.5703 - loss: 1.2369 - val_accuracy: 0.0756 - val_loss: 2.4017
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.7956 - loss: 0.6186 - val_accuracy: 0.3081 - val_loss: 2.0687
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.9202 - loss: 0.2598 - val_accuracy: 0.2326 - val_loss: 2.9503
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.9494 - loss: 0.1464 - val_accuracy: 0.2965 - val_loss: 3.0394
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.9693 - loss: 0.1086 - val_accuracy: 0.1337 - val_loss: 4.6586
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1956 - loss: 3.3525 - val_accuracy: 0.0116 - val_loss: 2.9543
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4122 - loss: 2.1057 - val_accuracy: 0.0291 - val_loss: 2.4613
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3922 - loss: 2.1438 - val_accuracy: 0.0872 - val_loss: 2.3526
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4785 - loss: 1.7047 - val_accuracy: 0.0407 - val_loss: 2.4971
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4765 - loss: 1.6419 - val_accuracy: 0.0756 - val_loss: 2.2992
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4925 - loss: 1.5598 - val_accuracy: 0.1047 - val_loss: 2.2195
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [7]:
# 15
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_15.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_15.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model44.58
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 15, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 15, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 15, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8501
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.75      0.78        12
           2       0.67      0.75      0.71         8
           3       0.85      1.00      0.92        39
           4       0.83      0.83      0.83         6
           5       0.90      0.90      0.90        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.88      0.85      0.86       127
weighted avg       0.88      0.87      0.86       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 475ms/step - accuracy: 0.3038 - loss: 4.1190 - val_accuracy: 0.0407 - val_loss: 2.3637
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 467ms/step - accuracy: 0.5807 - loss: 1.1945 - val_accuracy: 0.0872 - val_loss: 2.5228
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 483ms/step - accuracy: 0.7291 - loss: 0.7689 - val_accuracy: 0.1453 - val_loss: 2.5719
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 467ms/step - accuracy: 0.8397 - loss: 0.4621 - val_accuracy: 0.3081 - val_loss: 2.6144
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 469ms/step - accuracy: 0.9078 - loss: 0.2717 - val_accuracy: 0.1919 - val_loss: 4.0851
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.9362 - loss: 0.1881 - val_accuracy: 0.1802 - val_loss: 3.8432
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.2264 - loss: 3.1205 - val_accuracy: 0.0291 - val_loss: 2.4916
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3736 - loss: 2.1878 - val_accuracy: 0.0349 - val_loss: 2.2917
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3275 - loss: 1.9638 - val_accuracy: 0.0000e+00 - val_loss: 2.4703
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4142 - loss: 1.9251 - val_accuracy: 0.0000e+00 - val_loss: 2.4143
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4564 - loss: 1.7050 - val_accuracy: 0.1105 - val_loss: 2.2182
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4735 - loss: 1.5723 - val_accuracy: 0.0988 - val_loss: 2.2762
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━

In [9]:
# 16
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_16.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_16.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 16, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 16, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 16, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8418
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.75      0.78        12
           2       0.67      0.75      0.71         8
           3       0.86      0.95      0.90        39
           4       0.71      0.83      0.77         6
           5       0.89      0.95      0.92        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.86       127
   macro avg       0.87      0.84      0.84       127
weighted avg       0.87      0.86      0.85       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 470ms/step - accuracy: 0.2906 - loss: 4.4743 - val_accuracy: 0.0058 - val_loss: 2.4158
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.4772 - loss: 1.5917 - val_accuracy: 0.2442 - val_loss: 1.8297
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 440ms/step - accuracy: 0.5673 - loss: 1.2518 - val_accuracy: 0.0872 - val_loss: 2.5305
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.6977 - loss: 0.8440 - val_accuracy: 0.1802 - val_loss: 2.7292
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.7879 - loss: 0.6118 - val_accuracy: 0.2151 - val_loss: 2.5820
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.8817 - loss: 0.3642 - val_accuracy: 0.2907 - val_loss: 2.3753
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2167 - loss: 3.1390 - val_accuracy: 0.0000e+00 - val_loss: 2.3982
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3164 - loss: 2.4980 - val_accuracy: 0.0407 - val_loss: 2.4613
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4227 - loss: 1.8758 - val_accuracy: 0.1047 - val_loss: 2.0275
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4282 - loss: 1.8338 - val_accuracy: 0.0698 - val_loss: 2.0945
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4244 - loss: 1.8268 - val_accuracy: 0.1047 - val_loss: 2.0414
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4489 - loss: 1.7390 - val_accuracy: 0.1163 - val_loss: 2.0198
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_16.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_16.png


In [10]:
# 17
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_17.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_17.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 17, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 17, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 17, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8106
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.85      0.92      0.88        12
           2       0.56      0.62      0.59         8
           3       0.89      0.87      0.88        39
           4       0.44      0.67      0.53         6
           5       0.83      0.90      0.86        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.82       127
   macro avg       0.82      0.81      0.80       127
weighted avg       0.84      0.82      0.82       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 452ms/step - accuracy: 0.3124 - loss: 4.1345 - val_accuracy: 0.0116 - val_loss: 2.4101
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.5223 - loss: 1.5191 - val_accuracy: 0.0058 - val_loss: 2.4429
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.5734 - loss: 1.2052 - val_accuracy: 0.2326 - val_loss: 1.9299
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.7360 - loss: 0.7554 - val_accuracy: 0.1802 - val_loss: 2.6258
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.7995 - loss: 0.6226 - val_accuracy: 0.0756 - val_loss: 4.6888
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.7970 - loss: 0.5303 - val_accuracy: 0.2500 - val_loss: 2.5190
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.1980 - loss: 3.3271 - val_accuracy: 0.0174 - val_loss: 2.2264
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3384 - loss: 2.3185 - val_accuracy: 0.0349 - val_loss: 2.2503
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3496 - loss: 2.1024 - val_accuracy: 0.0000e+00 - val_loss: 2.4105
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3826 - loss: 1.9072 - val_accuracy: 0.0174 - val_loss: 2.2348
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3739 - loss: 1.9225 - val_accuracy: 0.0174 - val_loss: 2.3609
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3995 - loss: 1.8097 - val_accuracy: 0.0581 - val_loss: 2.2283
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_17.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_17.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_17.png


In [11]:
# 18
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_18.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_18.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 18, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 18, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 18, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8067
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.79      0.92      0.85        12
           2       0.67      0.50      0.57         8
           3       0.95      0.97      0.96        39
           4       0.57      0.67      0.62         6
           5       0.78      0.83      0.80        42
           6       0.82      0.56      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.83       127
   macro avg       0.82      0.81      0.81       127
weighted avg       0.83      0.83      0.82       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 489ms/step - accuracy: 0.2453 - loss: 5.2343 - val_accuracy: 0.0058 - val_loss: 2.4854
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 466ms/step - accuracy: 0.4940 - loss: 1.5979 - val_accuracy: 0.0349 - val_loss: 2.2509
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.5769 - loss: 1.2671 - val_accuracy: 0.1047 - val_loss: 2.1646
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 469ms/step - accuracy: 0.6792 - loss: 0.9329 - val_accuracy: 0.0930 - val_loss: 2.5257
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.7888 - loss: 0.6122 - val_accuracy: 0.1279 - val_loss: 2.4858
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.8030 - loss: 0.5894 - val_accuracy: 0.1802 - val_loss: 2.5976
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.1739 - loss: 3.4595 - val_accuracy: 0.0058 - val_loss: 2.3993
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3386 - loss: 2.3608 - val_accuracy: 0.0116 - val_loss: 2.4087
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3940 - loss: 1.9744 - val_accuracy: 0.0407 - val_loss: 2.3102
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4036 - loss: 1.8282 - val_accuracy: 0.0291 - val_loss: 2.4314
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4448 - loss: 1.7377 - val_accuracy: 0.1279 - val_loss: 2.0455
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4406 - loss: 1.7352 - val_accuracy: 0.0523 - val_loss: 2.1807
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_18.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_18.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_18.png


In [12]:
# 19
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_19.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_19.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 19, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 19, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 19, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.8012
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.75      0.78        12
           2       0.38      0.38      0.38         8
           3       0.92      0.90      0.91        39
           4       0.62      0.83      0.71         6
           5       0.85      0.93      0.89        42
           6       0.91      0.62      0.74        16
           7       1.00      1.00      1.00         2
           8       0.67      1.00      0.80         2

    accuracy                           0.83       127
   macro avg       0.77      0.80      0.78       127
weighted avg       0.83      0.83      0.83       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 473ms/step - accuracy: 0.2730 - loss: 5.7675 - val_accuracy: 0.0058 - val_loss: 2.4382
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 478ms/step - accuracy: 0.4896 - loss: 1.5916 - val_accuracy: 0.0465 - val_loss: 2.1300
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 478ms/step - accuracy: 0.6027 - loss: 1.1850 - val_accuracy: 0.2151 - val_loss: 1.9891
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 470ms/step - accuracy: 0.7238 - loss: 0.8186 - val_accuracy: 0.2558 - val_loss: 2.3587
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 475ms/step - accuracy: 0.8040 - loss: 0.6007 - val_accuracy: 0.3314 - val_loss: 2.2309
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 472ms/step - accuracy: 0.8354 - loss: 0.4841 - val_accuracy: 0.2151 - val_loss: 3.4657
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2303 - loss: 3.0253 - val_accuracy: 0.0000e+00 - val_loss: 2.6023
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2944 - loss: 2.3757 - val_accuracy: 0.0058 - val_loss: 2.2804
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4018 - loss: 2.1278 - val_accuracy: 0.0233 - val_loss: 2.3615
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4204 - loss: 1.9643 - val_accuracy: 0.0174 - val_loss: 2.4418
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4095 - loss: 1.8633 - val_accuracy: 0.0349 - val_loss: 2.3959
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4052 - loss: 1.9038 - val_accuracy: 0.0116 - val_loss: 2.3433
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_19.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_19.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_19.png


In [1]:
# 20
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_20.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_20.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 20, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 20, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 20, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.6827
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.89      0.67      0.76        12
           2       0.40      0.50      0.44         8
           3       0.84      0.92      0.88        39
           4       0.50      0.50      0.50         6
           5       0.79      0.81      0.80        42
           6       0.90      0.56      0.69        16
           7       0.50      1.00      0.67         2
           8       0.50      0.50      0.50         2

    accuracy                           0.76       127
   macro avg       0.66      0.68      0.66       127
weighted avg       0.78      0.76      0.76       127



2025-01-05 17:55:05.540295: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-05 17:55:05.595750: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-05 17:55:05.612500: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-05 17:55:05.724395: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 459ms/step - accuracy: 0.2942 - loss: 4.4891 - val_accuracy: 0.0174 - val_loss: 2.0356
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 466ms/step - accuracy: 0.4564 - loss: 1.6101 - val_accuracy: 0.1453 - val_loss: 1.9310
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.5792 - loss: 1.1730 - val_accuracy: 0.1105 - val_loss: 2.2164
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 462ms/step - accuracy: 0.6822 - loss: 0.9279 - val_accuracy: 0.2558 - val_loss: 2.3331
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.7507 - loss: 0.6933 - val_accuracy: 0.3140 - val_loss: 2.1450
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.8411 - loss: 0.4617 - val_accuracy: 0.2442 - val_loss: 2.3768
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2299 - loss: 2.9875 - val_accuracy: 0.0058 - val_loss: 3.0453
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3344 - loss: 2.5712 - val_accuracy: 0.0349 - val_loss: 2.6789
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3457 - loss: 2.3370 - val_accuracy: 0.0000e+00 - val_loss: 2.3126
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4200 - loss: 1.8990 - val_accuracy: 0.0000e+00 - val_loss: 2.3102
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3989 - loss: 1.8446 - val_accuracy: 0.0233 - val_loss: 2.4530
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4102 - loss: 1.7521 - val_accuracy: 0.0349 - val_loss: 2.4572
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_20.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_20.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_20.png


In [2]:
# 21
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_21.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_21.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 21, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 21, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 21, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.7739
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.73      0.67      0.70        12
           2       0.50      0.62      0.56         8
           3       0.90      0.92      0.91        39
           4       0.50      0.67      0.57         6
           5       0.83      0.81      0.82        42
           6       0.80      0.50      0.62        16
           7       0.67      1.00      0.80         2
           8       0.50      1.00      0.67         2

    accuracy                           0.78       127
   macro avg       0.68      0.77      0.70       127
weighted avg       0.79      0.78      0.78       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 454ms/step - accuracy: 0.2652 - loss: 3.6583 - val_accuracy: 0.0058 - val_loss: 2.2957
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.5126 - loss: 1.4967 - val_accuracy: 0.0465 - val_loss: 2.4761
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.5848 - loss: 1.1855 - val_accuracy: 0.2616 - val_loss: 2.1681
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.7308 - loss: 0.7982 - val_accuracy: 0.2674 - val_loss: 2.3663
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.7966 - loss: 0.5792 - val_accuracy: 0.2674 - val_loss: 2.6042
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.8380 - loss: 0.4647 - val_accuracy: 0.2849 - val_loss: 3.2071
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2206 - loss: 3.1852 - val_accuracy: 0.0058 - val_loss: 3.2334
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3385 - loss: 2.4832 - val_accuracy: 0.0407 - val_loss: 2.2751
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3882 - loss: 2.0756 - val_accuracy: 0.0116 - val_loss: 2.8716
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3731 - loss: 2.1012 - val_accuracy: 0.0349 - val_loss: 2.5037
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3985 - loss: 1.8263 - val_accuracy: 0.0581 - val_loss: 2.2739
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4541 - loss: 1.7046 - val_accuracy: 0.0930 - val_loss: 2.2479
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_21.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_21.png


In [3]:
# 22
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_22.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_22.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 22, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 22, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 22, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.7534
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.73      0.67      0.70        12
           2       0.50      0.50      0.50         8
           3       0.86      0.92      0.89        39
           4       0.67      0.67      0.67         6
           5       0.76      0.83      0.80        42
           6       0.88      0.44      0.58        16
           7       0.67      1.00      0.80         2
           8       0.67      1.00      0.80         2

    accuracy                           0.77       127
   macro avg       0.72      0.75      0.72       127
weighted avg       0.78      0.77      0.76       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 437ms/step - accuracy: 0.2845 - loss: 3.5843 - val_accuracy: 0.0058 - val_loss: 2.3029
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 435ms/step - accuracy: 0.5384 - loss: 1.4298 - val_accuracy: 0.0523 - val_loss: 2.2250
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 435ms/step - accuracy: 0.6563 - loss: 1.0084 - val_accuracy: 0.2209 - val_loss: 1.8909
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 435ms/step - accuracy: 0.7116 - loss: 0.7881 - val_accuracy: 0.3081 - val_loss: 1.7350
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 439ms/step - accuracy: 0.8644 - loss: 0.4612 - val_accuracy: 0.3081 - val_loss: 1.9618
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.8770 - loss: 0.3920 - val_accuracy: 0.3430 - val_loss: 2.0151
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2571 - loss: 3.3638 - val_accuracy: 0.0000e+00 - val_loss: 2.8833
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3377 - loss: 2.2763 - val_accuracy: 0.0174 - val_loss: 2.5863
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3846 - loss: 2.0840 - val_accuracy: 0.0233 - val_loss: 2.4603
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3879 - loss: 1.9928 - val_accuracy: 0.0291 - val_loss: 2.4833
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4304 - loss: 1.9055 - val_accuracy: 0.0640 - val_loss: 2.3253
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3800 - loss: 1.8629 - val_accuracy: 0.0581 - val_loss: 2.2808
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_22.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_22.png


In [4]:
# 23
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_23.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_23.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 23, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 23, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 23, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.6866
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.80      0.67      0.73        12
           2       0.44      0.50      0.47         8
           3       0.88      0.90      0.89        39
           4       0.44      0.67      0.53         6
           5       0.80      0.76      0.78        42
           6       0.80      0.50      0.62        16
           7       0.40      1.00      0.57         2
           8       0.50      0.50      0.50         2
           9       0.00      0.00      0.00         0

    accuracy                           0.74       127
   macro avg       0.51      0.55      0.51       127
weighted avg       0.77      0.74      0.75       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 448ms/step - accuracy: 0.2503 - loss: 5.1732 - val_accuracy: 0.0233 - val_loss: 2.4608
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.4449 - loss: 1.7134 - val_accuracy: 0.0233 - val_loss: 2.4056
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.4893 - loss: 1.4525 - val_accuracy: 0.1163 - val_loss: 2.2144
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 445ms/step - accuracy: 0.6231 - loss: 1.1478 - val_accuracy: 0.1919 - val_loss: 2.0828
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.6710 - loss: 0.8812 - val_accuracy: 0.1744 - val_loss: 2.5074
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.7857 - loss: 0.5904 - val_accuracy: 0.1047 - val_loss: 3.3411
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2311 - loss: 2.9465 - val_accuracy: 0.0116 - val_loss: 2.8264
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3047 - loss: 2.4219 - val_accuracy: 0.0116 - val_loss: 2.6332
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3429 - loss: 2.1937 - val_accuracy: 0.0174 - val_loss: 2.5119
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3706 - loss: 2.1323 - val_accuracy: 0.0116 - val_loss: 2.5632
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3484 - loss: 2.0173 - val_accuracy: 0.0174 - val_loss: 2.2733
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3754 - loss: 1.9284 - val_accuracy: 0.0116 - val_loss: 2.3153
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_23.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_23.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_23.png


In [5]:
# 24
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/micro-wavllm-large/train_24.npz')
test_data = np.load('../Files/extracted_features/micro-wavllm-large/test_24.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 24, "../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 24, "../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 24, "../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix")


SVM Accuracy: 0.5730
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.70      0.58      0.64        12
           2       0.38      0.38      0.38         8
           3       0.87      0.85      0.86        39
           4       0.29      0.67      0.40         6
           5       0.79      0.74      0.77        42
           6       0.75      0.38      0.50        16
           7       0.20      0.50      0.29         2
           8       0.33      0.50      0.40         2
           9       0.00      0.00      0.00         0

    accuracy                           0.68       127
   macro avg       0.43      0.46      0.42       127
weighted avg       0.74      0.68      0.69       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 458ms/step - accuracy: 0.2708 - loss: 3.7506 - val_accuracy: 0.0349 - val_loss: 2.3584
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.4573 - loss: 1.7156 - val_accuracy: 0.0233 - val_loss: 2.3361
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.4972 - loss: 1.4798 - val_accuracy: 0.0523 - val_loss: 2.4826
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.5979 - loss: 1.1345 - val_accuracy: 0.1279 - val_loss: 2.4528
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.7149 - loss: 0.8549 - val_accuracy: 0.1628 - val_loss: 2.7601
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.7448 - loss: 0.6866 - val_accuracy: 0.2791 - val_loss: 2.2983
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2226 - loss: 3.2766 - val_accuracy: 0.0291 - val_loss: 3.0131
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2867 - loss: 2.7057 - val_accuracy: 0.0058 - val_loss: 2.3894
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3417 - loss: 2.3128 - val_accuracy: 0.0058 - val_loss: 2.5928
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3533 - loss: 2.1178 - val_accuracy: 0.0000e+00 - val_loss: 2.5317
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4100 - loss: 1.9217 - val_accuracy: 0.0058 - val_loss: 2.2944
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4068 - loss: 2.0893 - val_accuracy: 0.0058 - val_loss: 2.3421
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_wavlm/svm_conf_matrix/conf_matrix_layer_24.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_wavlm/cnn_conf_matrix/conf_matrix_layer_24.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_wavlm/ann_conf_matrix/conf_matrix_layer_24.png


In [6]:
#data2vec

In [7]:
# 0
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_0.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_0.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 0, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 0, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 0, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.7820
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.82      0.75      0.78        12
           2       0.71      0.62      0.67         8
           3       0.83      1.00      0.91        39
           4       0.75      1.00      0.86         6
           5       0.88      0.88      0.88        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.84       127
   macro avg       0.78      0.70      0.71       127
weighted avg       0.86      0.84      0.84       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 456ms/step - accuracy: 0.2701 - loss: 5.8260 - val_accuracy: 0.3023 - val_loss: 1.6349
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.7259 - loss: 0.8596 - val_accuracy: 0.2326 - val_loss: 2.4145
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 451ms/step - accuracy: 0.8751 - loss: 0.3950 - val_accuracy: 0.2674 - val_loss: 3.4920
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 445ms/step - accuracy: 0.9473 - loss: 0.1562 - val_accuracy: 0.2791 - val_loss: 3.0954
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.9655 - loss: 0.1073 - val_accuracy: 0.2326 - val_loss: 3.9978
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.9765 - loss: 0.0688 - val_accuracy: 0.2500 - val_loss: 5.0331
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2171 - loss: 3.0965 - val_accuracy: 0.0523 - val_loss: 2.9313
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4359 - loss: 1.9404 - val_accuracy: 0.1977 - val_loss: 2.1858
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5218 - loss: 1.4992 - val_accuracy: 0.2035 - val_loss: 2.3819
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5988 - loss: 1.3113 - val_accuracy: 0.2733 - val_loss: 2.0416
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6141 - loss: 1.3171 - val_accuracy: 0.2791 - val_loss: 1.9393
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6517 - loss: 1.0554 - val_accuracy: 0.2151 - val_loss: 2.5534
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_0.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_0.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_0.png


In [1]:
# 1
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_1.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_1.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 1, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 1, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 1, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2025-01-05 19:04:48.146251: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-05 19:04:48.198748: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-05 19:04:48.213964: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


SVM Accuracy: 0.8315
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.78      0.88      0.82         8
           3       0.83      1.00      0.91        39
           4       0.86      1.00      0.92         6
           5       0.90      0.88      0.89        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.87       127
   macro avg       0.82      0.74      0.76       127
weighted avg       0.89      0.87      0.87       127



2025-01-05 19:04:48.321744: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-01-05 19:04:51.727463: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 428ms/step - accuracy: 0.3624 - loss: 4.0991 - val_accuracy: 0.2384 - val_loss: 2.0948
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 423ms/step - accuracy: 0.8109 - loss: 0.6136 - val_accuracy: 0.2674 - val_loss: 3.0763
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 428ms/step - accuracy: 0.9459 - loss: 0.2121 - val_accuracy: 0.3314 - val_loss: 2.6210
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 428ms/step - accuracy: 0.9715 - loss: 0.1122 - val_accuracy: 0.3081 - val_loss: 3.8824
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 435ms/step - accuracy: 0.9791 - loss: 0.0691 - val_accuracy: 0.3547 - val_loss: 3.0951
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.9910 - loss: 0.0368 - val_accuracy: 0.3721 - val_loss: 3.3905
Epoch 7/50
[1m22/22[0m [32m━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2649 - loss: 3.0639 - val_accuracy: 0.0174 - val_loss: 2.6705
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4361 - loss: 1.7725 - val_accuracy: 0.0465 - val_loss: 2.5421
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4955 - loss: 1.6039 - val_accuracy: 0.1860 - val_loss: 2.4502
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5709 - loss: 1.2767 - val_accuracy: 0.3488 - val_loss: 2.3588
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6204 - loss: 1.0962 - val_accuracy: 0.2965 - val_loss: 2.3249
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7038 - loss: 0.9132 - val_accuracy: 0.3081 - val_loss: 2.4360
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_1.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_1.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_1.png


In [2]:
# 2
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_2.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_2.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 2, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 2, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 2, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.8315
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.88      0.88      0.88         8
           3       0.85      1.00      0.92        39
           4       0.75      1.00      0.86         6
           5       0.88      0.88      0.88        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.87       127
   macro avg       0.82      0.74      0.76       127
weighted avg       0.89      0.87      0.87       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 452ms/step - accuracy: 0.3815 - loss: 3.8298 - val_accuracy: 0.2558 - val_loss: 2.1472
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.8557 - loss: 0.4162 - val_accuracy: 0.1221 - val_loss: 3.5047
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 443ms/step - accuracy: 0.9845 - loss: 0.0904 - val_accuracy: 0.2267 - val_loss: 4.2496
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.9848 - loss: 0.0387 - val_accuracy: 0.1512 - val_loss: 5.1680
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.9973 - loss: 0.0285 - val_accuracy: 0.2151 - val_loss: 4.4292
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.9969 - loss: 0.0169 - val_accuracy: 0.2151 - val_loss: 5.3031
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2456 - loss: 3.0390 - val_accuracy: 0.1686 - val_loss: 2.2579
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4622 - loss: 1.6433 - val_accuracy: 0.2616 - val_loss: 2.1889
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5978 - loss: 1.3439 - val_accuracy: 0.3198 - val_loss: 2.2082
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6228 - loss: 1.1808 - val_accuracy: 0.3256 - val_loss: 2.4035
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6525 - loss: 1.0101 - val_accuracy: 0.2907 - val_loss: 2.9675
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7356 - loss: 0.7870 - val_accuracy: 0.2442 - val_loss: 3.5770
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [3]:
# 3
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_3.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_3.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 3, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 3, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 3, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.8296
SVM Classification Report:
              precision    recall  f1-score   support

           1       1.00      0.83      0.91        12
           2       0.78      0.88      0.82         8
           3       0.85      1.00      0.92        39
           4       1.00      1.00      1.00         6
           5       0.89      0.93      0.91        42
           6       1.00      0.50      0.67        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.88       127
   macro avg       0.83      0.74      0.77       127
weighted avg       0.90      0.88      0.88       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 452ms/step - accuracy: 0.3130 - loss: 4.1763 - val_accuracy: 0.1337 - val_loss: 2.3658
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 445ms/step - accuracy: 0.7930 - loss: 0.5714 - val_accuracy: 0.2093 - val_loss: 2.5011
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 445ms/step - accuracy: 0.9517 - loss: 0.1776 - val_accuracy: 0.1919 - val_loss: 3.8613
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.9903 - loss: 0.0727 - val_accuracy: 0.1802 - val_loss: 4.9740
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.9851 - loss: 0.0460 - val_accuracy: 0.2209 - val_loss: 3.9854
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.9936 - loss: 0.0232 - val_accuracy: 0.2209 - val_loss: 4.4347
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2566 - loss: 2.9747 - val_accuracy: 0.1163 - val_loss: 2.0129
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4603 - loss: 1.8300 - val_accuracy: 0.2500 - val_loss: 1.9442
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5373 - loss: 1.4959 - val_accuracy: 0.1453 - val_loss: 2.2083
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6018 - loss: 1.3798 - val_accuracy: 0.2326 - val_loss: 2.1361
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6322 - loss: 1.0860 - val_accuracy: 0.2384 - val_loss: 2.2552
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6832 - loss: 0.9398 - val_accuracy: 0.2558 - val_loss: 2.3559
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_3.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_3.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_3.png


In [4]:
# 4
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_4.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_4.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 4, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 4, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 4, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.8583
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.86      1.00      0.92        12
           2       0.78      0.88      0.82         8
           3       0.93      1.00      0.96        39
           4       0.86      1.00      0.92         6
           5       0.93      0.93      0.93        42
           6       1.00      0.56      0.72        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.91       127
   macro avg       0.82      0.76      0.77       127
weighted avg       0.92      0.91      0.90       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 452ms/step - accuracy: 0.3414 - loss: 3.8348 - val_accuracy: 0.0698 - val_loss: 2.3201
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.7944 - loss: 0.7199 - val_accuracy: 0.0756 - val_loss: 3.1882
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.9122 - loss: 0.2324 - val_accuracy: 0.2035 - val_loss: 3.4370
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.9710 - loss: 0.1040 - val_accuracy: 0.2209 - val_loss: 3.1983
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.9849 - loss: 0.0369 - val_accuracy: 0.1628 - val_loss: 3.8188
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 445ms/step - accuracy: 0.9903 - loss: 0.0353 - val_accuracy: 0.1453 - val_loss: 4.7787
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2581 - loss: 2.8247 - val_accuracy: 0.0291 - val_loss: 2.4337
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4343 - loss: 1.8260 - val_accuracy: 0.0349 - val_loss: 2.5522
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4985 - loss: 1.6124 - val_accuracy: 0.0523 - val_loss: 2.3083
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5355 - loss: 1.3835 - val_accuracy: 0.0988 - val_loss: 2.3849
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6369 - loss: 1.1683 - val_accuracy: 0.0988 - val_loss: 2.4664
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6759 - loss: 0.9763 - val_accuracy: 0.1395 - val_loss: 2.3259
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_4.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_4.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_4.png


In [5]:
# 5
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_5.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_5.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 5, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 5, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 5, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.8338
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.92      0.92      0.92        12
           2       0.75      0.75      0.75         8
           3       0.88      0.97      0.93        39
           4       0.75      1.00      0.86         6
           5       0.90      0.90      0.90        42
           6       1.00      0.62      0.77        16
           7       1.00      1.00      1.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.88       127
   macro avg       0.80      0.74      0.75       127
weighted avg       0.90      0.88      0.88       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 483ms/step - accuracy: 0.3230 - loss: 4.5187 - val_accuracy: 0.0465 - val_loss: 2.8063
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 473ms/step - accuracy: 0.7098 - loss: 0.8647 - val_accuracy: 0.1221 - val_loss: 2.8983
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 469ms/step - accuracy: 0.9054 - loss: 0.3098 - val_accuracy: 0.2442 - val_loss: 2.4629
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 473ms/step - accuracy: 0.9678 - loss: 0.1230 - val_accuracy: 0.1279 - val_loss: 3.5807
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 475ms/step - accuracy: 0.9574 - loss: 0.1531 - val_accuracy: 0.2151 - val_loss: 3.0083
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 474ms/step - accuracy: 0.9919 - loss: 0.0545 - val_accuracy: 0.1337 - val_loss: 3.8185
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2521 - loss: 2.9063 - val_accuracy: 0.0058 - val_loss: 2.6022
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3964 - loss: 2.0655 - val_accuracy: 0.0174 - val_loss: 2.4613
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5174 - loss: 1.6392 - val_accuracy: 0.0174 - val_loss: 2.2019
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5253 - loss: 1.4278 - val_accuracy: 0.0872 - val_loss: 2.1891
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6011 - loss: 1.3338 - val_accuracy: 0.0698 - val_loss: 2.0323
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6317 - loss: 1.0963 - val_accuracy: 0.0988 - val_loss: 2.2287
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_5.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_5.png


In [6]:
# 6
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_6.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_6.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 6, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 6, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 6, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.8515
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.85      0.92      0.88        12
           2       0.60      0.75      0.67         8
           3       0.90      0.95      0.93        39
           4       0.80      0.67      0.73         6
           5       0.86      0.90      0.88        42
           6       1.00      0.62      0.77        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.87       127
   macro avg       0.88      0.85      0.86       127
weighted avg       0.88      0.87      0.86       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 475ms/step - accuracy: 0.3451 - loss: 4.0240 - val_accuracy: 0.0523 - val_loss: 2.2499
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.6848 - loss: 0.9493 - val_accuracy: 0.0756 - val_loss: 2.8240
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.8532 - loss: 0.4339 - val_accuracy: 0.1628 - val_loss: 3.0397
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.9435 - loss: 0.2015 - val_accuracy: 0.2209 - val_loss: 4.1141
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.9692 - loss: 0.1078 - val_accuracy: 0.1337 - val_loss: 3.7899
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.9600 - loss: 0.1026 - val_accuracy: 0.1860 - val_loss: 4.3408
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2873 - loss: 2.5736 - val_accuracy: 0.0233 - val_loss: 2.8559
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3530 - loss: 2.2973 - val_accuracy: 0.0581 - val_loss: 2.4302
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4839 - loss: 1.8054 - val_accuracy: 0.0930 - val_loss: 2.2817
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4679 - loss: 1.7242 - val_accuracy: 0.0523 - val_loss: 2.4575
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5673 - loss: 1.4466 - val_accuracy: 0.0988 - val_loss: 2.3586
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5677 - loss: 1.3217 - val_accuracy: 0.1105 - val_loss: 2.4280
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

In [7]:
# 7
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_7.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_7.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 7, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 7, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 7, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.8196
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.71      0.83      0.77        12
           2       0.50      0.62      0.56         8
           3       0.95      0.92      0.94        39
           4       0.56      0.83      0.67         6
           5       0.86      0.90      0.88        42
           6       0.88      0.44      0.58        16
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         2

    accuracy                           0.83       127
   macro avg       0.81      0.82      0.80       127
weighted avg       0.84      0.83      0.82       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 455ms/step - accuracy: 0.3390 - loss: 4.3702 - val_accuracy: 0.0407 - val_loss: 2.5612
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.5230 - loss: 1.3542 - val_accuracy: 0.1105 - val_loss: 2.3926
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.6806 - loss: 0.8163 - val_accuracy: 0.1337 - val_loss: 2.2983
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.8210 - loss: 0.5310 - val_accuracy: 0.1919 - val_loss: 2.4539
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.8998 - loss: 0.2897 - val_accuracy: 0.1744 - val_loss: 3.1041
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.9328 - loss: 0.1921 - val_accuracy: 0.2209 - val_loss: 3.4616
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2080 - loss: 3.2156 - val_accuracy: 0.0000e+00 - val_loss: 2.8425
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3386 - loss: 2.2752 - val_accuracy: 0.0407 - val_loss: 2.3915
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3834 - loss: 1.9804 - val_accuracy: 0.0349 - val_loss: 2.2293
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4258 - loss: 1.7569 - val_accuracy: 0.0581 - val_loss: 2.2863
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4781 - loss: 1.5993 - val_accuracy: 0.0407 - val_loss: 2.3461
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4789 - loss: 1.6369 - val_accuracy: 0.0698 - val_loss: 2.3324
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

In [8]:
# 8
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_8.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_8.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 8, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 8, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 8, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.7487
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.67      0.80        12
           2       0.55      0.75      0.63         8
           3       0.88      0.90      0.89        39
           4       0.50      0.83      0.62         6
           5       0.83      0.90      0.86        42
           6       1.00      0.44      0.61        16
           7       1.00      0.50      0.67         2
           8       1.00      1.00      1.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.80       127
   macro avg       0.67      0.60      0.61       127
weighted avg       0.85      0.80      0.81       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 476ms/step - accuracy: 0.2724 - loss: 4.0166 - val_accuracy: 0.0058 - val_loss: 2.1289
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.5365 - loss: 1.5164 - val_accuracy: 0.0465 - val_loss: 2.7745
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 466ms/step - accuracy: 0.6315 - loss: 1.0434 - val_accuracy: 0.1105 - val_loss: 2.4129
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 466ms/step - accuracy: 0.7587 - loss: 0.6534 - val_accuracy: 0.1279 - val_loss: 2.7158
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.8547 - loss: 0.4505 - val_accuracy: 0.2035 - val_loss: 2.6291
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.9223 - loss: 0.2350 - val_accuracy: 0.1802 - val_loss: 3.9344
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2374 - loss: 3.0176 - val_accuracy: 0.0174 - val_loss: 3.3099
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3311 - loss: 2.3314 - val_accuracy: 0.0465 - val_loss: 2.6623
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3648 - loss: 2.1688 - val_accuracy: 0.0523 - val_loss: 2.4003
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4234 - loss: 1.8515 - val_accuracy: 0.0291 - val_loss: 2.6486
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4621 - loss: 1.6446 - val_accuracy: 0.0174 - val_loss: 2.4739
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4881 - loss: 1.6019 - val_accuracy: 0.1105 - val_loss: 2.3982
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_8.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_8.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_8.png


In [9]:
# 9
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_9.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_9.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 9, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 9, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 9, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5487
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.50      0.60        12
           2       0.57      0.50      0.53         8
           3       0.72      0.87      0.79        39
           4       0.38      0.83      0.53         6
           5       0.83      0.81      0.82        42
           6       0.67      0.38      0.48        16
           7       0.00      0.00      0.00         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.71       127
   macro avg       0.55      0.49      0.49       127
weighted avg       0.72      0.71      0.70       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 458ms/step - accuracy: 0.2974 - loss: 6.2109 - val_accuracy: 0.0000e+00 - val_loss: 2.2231
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 462ms/step - accuracy: 0.4523 - loss: 1.6711 - val_accuracy: 0.0116 - val_loss: 2.8229
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.5132 - loss: 1.3840 - val_accuracy: 0.0814 - val_loss: 2.5347
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.5945 - loss: 1.1231 - val_accuracy: 0.0523 - val_loss: 3.1229
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 455ms/step - accuracy: 0.6873 - loss: 0.8365 - val_accuracy: 0.1453 - val_loss: 2.9547
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.8093 - loss: 0.5649 - val_accuracy: 0.1628 - val_loss: 3.3801
Epoch 7/50
[1m22/22[0m [32

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2694 - loss: 2.8509 - val_accuracy: 0.0058 - val_loss: 2.7944
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3449 - loss: 2.2944 - val_accuracy: 0.0116 - val_loss: 2.5557
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3688 - loss: 2.0801 - val_accuracy: 0.0291 - val_loss: 2.5219
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4234 - loss: 1.9144 - val_accuracy: 0.0465 - val_loss: 2.2847
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3690 - loss: 2.0366 - val_accuracy: 0.0233 - val_loss: 2.5999
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4173 - loss: 1.8086 - val_accuracy: 0.0058 - val_loss: 2.5187
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_9.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_9.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_9.png


In [10]:
# 10
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_10.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_10.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 10, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 10, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 10, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5398
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.50      0.60        12
           2       0.71      0.62      0.67         8
           3       0.78      0.79      0.78        39
           4       0.27      0.67      0.38         6
           5       0.75      0.86      0.80        42
           6       0.86      0.38      0.52        16
           7       0.00      0.00      0.00         2
           8       0.50      0.50      0.50         2

    accuracy                           0.70       127
   macro avg       0.58      0.54      0.53       127
weighted avg       0.73      0.70      0.70       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 472ms/step - accuracy: 0.2757 - loss: 4.7828 - val_accuracy: 0.0000e+00 - val_loss: 2.7759
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.4572 - loss: 1.7003 - val_accuracy: 0.0000e+00 - val_loss: 2.9298
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 476ms/step - accuracy: 0.5370 - loss: 1.3971 - val_accuracy: 0.0407 - val_loss: 2.8730
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.5808 - loss: 1.1773 - val_accuracy: 0.0465 - val_loss: 2.5253
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 473ms/step - accuracy: 0.6822 - loss: 0.8718 - val_accuracy: 0.0756 - val_loss: 2.8583
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 469ms/step - accuracy: 0.7628 - loss: 0.7318 - val_accuracy: 0.0988 - val_loss: 3.6679
Epoch 7/50
[1m22/22[0m 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2146 - loss: 3.2860 - val_accuracy: 0.0349 - val_loss: 2.9832
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3124 - loss: 2.4058 - val_accuracy: 0.0000e+00 - val_loss: 2.8027
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3933 - loss: 2.0475 - val_accuracy: 0.0058 - val_loss: 2.6088
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3944 - loss: 2.0034 - val_accuracy: 0.0174 - val_loss: 2.3663
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4105 - loss: 1.8700 - val_accuracy: 0.0349 - val_loss: 2.3143
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4346 - loss: 1.8376 - val_accuracy: 0.0523 - val_loss: 2.3320
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_10.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_10.png


In [11]:
# 11
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_11.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_11.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 11, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 11, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 11, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.4900
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.70      0.58      0.64        12
           2       0.50      0.50      0.50         8
           3       0.76      0.67      0.71        39
           4       0.20      0.50      0.29         6
           5       0.73      0.86      0.79        42
           6       0.71      0.31      0.43        16
           7       0.00      0.00      0.00         2
           8       0.50      0.50      0.50         2
           9       0.00      0.00      0.00         0

    accuracy                           0.65       127
   macro avg       0.46      0.44      0.43       127
weighted avg       0.68      0.65      0.65       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 465ms/step - accuracy: 0.2907 - loss: 3.4062 - val_accuracy: 0.0000e+00 - val_loss: 2.7495
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.4909 - loss: 1.5957 - val_accuracy: 0.0349 - val_loss: 2.4128
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.5505 - loss: 1.2392 - val_accuracy: 0.0814 - val_loss: 2.5727
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 463ms/step - accuracy: 0.6935 - loss: 0.9231 - val_accuracy: 0.0988 - val_loss: 2.7482
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 0.7705 - loss: 0.7306 - val_accuracy: 0.1163 - val_loss: 2.6131
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.8282 - loss: 0.5314 - val_accuracy: 0.1628 - val_loss: 3.2095
Epoch 7/50
[1m22/22[0m [32

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1762 - loss: 3.5044 - val_accuracy: 0.0000e+00 - val_loss: 2.8180
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3127 - loss: 2.5184 - val_accuracy: 0.0058 - val_loss: 2.2327
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3329 - loss: 2.2163 - val_accuracy: 0.0174 - val_loss: 2.3811
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3828 - loss: 1.9606 - val_accuracy: 0.0174 - val_loss: 2.3299
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4035 - loss: 1.9056 - val_accuracy: 0.0174 - val_loss: 2.1521
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4242 - loss: 1.7939 - val_accuracy: 0.0233 - val_loss: 2.1145
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_11.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_11.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_11.png


In [12]:
# 12
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_12.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_12.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 12, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 12, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 12, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.4822
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.50      0.60        12
           2       0.75      0.38      0.50         8
           3       0.78      0.79      0.78        39
           4       0.29      0.67      0.40         6
           5       0.71      0.83      0.77        42
           6       0.43      0.19      0.26        16
           7       0.50      0.50      0.50         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.65       127
   macro avg       0.47      0.43      0.42       127
weighted avg       0.67      0.65      0.64       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 461ms/step - accuracy: 0.2949 - loss: 3.9075 - val_accuracy: 0.0058 - val_loss: 2.7900
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.4798 - loss: 1.5673 - val_accuracy: 0.0174 - val_loss: 2.7557
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 469ms/step - accuracy: 0.5315 - loss: 1.3506 - val_accuracy: 0.0698 - val_loss: 2.4298
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 470ms/step - accuracy: 0.6315 - loss: 1.0001 - val_accuracy: 0.1221 - val_loss: 2.5666
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 472ms/step - accuracy: 0.7424 - loss: 0.7143 - val_accuracy: 0.2442 - val_loss: 2.5853
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 464ms/step - accuracy: 0.8302 - loss: 0.5185 - val_accuracy: 0.1686 - val_loss: 3.2836
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2088 - loss: 3.3078 - val_accuracy: 0.0000e+00 - val_loss: 2.5622
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3525 - loss: 2.2661 - val_accuracy: 0.0058 - val_loss: 2.4600
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3878 - loss: 2.0376 - val_accuracy: 0.0000e+00 - val_loss: 2.4237
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3908 - loss: 1.9166 - val_accuracy: 0.0058 - val_loss: 2.3339
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3923 - loss: 2.0309 - val_accuracy: 0.0116 - val_loss: 2.3024
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3646 - loss: 1.9734 - val_accuracy: 0.0116 - val_loss: 2.2655
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_12.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_12.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_12.png


In [13]:
# 13
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_13.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_13.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 13, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 13, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 13, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.3775
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.75      0.50      0.60        12
           2       0.50      0.38      0.43         8
           3       0.81      0.74      0.77        39
           4       0.15      0.33      0.21         6
           5       0.70      0.88      0.78        42
           6       0.50      0.19      0.27        16
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.63       127
   macro avg       0.34      0.30      0.31       127
weighted avg       0.65      0.63      0.62       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 474ms/step - accuracy: 0.3071 - loss: 3.9299 - val_accuracy: 0.0116 - val_loss: 2.5443
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 467ms/step - accuracy: 0.4514 - loss: 1.6842 - val_accuracy: 0.0116 - val_loss: 2.9252
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 468ms/step - accuracy: 0.4811 - loss: 1.4988 - val_accuracy: 0.0233 - val_loss: 2.8221
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 447ms/step - accuracy: 0.5767 - loss: 1.2187 - val_accuracy: 0.1570 - val_loss: 2.0905
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - accuracy: 0.6634 - loss: 0.9731 - val_accuracy: 0.2151 - val_loss: 2.0955
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460ms/step - accuracy: 0.7241 - loss: 0.8229 - val_accuracy: 0.1570 - val_loss: 3.0190
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2650 - loss: 3.0640 - val_accuracy: 0.0116 - val_loss: 3.2583
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3338 - loss: 2.5548 - val_accuracy: 0.0174 - val_loss: 2.5466
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3967 - loss: 2.1375 - val_accuracy: 0.0058 - val_loss: 2.7908
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3681 - loss: 1.9768 - val_accuracy: 0.0058 - val_loss: 2.3714
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4065 - loss: 1.8485 - val_accuracy: 0.0058 - val_loss: 2.4773
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4171 - loss: 1.8270 - val_accuracy: 0.0116 - val_loss: 2.5308
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_13.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_13.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_13.png


In [1]:
# 14
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_14.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_14.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 14, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 14, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 14, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
2025-01-05 20:57:06.645757: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-05 20:57:06.725607: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-05 20:57:06.750060: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-05 20:57:06.914037: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instruct

SVM Accuracy: 0.3439
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.71      0.42      0.53        12
           2       0.25      0.25      0.25         8
           3       0.79      0.77      0.78        39
           4       0.17      0.33      0.22         6
           5       0.68      0.86      0.76        42
           6       0.50      0.12      0.20        16
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.61       127
   macro avg       0.31      0.28      0.27       127
weighted avg       0.62      0.61      0.59       127



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2025-01-05 20:57:10.283733: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 452ms/step - accuracy: 0.2842 - loss: 3.8584 - val_accuracy: 0.0058 - val_loss: 2.0602
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.5022 - loss: 1.6013 - val_accuracy: 0.0233 - val_loss: 2.3054
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.5621 - loss: 1.3155 - val_accuracy: 0.0640 - val_loss: 2.0918
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.6586 - loss: 1.0818 - val_accuracy: 0.1686 - val_loss: 1.8955
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.6472 - loss: 0.9204 - val_accuracy: 0.1221 - val_loss: 2.8175
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.7476 - loss: 0.6975 - val_accuracy: 0.2093 - val_loss: 2.8909
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2316 - loss: 3.1502 - val_accuracy: 0.0000e+00 - val_loss: 2.4247
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3226 - loss: 2.4349 - val_accuracy: 0.0174 - val_loss: 2.4491
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3504 - loss: 2.2777 - val_accuracy: 0.0116 - val_loss: 2.5054
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3627 - loss: 2.1083 - val_accuracy: 0.0000e+00 - val_loss: 2.3759
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3997 - loss: 1.8812 - val_accuracy: 0.0000e+00 - val_loss: 2.4560
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4022 - loss: 1.8625 - val_accuracy: 0.0058 - val_loss: 2.3418
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_14.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_14.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_14.png


In [2]:
# 15
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_15.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_15.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 15, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 15, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 15, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.4115
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.67      0.50      0.57        12
           2       0.43      0.38      0.40         8
           3       0.74      0.79      0.77        39
           4       0.21      0.50      0.30         6
           5       0.76      0.81      0.78        42
           6       0.71      0.31      0.43        16
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.65       127
   macro avg       0.39      0.37      0.36       127
weighted avg       0.67      0.65      0.64       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 453ms/step - accuracy: 0.2152 - loss: 4.6455 - val_accuracy: 0.0000e+00 - val_loss: 2.8770
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.5129 - loss: 1.5733 - val_accuracy: 0.0000e+00 - val_loss: 2.6553
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.5052 - loss: 1.4262 - val_accuracy: 0.0407 - val_loss: 2.4934
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.5651 - loss: 1.2203 - val_accuracy: 0.1453 - val_loss: 2.5222
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.6758 - loss: 0.9729 - val_accuracy: 0.1105 - val_loss: 3.1977
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.7077 - loss: 0.7721 - val_accuracy: 0.2093 - val_loss: 3.2144
Epoch 7/50
[1m22/22[0m 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2224 - loss: 3.1316 - val_accuracy: 0.0000e+00 - val_loss: 2.9279
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3306 - loss: 2.5625 - val_accuracy: 0.0116 - val_loss: 2.6278
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3479 - loss: 2.1855 - val_accuracy: 0.0058 - val_loss: 2.5334
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4123 - loss: 2.0394 - val_accuracy: 0.0116 - val_loss: 2.3918
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4045 - loss: 1.9535 - val_accuracy: 0.0233 - val_loss: 2.4003
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4191 - loss: 1.8088 - val_accuracy: 0.0407 - val_loss: 2.3521
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_15.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_15.png


In [3]:
# 16
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_16.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_16.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 16, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 16, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 16, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5032
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.55      0.50      0.52        12
           2       0.67      0.25      0.36         8
           3       0.71      0.82      0.76        39
           4       0.22      0.33      0.27         6
           5       0.71      0.81      0.76        42
           6       0.83      0.31      0.45        16
           7       1.00      0.50      0.67         2
           8       0.33      0.50      0.40         2
           9       0.00      0.00      0.00         0

    accuracy                           0.65       127
   macro avg       0.56      0.45      0.47       127
weighted avg       0.68      0.65      0.64       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 451ms/step - accuracy: 0.2669 - loss: 4.5356 - val_accuracy: 0.0000e+00 - val_loss: 2.5710
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.5033 - loss: 1.5998 - val_accuracy: 0.0116 - val_loss: 2.3339
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.5066 - loss: 1.3654 - val_accuracy: 0.0581 - val_loss: 2.4038
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 448ms/step - accuracy: 0.6377 - loss: 1.0798 - val_accuracy: 0.1512 - val_loss: 2.1497
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.6929 - loss: 0.9301 - val_accuracy: 0.1570 - val_loss: 2.1082
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.7857 - loss: 0.6668 - val_accuracy: 0.1337 - val_loss: 2.7638
Epoch 7/50
[1m22/22[0m [32

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1731 - loss: 3.2952 - val_accuracy: 0.0116 - val_loss: 2.6510
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3002 - loss: 2.6515 - val_accuracy: 0.0058 - val_loss: 2.4236
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3509 - loss: 2.2134 - val_accuracy: 0.0174 - val_loss: 2.5788
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4079 - loss: 2.0317 - val_accuracy: 0.0058 - val_loss: 2.5141
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4219 - loss: 1.8484 - val_accuracy: 0.0233 - val_loss: 2.5090
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4245 - loss: 1.7733 - val_accuracy: 0.0174 - val_loss: 2.3958
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_16.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_16.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_16.png


In [4]:
# 17
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_17.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_17.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 17, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 17, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 17, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5388
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.75      0.50      0.60        12
           2       0.60      0.38      0.46         8
           3       0.80      0.85      0.82        39
           4       0.36      0.83      0.50         6
           5       0.74      0.88      0.80        42
           6       0.86      0.38      0.52        16
           7       1.00      0.50      0.67         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.72       127
   macro avg       0.57      0.48      0.49       127
weighted avg       0.74      0.72      0.70       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 450ms/step - accuracy: 0.3054 - loss: 3.1953 - val_accuracy: 0.0174 - val_loss: 2.2094
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.5486 - loss: 1.3946 - val_accuracy: 0.0814 - val_loss: 2.6174
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.6069 - loss: 1.0099 - val_accuracy: 0.1047 - val_loss: 2.6413
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 447ms/step - accuracy: 0.7193 - loss: 0.7782 - val_accuracy: 0.1453 - val_loss: 2.9688
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 446ms/step - accuracy: 0.8052 - loss: 0.5499 - val_accuracy: 0.2151 - val_loss: 2.7136
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.8838 - loss: 0.3274 - val_accuracy: 0.1744 - val_loss: 3.8213
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2420 - loss: 3.1184 - val_accuracy: 0.0349 - val_loss: 3.0583
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3710 - loss: 2.1759 - val_accuracy: 0.0116 - val_loss: 2.6169
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3557 - loss: 2.0068 - val_accuracy: 0.0058 - val_loss: 2.7511
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4255 - loss: 2.0257 - val_accuracy: 0.0407 - val_loss: 2.7033
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4341 - loss: 1.8159 - val_accuracy: 0.0465 - val_loss: 2.4810
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4111 - loss: 1.8269 - val_accuracy: 0.0291 - val_loss: 2.2728
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_17.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_17.png


In [5]:
# 18
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_18.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_18.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 18, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 18, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 18, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5751
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.62      0.42      0.50        12
           2       0.50      0.38      0.43         8
           3       0.76      0.74      0.75        39
           4       0.26      0.83      0.40         6
           5       0.82      0.86      0.84        42
           6       0.86      0.38      0.52        16
           7       1.00      0.50      0.67         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.68       127
   macro avg       0.58      0.46      0.48       127
weighted avg       0.75      0.68      0.69       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 444ms/step - accuracy: 0.2598 - loss: 3.8955 - val_accuracy: 0.0058 - val_loss: 2.3690
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 440ms/step - accuracy: 0.5060 - loss: 1.5153 - val_accuracy: 0.0233 - val_loss: 2.5008
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 440ms/step - accuracy: 0.5458 - loss: 1.3061 - val_accuracy: 0.1570 - val_loss: 1.9506
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.6616 - loss: 0.9641 - val_accuracy: 0.1919 - val_loss: 2.2145
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.7239 - loss: 0.7394 - val_accuracy: 0.2442 - val_loss: 2.5238
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.8244 - loss: 0.4999 - val_accuracy: 0.1860 - val_loss: 3.0637
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2298 - loss: 3.0742 - val_accuracy: 0.0291 - val_loss: 2.8174
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3372 - loss: 2.2960 - val_accuracy: 0.0000e+00 - val_loss: 2.6890
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3692 - loss: 2.1691 - val_accuracy: 0.0291 - val_loss: 2.4989
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3888 - loss: 1.8819 - val_accuracy: 0.0233 - val_loss: 2.6287
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4029 - loss: 1.8055 - val_accuracy: 0.0465 - val_loss: 2.6485
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4225 - loss: 1.7390 - val_accuracy: 0.0407 - val_loss: 2.5436
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_18.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_18.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_18.png


In [6]:
# 19
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_19.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_19.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 19, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 19, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 19, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5413
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.60      0.50      0.55        12
           2       0.43      0.38      0.40         8
           3       0.76      0.79      0.78        39
           4       0.20      0.50      0.29         6
           5       0.79      0.79      0.79        42
           6       0.67      0.38      0.48        16
           7       1.00      0.50      0.67         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.66       127
   macro avg       0.60      0.48      0.51       127
weighted avg       0.70      0.66      0.67       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 444ms/step - accuracy: 0.3140 - loss: 3.6957 - val_accuracy: 0.0000e+00 - val_loss: 2.4701
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 439ms/step - accuracy: 0.4972 - loss: 1.5803 - val_accuracy: 0.0407 - val_loss: 3.0212
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 440ms/step - accuracy: 0.5362 - loss: 1.2966 - val_accuracy: 0.0756 - val_loss: 2.6015
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 441ms/step - accuracy: 0.6988 - loss: 0.9618 - val_accuracy: 0.1570 - val_loss: 2.3676
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 444ms/step - accuracy: 0.7691 - loss: 0.6551 - val_accuracy: 0.1686 - val_loss: 2.6770
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 442ms/step - accuracy: 0.8309 - loss: 0.5057 - val_accuracy: 0.2093 - val_loss: 2.8571
Epoch 7/50
[1m22/22[0m [32

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2150 - loss: 3.2654 - val_accuracy: 0.0116 - val_loss: 2.5229
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3480 - loss: 2.3276 - val_accuracy: 0.0058 - val_loss: 2.7560
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3710 - loss: 2.1490 - val_accuracy: 0.0116 - val_loss: 2.4643
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3974 - loss: 1.9453 - val_accuracy: 0.0058 - val_loss: 2.4075
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3797 - loss: 2.0073 - val_accuracy: 0.0116 - val_loss: 2.3601
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3716 - loss: 1.8899 - val_accuracy: 0.0058 - val_loss: 2.6340
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_19.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_19.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_19.png


In [7]:
# 20
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_20.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_20.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 20, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 20, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 20, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5505
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.56      0.42      0.48        12
           2       0.25      0.25      0.25         8
           3       0.83      0.74      0.78        39
           4       0.25      0.83      0.38         6
           5       0.75      0.79      0.77        42
           6       0.67      0.38      0.48        16
           7       1.00      0.50      0.67         2
           8       1.00      0.50      0.67         2

    accuracy                           0.65       127
   macro avg       0.66      0.55      0.56       127
weighted avg       0.70      0.65      0.65       127

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 457ms/step - accuracy: 0.2870 - loss: 3.8551 - val_accuracy: 0.0058 - val_loss: 2.2059
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 449ms/step - accuracy: 0.4545 - loss: 1.6132 - val_accuracy: 0.0349 - val_loss: 2.4834
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.5361 - loss: 1.3366 - val_accuracy: 0.0465 - val_loss: 2.7706
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 450ms/step - accuracy: 0.6221 - loss: 1.1195 - val_accuracy: 0.1395 - val_loss: 2.1658
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.6560 - loss: 0.9193 - val_accuracy: 0.1453 - val_loss: 2.6042
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.7507 - loss: 0.7021 - val_accuracy: 0.1977 - val_loss: 2.2942
Epoch 7/50
[1m22/22[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2105 - loss: 3.2625 - val_accuracy: 0.0407 - val_loss: 2.4850
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2802 - loss: 2.3800 - val_accuracy: 0.0116 - val_loss: 2.5702
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3525 - loss: 2.1533 - val_accuracy: 0.0116 - val_loss: 2.3782
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3277 - loss: 2.0757 - val_accuracy: 0.0116 - val_loss: 2.4322
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4434 - loss: 1.8279 - val_accuracy: 0.0349 - val_loss: 2.4013
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4072 - loss: 1.8910 - val_accuracy: 0.0756 - val_loss: 2.2716
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_20.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_20.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_20.png


In [8]:
# 21
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_21.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_21.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 21, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 21, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 21, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.5267
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.50      0.50      0.50        12
           2       0.44      0.50      0.47         8
           3       0.78      0.74      0.76        39
           4       0.14      0.33      0.20         6
           5       0.76      0.76      0.76        42
           6       0.75      0.38      0.50        16
           7       0.50      0.50      0.50         2
           8       1.00      0.50      0.67         2
           9       0.00      0.00      0.00         0

    accuracy                           0.64       127
   macro avg       0.49      0.42      0.44       127
weighted avg       0.69      0.64      0.65       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 462ms/step - accuracy: 0.3099 - loss: 3.9819 - val_accuracy: 0.0000e+00 - val_loss: 2.6688
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.4431 - loss: 1.5875 - val_accuracy: 0.0407 - val_loss: 2.4858
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.5212 - loss: 1.4087 - val_accuracy: 0.0233 - val_loss: 2.2029
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 452ms/step - accuracy: 0.5943 - loss: 1.1875 - val_accuracy: 0.1105 - val_loss: 2.1159
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.6297 - loss: 0.9838 - val_accuracy: 0.0814 - val_loss: 2.5447
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 453ms/step - accuracy: 0.7185 - loss: 0.7757 - val_accuracy: 0.1337 - val_loss: 2.5719
Epoch 7/50
[1m22/22[0m [32

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.2282 - loss: 3.2605 - val_accuracy: 0.0000e+00 - val_loss: 2.6761
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3024 - loss: 2.4622 - val_accuracy: 0.0058 - val_loss: 2.7240
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3323 - loss: 2.2003 - val_accuracy: 0.0174 - val_loss: 2.5775
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3541 - loss: 2.1571 - val_accuracy: 0.0000e+00 - val_loss: 2.4777
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3686 - loss: 2.1739 - val_accuracy: 0.0000e+00 - val_loss: 2.4557
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3634 - loss: 1.9610 - val_accuracy: 0.0058 - val_loss: 2.4849
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_21.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_21.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_21.png


In [9]:
# 22
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_22.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_22.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 22, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 22, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 22, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.3522
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.57      0.33      0.42        12
           2       0.30      0.38      0.33         8
           3       0.68      0.69      0.68        39
           4       0.19      0.50      0.27         6
           5       0.68      0.67      0.67        42
           6       0.50      0.25      0.33        16
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.54       127
   macro avg       0.29      0.28      0.27       127
weighted avg       0.58      0.54      0.55       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 471ms/step - accuracy: 0.2913 - loss: 4.1635 - val_accuracy: 0.0000e+00 - val_loss: 2.4602
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 469ms/step - accuracy: 0.4595 - loss: 1.6235 - val_accuracy: 0.0407 - val_loss: 2.2774
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 471ms/step - accuracy: 0.5030 - loss: 1.4417 - val_accuracy: 0.0581 - val_loss: 2.0151
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.5552 - loss: 1.2451 - val_accuracy: 0.0465 - val_loss: 2.7645
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 466ms/step - accuracy: 0.6128 - loss: 1.0340 - val_accuracy: 0.1163 - val_loss: 2.5098
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 465ms/step - accuracy: 0.7022 - loss: 0.8553 - val_accuracy: 0.1047 - val_loss: 2.4565
Epoch 7/50
[1m22/22[0m [32

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2035 - loss: 3.2690 - val_accuracy: 0.0058 - val_loss: 2.4389
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3359 - loss: 2.3223 - val_accuracy: 0.0000e+00 - val_loss: 2.5198
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3155 - loss: 2.2992 - val_accuracy: 0.0000e+00 - val_loss: 2.2208
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3495 - loss: 2.0368 - val_accuracy: 0.0058 - val_loss: 2.2786
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3706 - loss: 1.9459 - val_accuracy: 0.0000e+00 - val_loss: 2.5431
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3354 - loss: 2.0569 - val_accuracy: 0.0058 - val_loss: 2.2802
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_22.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_22.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_22.png


In [10]:
# 23
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_23.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_23.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 23, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 23, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 23, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.3360
SVM Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       0.56      0.42      0.48        12
           2       0.30      0.38      0.33         8
           3       0.68      0.72      0.70        39
           4       0.15      0.33      0.21         6
           5       0.57      0.60      0.58        42
           6       0.67      0.25      0.36        16
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         2
           9       0.00      0.00      0.00         0

    accuracy                           0.53       127
   macro avg       0.29      0.27      0.27       127
weighted avg       0.56      0.53      0.53       127

Epoch 1/50


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 464ms/step - accuracy: 0.2816 - loss: 3.3883 - val_accuracy: 0.0116 - val_loss: 2.2798
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.4538 - loss: 1.7131 - val_accuracy: 0.0233 - val_loss: 2.5602
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.4871 - loss: 1.4556 - val_accuracy: 0.0174 - val_loss: 2.7055
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.4697 - loss: 1.4157 - val_accuracy: 0.0291 - val_loss: 2.5480
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 454ms/step - accuracy: 0.6078 - loss: 1.1130 - val_accuracy: 0.1570 - val_loss: 2.1182
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 456ms/step - accuracy: 0.6367 - loss: 0.9539 - val_accuracy: 0.0814 - val_loss: 2.9597
Epoch 7/50
[1m22/22[0m [32m━━━

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.2386 - loss: 3.1530 - val_accuracy: 0.0058 - val_loss: 2.6531
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3189 - loss: 2.7062 - val_accuracy: 0.0000e+00 - val_loss: 2.6421
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3140 - loss: 2.4423 - val_accuracy: 0.0058 - val_loss: 2.4547
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3429 - loss: 2.0465 - val_accuracy: 0.0058 - val_loss: 2.3697
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3621 - loss: 2.0608 - val_accuracy: 0.0058 - val_loss: 2.3244
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3611 - loss: 1.9677 - val_accuracy: 0.0000e+00 - val_loss: 2.7932
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_23.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_23.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_23.png


In [1]:
# 24
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
train_data = np.load('../Files/extracted_features/data2vec_large/train_24.npz')
test_data = np.load('../Files/extracted_features/data2vec_large/test_24.npz')

train_features = train_data['features']  # Ensure these have 1024 features
test_features = test_data['features']    # Ensure these have 1024 features

# Read Age labels instead of Gender
train_labels = pd.read_csv('../Files/labels/y_train.csv').Age
test_labels = pd.read_csv('../Files/labels/y_test.csv').Age

# Encode the labels
label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)

# Handle unseen labels in test set by filtering out labels not present in train set
test_labels_filtered = test_labels[test_labels.isin(train_labels.unique())]
test_features_filtered = test_features[test_labels.isin(train_labels.unique())]
test_labels_encoded = label_encoder.transform(test_labels_filtered)

# Standardize features
scaler = StandardScaler()
train_features_normalized = scaler.fit_transform(train_features)
test_features_normalized = scaler.transform(test_features_filtered)

# SVM model
svm_model = SVC(kernel='linear', C=10.0, gamma='scale', random_state=42)
svm_model.fit(train_features_normalized, train_labels_encoded)

# SVM Prediction and Evaluation
svm_predictions = svm_model.predict(test_features_normalized)
svm_accuracy = balanced_accuracy_score(test_labels_encoded, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy:.4f}")
print("SVM Classification Report:")
print(classification_report(test_labels_encoded, svm_predictions))

# CNN model (TensorFlow)
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout

train_features_cnn = np.expand_dims(train_features_normalized, axis=-1)  # Shape: (num_samples, 1024, 1)
test_features_cnn = np.expand_dims(test_features_normalized, axis=-1)    # Shape: (num_samples, 1024, 1)

cnn_model = Sequential([
    Conv1D(64, kernel_size=5, activation='relu', input_shape=(1024, 1)),  # Updated input shape to 1024
    Conv1D(128, kernel_size=5, activation='relu'),
    Conv1D(256, kernel_size=5, activation='relu'),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

cnn_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

cnn_model.fit(train_features_cnn, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# CNN Prediction and Evaluation
cnn_predictions = np.argmax(cnn_model.predict(test_features_cnn), axis=-1)
cnn_accuracy = balanced_accuracy_score(test_labels_encoded, cnn_predictions)
print(f"CNN Accuracy: {cnn_accuracy:.4f}")
print("CNN Classification Report:")
print(classification_report(test_labels_encoded, cnn_predictions))

# ANN model (TensorFlow)
ann_model = Sequential([
    Dense(512, activation='relu', input_shape=(1024,)),  # Updated input shape to 1024
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(len(label_encoder.classes_), activation='softmax')
])

ann_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

ann_model.fit(train_features_normalized, train_labels_encoded, epochs=50, batch_size=32, validation_split=0.2)

# ANN Prediction and Evaluation
ann_predictions = np.argmax(ann_model.predict(test_features_normalized), axis=-1)
ann_accuracy = balanced_accuracy_score(test_labels_encoded, ann_predictions)
print(f"ANN Accuracy: {ann_accuracy:.4f}")
print("ANN Classification Report:")
print(classification_report(test_labels_encoded, ann_predictions))

# Function to save confusion matrix as heatmap
def save_confusion_matrix(conf_matrix, labels, model_name, layer_number, folder_name):
    os.makedirs(folder_name, exist_ok=True)  # Ensure the folder exists
    plt.figure(figsize=(10, 8))
    sns.set(font_scale=1.2)

    # Fixed design for confusion matrix
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='viridis', xticklabels=labels, yticklabels=labels)

    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.tight_layout()
    save_path = os.path.join(folder_name, f'conf_matrix_layer_{layer_number}.png')
    plt.savefig(save_path)
    plt.close()
    print(f"Saved confusion matrix for {model_name}: {save_path}")

# SVM Confusion Matrix
svm_conf_matrix = confusion_matrix(test_labels_encoded, svm_predictions)
save_confusion_matrix(svm_conf_matrix, label_encoder.classes_, "SVM", 24, "../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix")

# CNN Confusion Matrix
cnn_conf_matrix = confusion_matrix(test_labels_encoded, cnn_predictions)
save_confusion_matrix(cnn_conf_matrix, label_encoder.classes_, "CNN", 24, "../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix")

# ANN Confusion Matrix
ann_conf_matrix = confusion_matrix(test_labels_encoded, ann_predictions)
save_confusion_matrix(ann_conf_matrix, label_encoder.classes_, "ANN", 24, "../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix")


SVM Accuracy: 0.3067
SVM Classification Report:
              precision    recall  f1-score   support

           1       0.50      0.33      0.40        12
           2       0.17      0.25      0.20         8
           3       0.58      0.56      0.57        39
           4       0.21      0.50      0.30         6
           5       0.62      0.62      0.62        42
           6       0.33      0.19      0.24        16
           7       0.00      0.00      0.00         2
           8       0.00      0.00      0.00         2

    accuracy                           0.47       127
   macro avg       0.30      0.31      0.29       127
weighted avg       0.49      0.47      0.48       127



2025-01-05 22:25:37.161330: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-05 22:25:37.234212: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-05 22:25:37.254955: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-05 22:25:37.407468: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(activity_regularizer=activity_regu

Epoch 1/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 470ms/step - accuracy: 0.2779 - loss: 4.6104 - val_accuracy: 0.0058 - val_loss: 2.7521
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 460ms/step - accuracy: 0.4907 - loss: 1.5218 - val_accuracy: 0.0233 - val_loss: 2.5675
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 457ms/step - accuracy: 0.5324 - loss: 1.3802 - val_accuracy: 0.0349 - val_loss: 2.9781
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - accuracy: 0.5523 - loss: 1.2030 - val_accuracy: 0.1337 - val_loss: 2.3584
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - accuracy: 0.6701 - loss: 0.9909 - val_accuracy: 0.0930 - val_loss: 3.2427
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - accuracy: 0.7584 - loss: 0.7117 - val_accuracy: 0.1453 - val_loss: 3.1825
Epoch 7/50
[1m22/22[

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.1911 - loss: 3.2074 - val_accuracy: 0.0058 - val_loss: 2.9325
Epoch 2/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3272 - loss: 2.3270 - val_accuracy: 0.0058 - val_loss: 2.6409
Epoch 3/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3454 - loss: 2.1073 - val_accuracy: 0.0116 - val_loss: 2.5474
Epoch 4/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4065 - loss: 2.0138 - val_accuracy: 0.0407 - val_loss: 2.2987
Epoch 5/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3978 - loss: 1.9133 - val_accuracy: 0.0116 - val_loss: 2.6510
Epoch 6/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4190 - loss: 1.7967 - val_accuracy: 0.0465 - val_loss: 2.3671
Epoch 7/50
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved confusion matrix for SVM: ../Files/confusion_matrix/balanced_accuracy_data2vec/svm_conf_matrix/conf_matrix_layer_24.png
Saved confusion matrix for CNN: ../Files/confusion_matrix/balanced_accuracy_data2vec/cnn_conf_matrix/conf_matrix_layer_24.png
Saved confusion matrix for ANN: ../Files/confusion_matrix/balanced_accuracy_data2vec/ann_conf_matrix/conf_matrix_layer_24.png
