In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load the data
X_train = pd.read_csv('hubert_features_train_3.csv').values  # Load train features from CSV
X_test = pd.read_csv('hubert_features_test_3.csv').values  # Load test features from CSV
y_train = pd.read_csv('y_train_age_group.csv')  # CSV containing Age, Gender, and Age_Group columns
y_test = pd.read_csv('y_test_age_group.csv')

# Convert gender labels to binary (m -> 0, f -> 1)
y_train['Gender'] = y_train['Gender'].map({'m': 0, 'f': 1})
y_test['Gender'] = y_test['Gender'].map({'m': 0, 'f': 1})

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Function to build the CNN model
def build_model(input_shape, num_classes):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv1D(32, kernel_size=5, activation='relu', input_shape=input_shape))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv1D(64, kernel_size=5, activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Conv1D(128, kernel_size=5, activation='relu'))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dropout(0.5))
    model.add(tf.keras.layers.Dense(512, activation='relu'))
    model.add(tf.keras.layers.Dense(num_classes))  # Output layer for regression or binary classification
    return model

# Filter and evaluate based on Age Group
age_groups = ['4-6', '7-10', '11-14']

for group in age_groups:
    # Filter train and test data for the current age group
    train_group = y_train[y_train['Age_Group'] == group]
    test_group = y_test[y_test['Age_Group'] == group]
    
    X_train_group = X_train[train_group.index].reshape(-1, 1024, 1)  # Reshape for Conv1D
    X_test_group = X_test[test_group.index].reshape(-1, 1024, 1)
    
    y_train_age = train_group['Age'].values
    y_test_age = test_group['Age'].values
    
    y_train_gender = train_group['Gender'].values
    y_test_gender = test_group['Gender'].values
    
    # Build and compile models
    model_age = build_model((1024, 1), 1)  # Regression for age
    model_gender = build_model((1024, 1), 1)  # Binary classification for gender

    model_age.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])
    model_gender.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train models
    model_age.fit(X_train_group, y_train_age, epochs=50, batch_size=32, verbose=1)
    model_gender.fit(X_train_group, y_train_gender, epochs=50, batch_size=32, verbose=1)

    # Evaluate age prediction
    y_pred_age = model_age.predict(X_test_group).flatten()
    age_accuracy = accuracy_score(np.round(y_test_age), np.round(y_pred_age))

    # Evaluate gender prediction
    y_pred_gender = model_gender.predict(X_test_group).flatten()
    y_pred_gender = (y_pred_gender > 0.5).astype(int)  # Convert logits to binary class
    gender_accuracy = accuracy_score(y_test_gender, y_pred_gender)

    # Print results
    print(f"\nAge Group: {group}")
    print(f"Age classification accuracy: {age_accuracy * 100:.2f}%")
    print(f"Gender classification accuracy: {gender_accuracy * 100:.2f}%")
    
    # Classification Reports
    print("\nAge Classification Report:")
    age_report = classification_report(np.round(y_test_age), np.round(y_pred_age), output_dict=True)
    print(classification_report(np.round(y_test_age), np.round(y_pred_age)))

    print("\nGender Classification Report:")
    gender_report = classification_report(y_test_gender, y_pred_gender, output_dict=True)
    print(classification_report(y_test_gender, y_pred_gender))
    
    # Individual accuracies for age
    print("\nIndividual Age Accuracies:")
    for age in np.unique(y_test_age):
        age_count = np.sum(y_test_age == age)
        correct_count = np.sum(np.round(y_pred_age)[y_test_age == age])
        individual_age_accuracy = correct_count / age_count * 100 if age_count > 0 else 0
        print(f"Age {age}: {individual_age_accuracy:.2f}%")

    # Individual accuracies for gender
    cm_gender = confusion_matrix(y_test_gender, y_pred_gender)
    gender_accuracies = {
        'm': cm_gender[0, 0] / (cm_gender[0, 0] + cm_gender[0, 1]) * 100,
        'f': cm_gender[1, 1] / (cm_gender[1, 0] + cm_gender[1, 1]) * 100
    }
    print("\nIndividual Gender Accuracies:")
    for gender, accuracy in gender_accuracies.items():
        print(f"Gender {gender}: {accuracy:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 439ms/step - loss: 65108.5117 - mae: 147.9364
Epoch 2/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 459ms/step - loss: 964.0497 - mae: 19.9554
Epoch 3/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 454ms/step - loss: 2029.3983 - mae: 24.2520
Epoch 4/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 458ms/step - loss: 859.0090 - mae: 17.4905
Epoch 5/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 458ms/step - loss: 244.0408 - mae: 10.6437
Epoch 6/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 457ms/step - loss: 122.0374 - mae: 8.6768
Epoch 7/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 437ms/step - loss: 74.2539 - mae: 6.8013
Epoch 8/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 458ms/step - loss: 60.8305 - mae: 5.8859
Epoch 9/50
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize

Epoch 1/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 457ms/step - loss: 2827.4690 - mae: 33.5612
Epoch 2/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 459ms/step - loss: 2625.6055 - mae: 9.5870
Epoch 3/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - loss: 79.1007 - mae: 5.8869
Epoch 4/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - loss: 11.8639 - mae: 2.5510
Epoch 5/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - loss: 5.1733 - mae: 1.5909
Epoch 6/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 458ms/step - loss: 2.7129 - mae: 0.9946
Epoch 7/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - loss: 1.4095 - mae: 0.7442
Epoch 8/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 461ms/step - loss: 0.8999 - mae: 0.6346
Epoch 9/50
[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 486ms/step - loss: 41157.6055 - mae: 111.2211
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 480ms/step - loss: 801.6915 - mae: 18.1424
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 490ms/step - loss: 1186.6736 - mae: 20.0702
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 490ms/step - loss: 662.6646 - mae: 15.8090
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 479ms/step - loss: 272.1086 - mae: 14.0408
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 482ms/step - loss: 191.9098 - mae: 13.0886
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 477ms/step - loss: 190.7247 - mae: 12.9870
Epoch 8/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 480ms/step - loss: 143.3752 - mae: 11.4875
Epoch 9/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize