In [2]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

# Load the data
train_features = pd.read_csv('hubert_features_train_3.csv'  ).values  # (samples, 1024)
test_features = pd.read_csv('hubert_features_test_3.csv').values    # (samples, 1024)
train_labels = pd.read_csv('y_train_age_group.csv')['Age_Group']
test_labels = pd.read_csv('y_test_age_group.csv')['Age_Group']

# Encode the age groups
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)  # Convert '4-6', '7-9', '10-14' to numerical values
test_labels = label_encoder.transform(test_labels)

# Reshape the features for 1D CNN input
train_features = train_features.reshape(-1, 1024, 1)
test_features = test_features.reshape(-1, 1024, 1)

# Build the CNN model
def create_cnn_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv1D(128, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(3, activation='softmax')  # 3 output neurons for 3 age groups
    ])
    return model

# Initialize the model
input_shape = (1024, 1)
model = create_cnn_model(input_shape)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_features, train_labels, epochs=20, batch_size=32, validation_data=(test_features, test_labels))

# Evaluate the model
predictions = model.predict(test_features)
predicted_labels = np.argmax(predictions, axis=1)

# Calculate and print classification report and overall accuracy
print("Classification Report:")
print(classification_report(test_labels, predicted_labels, target_names=label_encoder.classes_))

# Calculate individual accuracy for each age group
age_groups = label_encoder.classes_
individual_accuracies = {}

for idx, age_group in enumerate(age_groups):
    group_indices = (test_labels == idx)  # Find all indices of this specific age group
    group_accuracy = accuracy_score(test_labels[group_indices], predicted_labels[group_indices])
    individual_accuracies[age_group] = group_accuracy * 100

# Print individual accuracies
for age_group, accuracy in individual_accuracies.items():
    print(f"Accuracy for age group {age_group}: {accuracy:.2f}%")

# Print overall accuracy
overall_accuracy = accuracy_score(test_labels, predicted_labels)
print(f"Overall Accuracy: {overall_accuracy * 100:.2f}%")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 457ms/step - accuracy: 0.5602 - loss: 67.1915 - val_accuracy: 0.6124 - val_loss: 1.1308
Epoch 2/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 441ms/step - accuracy: 0.8619 - loss: 0.8889 - val_accuracy: 0.7829 - val_loss: 0.6651
Epoch 3/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 441ms/step - accuracy: 0.9655 - loss: 0.1144 - val_accuracy: 0.8140 - val_loss: 0.5433
Epoch 4/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 457ms/step - accuracy: 0.9714 - loss: 0.0594 - val_accuracy: 0.8605 - val_loss: 0.3914
Epoch 5/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 467ms/step - accuracy: 0.9962 - loss: 0.0315 - val_accuracy: 0.8217 - val_loss: 0.4527
Epoch 6/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 468ms/step - accuracy: 0.9973 - loss: 0.0159 - val_accuracy: 0.8450 - val_loss: 0.3752
Epoch 7/20
[1m27/27

In [12]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score

# Load the data
train_features = pd.read_csv('mfcc_features_train.csv').values  # (samples, 26)
test_features = pd.read_csv('mfcc_features_test.csv').values    # (samples, 26)
train_labels = pd.read_csv('y_train_age_group.csv')['Age_Group']
test_labels = pd.read_csv('y_test_age_group.csv')['Age_Group']

# Encode the age groups
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)  # Convert '4-6', '7-9', '10-14' to numerical values
test_labels = label_encoder.transform(test_labels)

# Reshape the features for 1D CNN input
train_features = train_features.reshape(-1, 26, 1)  # Adjusted for 26 features
test_features = test_features.reshape(-1, 26, 1)    # Adjusted for 26 features

# Build the CNN model
def create_cnn_model(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dense(3, activation='softmax')  # 3 output neurons for 3 age groups
    ])
    return model

# Initialize the model
input_shape = (26, 1)  # Input shape for 26 features
model = create_cnn_model(input_shape)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(train_features, train_labels, epochs=20, batch_size=32, validation_data=(test_features, test_labels))

# Evaluate the model
predictions = model.predict(test_features)
predicted_labels = np.argmax(predictions, axis=1)

# Calculate and print classification report and overall accuracy
print("Classification Report:")
print(classification_report(test_labels, predicted_labels, target_names=label_encoder.classes_))

# Calculate individual accuracy for each age group
age_groups = label_encoder.classes_
individual_accuracies = {}

for idx, age_group in enumerate(age_groups):
    group_indices = (test_labels == idx)  # Find all indices of this specific age group
    group_accuracy = accuracy_score(test_labels[group_indices], predicted_labels[group_indices])
    individual_accuracies[age_group] = group_accuracy * 100

# Print individual accuracies
for age_group, accuracy in individual_accuracies.items():
    print(f"Accuracy for age group {age_group}: {accuracy:.2f}%")

# Print overall accuracy
overall_accuracy = accuracy_score(test_labels, predicted_labels)
print(f"Overall Accuracy: {overall_accuracy * 100:.2f}%")


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.6347 - loss: 1.0840 - val_accuracy: 0.6667 - val_loss: 0.7175
Epoch 2/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8236 - loss: 0.4617 - val_accuracy: 0.8140 - val_loss: 0.5342
Epoch 3/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8519 - loss: 0.3979 - val_accuracy: 0.8450 - val_loss: 0.5450
Epoch 4/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8845 - loss: 0.3419 - val_accuracy: 0.8527 - val_loss: 0.4473
Epoch 5/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9050 - loss: 0.2491 - val_accuracy: 0.8682 - val_loss: 0.3973
Epoch 6/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.9353 - loss: 0.1893 - val_accuracy: 0.8760 - val_loss: 0.4094
Epoch 7/20
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━

In [None]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Load features and labels
train_features = pd.read_csv('hubert_features_train_3.csv').values  # Training features
test_features = pd.read_csv('hubert_features_test_3.csv').values    # Testing features
train_data = pd.read_csv('y_train_age_group.csv')  # Contains Age_Group, Age, Gender columns
test_data = pd.read_csv('y_test_age_group.csv')    # Contains Age_Group, Age, Gender columns

# Combine train and test labels for consistent encoding
combined_data = pd.concat([train_data, test_data])

# Encode the age groups, ages, and genders
age_group_encoder = LabelEncoder()
combined_data['Age_Group'] = age_group_encoder.fit_transform(combined_data['Age_Group'])
train_data['Age_Group'] = age_group_encoder.transform(train_data['Age_Group'])
test_data['Age_Group'] = age_group_encoder.transform(test_data['Age_Group'])

age_encoder = LabelEncoder()
combined_data['Age'] = age_encoder.fit_transform(combined_data['Age'])
train_data['Age'] = age_encoder.transform(train_data['Age'])
test_data['Age'] = age_encoder.transform(test_data['Age'])

gender_encoder = LabelEncoder()
combined_data['Gender'] = gender_encoder.fit_transform(combined_data['Gender'])
train_data['Gender'] = gender_encoder.transform(train_data['Gender'])
test_data['Gender'] = gender_encoder.transform(test_data['Gender'])

# Prepare features for the CNN model
train_features = train_features.reshape(-1, 1024, 1)
test_features = test_features.reshape(-1, 1024, 1)

# Build the CNN model
def create_cnn_model(input_shape, output_neurons):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv1D(32, kernel_size=3, activation='relu', input_shape=input_shape),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv1D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv1D(128, kernel_size=3, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.5),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(output_neurons, activation='softmax')
    ])
    return model

# Train Age Group Model
age_group_model = create_cnn_model((1024, 1), output_neurons=3)  # 3 Age Groups
age_group_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
age_group_model.fit(train_features, train_data['Age_Group'], epochs=20, batch_size=32, validation_split=0.2)

# Train Age Model
age_model = create_cnn_model((1024, 1), output_neurons=len(age_encoder.classes_))  # Number of unique ages
age_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
age_model.fit(train_features, train_data['Age'], epochs=20, batch_size=32, validation_split=0.2)

# Train Gender Model
gender_model = create_cnn_model((1024, 1), output_neurons=2)  # Male and Female
gender_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
gender_model.fit(train_features, train_data['Gender'], epochs=20, batch_size=32, validation_split=0.2)

# Predict Age Group, Age, and Gender
age_group_predictions = age_group_model.predict(test_features)
age_group_pred_labels = np.argmax(age_group_predictions, axis=1)

age_predictions = age_model.predict(test_features)
age_pred_labels = np.argmax(age_predictions, axis=1)

gender_predictions = gender_model.predict(test_features)
gender_pred_labels = np.argmax(gender_predictions, axis=1)

# Initialize dictionaries to store overall accuracies for ages and genders within each age group
group_accuracies = {}

# Iterate over each age group and calculate overall accuracy for ages and genders
for idx, age_group_name in enumerate(age_group_encoder.classes_):
    # Find indices where the predicted age group matches the actual age group
    correct_group_indices = (age_group_pred_labels == idx) & (test_data['Age_Group'].values == idx)
    
    # Extract the true and predicted ages for samples in this age group
    true_ages = test_data['Age'][correct_group_indices].values
    pred_ages = age_pred_labels[correct_group_indices]
    
    # Extract the true and predicted genders for samples in this age group
    true_genders = test_data['Gender'][correct_group_indices].values
    pred_genders = gender_pred_labels[correct_group_indices]
    
    # Calculate overall accuracy for age and gender identification within the group
    if len(true_ages) > 0:
        age_accuracy = accuracy_score(true_ages, pred_ages) * 100
        gender_accuracy = accuracy_score(true_genders, pred_genders) * 100
        
        # Store results
        group_accuracies[age_group_name] = {
            "Age Accuracy": age_accuracy,
            "Gender Accuracy": gender_accuracy
        }

# Print overall accuracy for ages and genders within each group
print("Age Group Classification Report:")
print(classification_report(test_data['Age_Group'], age_group_pred_labels, target_names=age_group_encoder.classes_))

for group, metrics in group_accuracies.items():
    print(f"Group {group}:")
    print(f"  Overall Age Identification Accuracy: {metrics['Age Accuracy']:.2f}%")
    print(f"  Overall Gender Identification Accuracy: {metrics['Gender Accuracy']:.2f}%")
