# Age Detection - Performance on Different Groups

#### Oliver Li, Dustin Huang, & Kate Nazzaro

In [None]:
from google.colab import drive

# Attach Google Drive to the Colab VM. The model file loaded by a later cell
# is read from a path underneath this mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import h5py
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

# Saved Keras model (HDF5 file on the mounted Drive); later cells call
# model.predict() on the test images to obtain predicted ages.
model = load_model('/content/drive/MyDrive/model_best_mse.h5')

# Raw dataset CSV. Use the same absolute Drive prefix as the model path above:
# the previous relative path ('drive/MyDrive/...') only resolved when the
# notebook's working directory happened to be /content.
data = pd.read_csv('/content/drive/MyDrive/cleaned_age_gender_ethnicity_data.csv')

In [None]:
# Build the image array and label vectors from the raw CSV frame.

# Work on an explicit copy of `data`. pd.DataFrame(data) is not guaranteed to
# be independent of `data`, so overwriting 'pixels' could also change the raw
# frame; a second run of this cell would then fail when calling .split() on
# already-parsed arrays. Copying keeps `data` raw and makes this cell re-runnable.
df1 = data.copy()

# Side length of the square images stored in the 'pixels' column.
IMG_SIDE = 48

# Parse each space-separated pixel string into a float32 IMG_SIDE x IMG_SIDE
# array and scale it by 1/255 (same values and dtype as the previous two-step
# reshape followed by a column-wide division).
df1['pixels'] = df1['pixels'].apply(
    lambda raw: np.array(raw.split(' '), dtype='float32').reshape(IMG_SIDE, IMG_SIDE) / 255
)

# Stack the per-row images into a single (n_rows, IMG_SIDE, IMG_SIDE) array.
# The row count now follows the data instead of the hard-coded 23705, and the
# images are taken in positional order rather than looked up by index label.
# Cast to float64 to keep the dtype the np.zeros-based buffer used to have.
X = np.stack(df1['pixels'].tolist()).astype('float64')

ethnicity_column = df1["ethnicity"]

# Age
Y_regression = df1["age"].values

In [None]:
# Preview the first rows of the prepared frame; 'pixels' now holds the parsed,
# scaled 48x48 arrays rather than the raw strings from the CSV.
df1.head()

Unnamed: 0,age,ethnicity,gender,img_name,pixels
0,1,2,0,20161219203650636.jpg.chip.jpg,"[[0.5058824, 0.5019608, 0.5019608, 0.49411765,..."
1,1,2,0,20161219222752047.jpg.chip.jpg,"[[0.6431373, 0.2901961, 0.43529412, 0.65882355..."
2,1,2,0,20161219222832191.jpg.chip.jpg,"[[0.2627451, 0.27450982, 0.2784314, 0.27450982..."
3,1,2,0,20161220144911423.jpg.chip.jpg,"[[0.75686276, 0.77254903, 0.7764706, 0.7843137..."
4,1,2,0,20161220144914327.jpg.chip.jpg,"[[0.7921569, 0.8039216, 0.81960785, 0.8235294,..."


In [None]:
from sklearn.model_selection import train_test_split

# Stage 1: hold out 20% of all rows as the test set (fixed seed).
(X_train, X_test,
 Y_train, Y_test,
 ethnicity_train, ethnicity_test) = train_test_split(
    X, Y_regression, ethnicity_column,
    test_size=0.2, random_state=42,
)

# Stage 2: take 25% of the remaining rows as the validation set
# (0.25 * 0.8 = 20% of the full data, leaving 60% for training).
(X_train, X_val,
 Y_train, Y_val,
 ethnicity_train, ethnicity_val) = train_test_split(
    X_train, Y_train, ethnicity_train,
    test_size=0.25, random_state=42,
)

# Scale every split by 1/255.
# NOTE(review): the 'pixels' values were already divided by 255 when X was
# built, so this is a second scaling. Presumably the saved model was trained
# on inputs prepared the same way -- confirm before changing or removing it.
X_train, X_val, X_test = (split / 255.0 for split in (X_train, X_val, X_test))

In [None]:
# Run the model on the held-out test images and flatten its output to 1-D so
# it lines up row-for-row with the true ages.
predictions = model.predict(X_test)
predicted_ages = predictions.flatten()
results_df = pd.DataFrame({
    'True Age': Y_test,
    'Predicted Age': predicted_ages,
    'Ethnicity': ethnicity_test,
})

# Report the mean absolute error separately for each ethnicity flag (0-4).
for ethnicity_flag in range(5):
    in_group = results_df['Ethnicity'] == ethnicity_flag
    ethnicity_subset = results_df.loc[in_group]

    # MAE = mean of |true age - predicted age| over the rows of this group
    abs_error = (ethnicity_subset['True Age'] - ethnicity_subset['Predicted Age']).abs()
    mae = abs_error.mean()

    print(f"Ethnicity {ethnicity_flag}: Mean Absolute Error (MAE) = {mae}")


Ethnicity 0: Mean Absolute Error (MAE) = 7.483409810543759
Ethnicity 1: Mean Absolute Error (MAE) = 6.7639646546634955
Ethnicity 2: Mean Absolute Error (MAE) = 5.255632928562672
Ethnicity 3: Mean Absolute Error (MAE) = 6.286912899334633
Ethnicity 4: Mean Absolute Error (MAE) = 5.270391984913487


In [None]:
# Summary statistics of the ethnicity flag across the test-set results
# (count is the number of test rows).
# NOTE(review): Ethnicity appears to be a categorical flag (0-4), so the
# mean/std/quartiles are of limited meaning; value_counts() would show the
# per-group sample sizes behind the MAE figures -- consider adding it.
results_df['Ethnicity'].describe()

count    4741.000000
mean        1.254166
std         1.344996
min         0.000000
25%         0.000000
50%         1.000000
75%         2.000000
max         4.000000
Name: Ethnicity, dtype: float64