In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf

# Size every face image is resized to when loaded by open_imgs; it matches the
# first two dimensions of the (114, 114, 1) input_shape hard-coded in MyAgeEstimator.
IMG_SIZE = (114, 114)

In [None]:
# remove images from folder that are not faces
def delete_files_without_keyword(folder_path, keyword):
    """Delete the regular files directly inside ``folder_path`` whose name lacks ``keyword``.

    Entries whose name contains ``keyword`` are kept, and anything that is not
    a regular file (for example a sub-directory) is skipped. A
    ``Deleted: <name>`` line is printed for each removed file.

    Args:
        folder_path: Directory to clean up (not traversed recursively).
        keyword: Substring a file name must contain to be kept.
    """
    for entry_name in os.listdir(folder_path):
        if keyword in entry_name:
            continue
        candidate = os.path.join(folder_path, entry_name)
        if not os.path.isfile(candidate):
            continue
        os.remove(candidate)
        print(f"Deleted: {entry_name}")


In [None]:
# open the csv with filenames and real_age
def open_csv(path):
    """Read the label CSV at ``path`` and keep one row per ``file_name``.

    Rows that repeat an already-seen ``file_name`` are dropped (the first
    occurrence wins), the index is rebuilt as 0..n-1, and the first rows are
    printed as a quick preview.

    Args:
        path: Location of a CSV file that has a ``file_name`` column.

    Returns:
        The de-duplicated DataFrame.
    """
    labels = (
        pd.read_csv(path)
        .drop_duplicates(subset=['file_name'])
        .reset_index(drop=True)
    )
    print(labels.head())
    return labels
# len(df_train) = 4113

In [None]:
def open_imgs(imgs_path, df, img_size):
    """Load the face crops listed in ``df`` as one stack of grayscale images.

    For every row of ``df`` the file ``<file_name>_face.jpg`` inside
    ``imgs_path`` is opened, converted to single-channel grayscale (PIL mode
    ``'L'``) and resized to ``img_size``.

    Args:
        imgs_path: Directory that contains the ``*_face.jpg`` files.
        df: DataFrame with a ``file_name`` column; images are loaded in row
            order, whatever the DataFrame index looks like.
        img_size: ``(rows, cols)`` of each output image, i.e. (height, width).

    Returns:
        Integer array of shape ``(len(df), img_size[0], img_size[1])``; no
        channel axis is added because the images are grayscale.
    """
    height, width = img_size[0], img_size[1]
    imgs = np.zeros((len(df), height, width), dtype=int)

    # Use the row position rather than the index label as the slot in `imgs`,
    # so a filtered or otherwise non 0..n-1 index cannot write to the wrong
    # slot or run past the end of the array.
    for position, file_name in enumerate(df['file_name']):
        img_file = os.path.join(imgs_path, file_name + '_face.jpg')
        # The context manager closes the underlying file as soon as the pixel
        # data has been converted.
        with Image.open(img_file) as img:
            # PIL's resize expects (width, height), the reverse of the
            # (rows, cols) layout of the NumPy array filled below.
            resized = img.convert('L').resize((width, height))
        imgs[position] = np.asarray(resized)

    return imgs

In [None]:
# Root folder of the dataset. It defaults to the original local download
# location but can be redirected without editing the notebook by setting the
# APPA_REAL_DIR environment variable before starting the kernel.
DATA_DIR = os.environ.get('APPA_REAL_DIR', '/Users/mattia/Downloads/appa-real-release')

# Training split: label table, face images and the age column used as target.
df_train = open_csv(os.path.join(DATA_DIR, 'gt_train.csv'))
train_path = os.path.join(DATA_DIR, 'train')
x_train = open_imgs(train_path, df_train, IMG_SIZE)
y_train = df_train['real_age']

# Validation split, loaded exactly the same way.
df_valid = open_csv(os.path.join(DATA_DIR, 'gt_valid.csv'))
valid_path = os.path.join(DATA_DIR, 'valid')
x_valid = open_imgs(valid_path, df_valid, IMG_SIZE)
y_valid = df_valid['real_age']

In [None]:
# Rescale every pixel from its integer representation (0 to 255) to a float
# between 0 and 1.0, and append a trailing axis of length 1 in the same step.
x_train_normalized = (x_train / 255.0)[..., np.newaxis]
x_valid_normalized = (x_valid / 255.0)[..., np.newaxis]
print(x_train_normalized.shape, x_valid_normalized.shape)

In [None]:
# Histogram of the training targets (y_train), drawn on the current axes.
ax = plt.gca()
ax.hist(y_train, bins=100, color='blue', alpha=0.7)
ax.set_title('Distribution of Values')
ax.set_xlabel('Value')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
def age2class(age):
    """Map an age in years to one of ten ordinal age-group labels (0..9).

    Groups, in whole years: 0-4, 5-9, 10-14, 15-20, 21-26, 27-35, 36-43,
    44-50, 51-62 and 63 or older.

    A fractional age belongs to the group of its completed years (4.5 -> 0,
    62.5 -> 8) instead of falling through the gap between two integer ranges
    and being labelled as the oldest group.

    Args:
        age: Non-negative age (Python or NumPy scalar).

    Returns:
        int: The group label, from 0 (youngest) to 9 (oldest).

    Raises:
        ValueError: If ``age`` is negative or NaN. Such values used to be
            labelled silently as the oldest group.
    """
    from bisect import bisect_right

    # `not age >= 0` is also true for NaN, which `age < 0` would let through.
    if not age >= 0:
        raise ValueError(f"age must be a non-negative number, got {age!r}")

    # Smallest age of groups 1..9; the label is the number of these
    # thresholds that the age has reached.
    lower_bounds = (5, 10, 15, 21, 27, 36, 44, 51, 63)
    return bisect_right(lower_bounds, age)

# Element-wise wrapper so that age2class accepts a whole array of ages.
vectorized_age2class = np.vectorize(age2class)

# Turn the ages of both splits into class labels.
y_train_binned, y_valid_binned = (
    vectorized_age2class(ages) for ages in (y_train, y_valid)
)


In [None]:
def plot_curve(epochs, hist, list_of_metrics):
    """Draw one curve per metric against the epoch on a new figure.

    The first entry of ``epochs`` and of every metric series is left out of
    the plot.

    Args:
        epochs: Sequence of epoch indices.
        hist: Mapping (for example a DataFrame) from metric name to its
            per-epoch values.
        list_of_metrics: Names of the entries of ``hist`` to draw; see
            https://www.tensorflow.org/tutorials/structured_data/imbalanced_data#define_the_model_and_metrics
            for the metric names.
    """
    axes = plt.figure().gca()
    axes.set_xlabel("Epoch")
    axes.set_ylabel("Value")

    for metric_name in list_of_metrics:
        axes.plot(epochs[1:], hist[metric_name][1:], label=metric_name)

    axes.legend()

print("Loaded the plot_curve function.")

In [None]:
# custom cnn found at https://medium.com/mlearning-ai/age-detection-using-cnn-with-keras-with-source-code-easiest-way-easy-implementation-57c107b23bc4
class MyAgeEstimator(tf.keras.Model):
    """Small CNN that classifies 114x114 single-channel face images into age groups.

    The network itself is the ``Sequential`` model stored in ``self.model``:
    four Conv2D(3x3, ReLU) + 2x2 average-pooling stages with 32, 64, 128 and
    256 filters, global average pooling, a 132-unit ReLU dense layer and a
    softmax output with ``num_classes`` units. ``self.model`` is compiled in
    the constructor, so training is started with ``instance.model.fit(...)``.

    Args:
        num_classes: Number of output classes (units of the softmax layer).
        learning_rate: Learning rate of the Adam optimizer.
    """

    def __init__(self, num_classes, learning_rate):
        super().__init__()

        self.model = tf.keras.models.Sequential([
            tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=(114, 114, 1)),
            tf.keras.layers.AveragePooling2D(pool_size=(2, 2)),
            tf.keras.layers.Conv2D(filters=64, kernel_size=3, activation='relu'),
            tf.keras.layers.AveragePooling2D(pool_size=(2, 2)),
            tf.keras.layers.Conv2D(filters=128, kernel_size=3, activation='relu'),
            tf.keras.layers.AveragePooling2D(pool_size=(2, 2)),
            tf.keras.layers.Conv2D(filters=256, kernel_size=3, activation='relu'),
            tf.keras.layers.AveragePooling2D(pool_size=(2, 2)),
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(132, activation='relu'),
            tf.keras.layers.Dense(num_classes, activation='softmax')
        ])

        self.model.compile(
            # Honor the learning_rate argument: the plain 'adam' string used
            # before always selected Adam's default learning rate and silently
            # ignored the value passed by the caller.
            optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
            # Sparse loss: the targets are integer class ids, not one-hot vectors.
            loss="sparse_categorical_crossentropy",
            metrics=['accuracy']
        )

    def call(self, inputs):
        """Run ``inputs`` through the wrapped Sequential model and return its output."""
        return self.model(inputs)


In [None]:
# Training hyper-parameters.
num_classes = 10       # age2class produces the labels 0..9
learning_rate = 1e-5   # handed to MyAgeEstimator below
batch_size = 128
epochs = 25

# Create the model here so that the training cell finds `ageEst` on a fresh
# kernel (Restart & Run All); with these lines commented out that cell fails
# with a NameError unless a stale `ageEst` is still in memory.
ageEst = MyAgeEstimator(num_classes, learning_rate)
ageEst.model.summary()

In [None]:
# NOTE(review): shuffle=False keeps the sample order identical in every epoch;
# confirm that this is intended.
history = ageEst.model.fit(
    x=x_train_normalized,
    y=y_train_binned,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_valid_normalized, y_valid_binned),
    shuffle=False,
)

# Keep the per-epoch indices under their own name. Rebinding `epochs` here
# would replace the integer epoch count with a list, so running this cell a
# second time would hand that list to fit(epochs=...).
epoch_indices = history.epoch
hist = pd.DataFrame(history.history)
list_of_metrics_to_plot = ['accuracy']
plot_curve(epoch_indices, hist, list_of_metrics_to_plot)

In [None]:
# Loss and accuracy curves of the last training run, side by side.
plotting_data_dict = history.history

valid_loss = plotting_data_dict['val_loss']
training_loss = plotting_data_dict['loss']
valid_accuracy = plotting_data_dict['val_accuracy']
training_accuracy = plotting_data_dict['accuracy']

# 1-based epoch numbers for the x axis. A separate name is used so that the
# `epochs` hyper-parameter (an integer) is not overwritten, which would break
# re-running the training cell.
epoch_numbers = range(1, len(valid_loss) + 1)

fig, (ax_loss, ax_accuracy) = plt.subplots(1, 2, figsize=(12, 8))

ax_loss.plot(epoch_numbers, valid_loss, marker='X', label='valid_loss')
ax_loss.plot(epoch_numbers, training_loss, marker='X', label='training_loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

ax_accuracy.plot(epoch_numbers, valid_accuracy, marker='X', label='valid_accuracy')
ax_accuracy.plot(epoch_numbers, training_accuracy, marker='X', label='training_accuracy')
ax_accuracy.set_xlabel('Epoch')
ax_accuracy.set_ylabel('Accuracy')
ax_accuracy.legend()

plt.show()