In [1]:
import os
import numpy as np
import pandas as pd
from datetime import datetime
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing import image
import tensorflow as tf
import re

In [2]:
def extract_age_from_filename(filename):
    try:
        parts = filename.split('_')
        dob_str = parts[1]
        photo_taken_year_str = parts[2].split('.')[0]
        
        # Validate and parse the date of birth string
        if re.match(r'\d{4}-\d{2}-\d{2}', dob_str) is None:
            return None
        
        dob = datetime.strptime(dob_str, '%Y-%m-%d')
        photo_taken_year = int(photo_taken_year_str)
        
        age = photo_taken_year - dob.year
        if dob.month > 1 or (dob.month == 1 and dob.day > 1):  # If birthday hasn't occurred yet this year
            age -= 1
        
        return age
    except Exception as e:
        # Return None for invalid date formats
        return None


In [3]:
class AgeDataGenerator(Sequence):
    def __init__(self, image_filenames, labels, batch_size, img_size, base_img_dir):
        self.image_filenames = image_filenames
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.base_img_dir = base_img_dir
        self.on_epoch_end()
    
    def __len__(self):
        return len(self.image_filenames) // self.batch_size
    
    def __getitem__(self, idx):
        batch_x = self.image_filenames[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
        
        return np.array([self.preprocess_image(file_name) for file_name in batch_x]), np.array(batch_y)
    
    def on_epoch_end(self):
        pass
    
    def preprocess_image(self, file_name):
        img_path = os.path.join(self.base_img_dir, file_name)
        if not os.path.exists(img_path):
            raise FileNotFoundError(f"File not found: {img_path}")
        img = image.load_img(img_path, target_size=self.img_size)
        img = image.img_to_array(img)
        img /= 255.0
        return img



In [4]:
base_img_dir = './wiki_crop'
folders = [str(i).zfill(2) for i in range(100)]
image_filenames = []

for folder in folders:
    folder_path = os.path.join(base_img_dir, folder)
    if os.path.exists(folder_path):
        image_filenames += [os.path.join(folder, fname) for fname in os.listdir(folder_path)]

ages = [extract_age_from_filename(os.path.basename(f)) for f in image_filenames]

# Filter out None values from ages and corresponding filenames
valid_data = [(f, age) for f, age in zip(image_filenames, ages) if age is not None]
image_filenames, labels = zip(*valid_data)

batch_size = 32
img_size = (128, 128)

data_generator = AgeDataGenerator(image_filenames, labels, batch_size, img_size, base_img_dir)



In [5]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint

In [6]:
def create_cnn_model(input_shape):
    model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Conv2D(128, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='linear')
    ])
    
    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    return model

input_shape = (128, 128, 3)
model = create_cnn_model(input_shape)

  super().__init__(


In [7]:
checkpoint_path = 'model_checkpoints/epoch-{epoch:02d}_mae-{val_mae:.2f}.keras'
checkpoint_callback = ModelCheckpoint(filepath=checkpoint_path, save_weights_only=False, save_best_only=True, monitor='val_mae', mode='min')

In [9]:
model.fit(data_generator, epochs=8, callbacks=[checkpoint_callback], validation_data=data_generator)

Epoch 1/8
[1m1947/1947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m567s[0m 291ms/step - loss: 13.2525 - mae: 13.2525 - val_loss: 11.5787 - val_mae: 11.5787
Epoch 2/8
[1m1947/1947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m303s[0m 155ms/step - loss: 12.0245 - mae: 12.0245 - val_loss: 11.0971 - val_mae: 11.0971
Epoch 3/8
[1m1947/1947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m305s[0m 157ms/step - loss: 11.5028 - mae: 11.5028 - val_loss: 10.6224 - val_mae: 10.6224
Epoch 4/8
[1m1947/1947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 155ms/step - loss: 10.9906 - mae: 10.9906 - val_loss: 10.0428 - val_mae: 10.0428
Epoch 5/8
[1m1947/1947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 155ms/step - loss: 10.5255 - mae: 10.5255 - val_loss: 9.6640 - val_mae: 9.6640
Epoch 6/8
[1m1947/1947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 155ms/step - loss: 10.3609 - mae: 10.3609 - val_loss: 9.7129 - val_mae: 9.7129
Epoch 7/8
[1m1947/1947[0m [32m━━━━━

<keras.src.callbacks.history.History at 0x24907103e60>

In [10]:
# Save the final model
model.save('final_model.h5')



In [11]:
# Assuming the model and data_generator are already defined and the model is trained
scores = model.evaluate(data_generator, verbose=1)
print(f'Mean Absolute Error (MAE) on the dataset: {scores[1]:.2f}')

[1m1947/1947[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 35ms/step - loss: 9.1622 - mae: 9.1622
Mean Absolute Error (MAE) on the dataset: 9.20


In [12]:
def predict_age(image_path, model, img_size=(128, 128)):
    img = image.load_img(image_path, target_size=img_size)
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img /= 255.0
    
    predicted_age = model.predict(img)
    return predicted_age[0][0]

sample_image_path = './wiki_crop/14/39014_1896-10-30_1980.jpg'
predicted_age = predict_age(sample_image_path, model)
print(f'The predicted age for the sample image is: {predicted_age:.2f} years')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
The predicted age for the sample image is: 55.94 years
