In [None]:
import os
import cv2 as cv
import numpy as np
import tensorflow as tf
import scipy.io
import pandas as pd
import datetime as dt
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import concurrent.futures
from tensorflow.keras.preprocessing.image import ImageDataGenerator

np.random.seed(42)
tf.random.set_seed(42)

def class_labels_reassign(age):
    """Map an age in years to one of nine ordinal age-bracket class labels.

    Brackets (upper bounds inclusive):
        <=4 -> 0, 5-10 -> 1, 11-20 -> 2, 21-30 -> 3, 31-40 -> 4,
        41-50 -> 5, 51-60 -> 6, 61-70 -> 7, 71 and above -> 8.

    Age 0 (a photo taken in the birth year) belongs to the youngest bracket;
    it must not fall through to the "oldest" catch-all class. Fractional ages
    are assigned to the first bracket whose upper bound they do not exceed,
    so values such as 10.5 land in the 11-20 bracket instead of the catch-all.

    Args:
        age: Age in years (int or float). Callers should filter out negative
            ages beforehand; any value <= 4 is placed in class 0.

    Returns:
        int: Class label in the range 0..8. NaN compares false against every
        bound and therefore yields 8.
    """
    # Inclusive upper bound of each bracket; the tuple index is the class label.
    upper_bounds = (4, 10, 20, 30, 40, 50, 60, 70)
    for label, upper in enumerate(upper_bounds):
        if age <= upper:
            return label
    # Older than every bound: the open-ended last bracket.
    return len(upper_bounds)

def _matlab_datenum_to_year(dob):
    """Return the calendar year of a MATLAB serial date number.

    MATLAB datenums count days from year 0000, whereas Python proleptic
    ordinals start at 0001-01-01, so the Python ordinal is ``datenum - 366``.
    Values that cannot be converted (missing, too small, or beyond the range
    supported by ``datetime``) yield the sentinel year 1, which produces an
    implausible age that the caller filters out.
    """
    # `not dob > 366` also catches NaN, which int() could not convert.
    if not dob > 366:
        return 1
    ordinal = int(dob) - 366
    if ordinal < 1 or ordinal > dt.date.max.toordinal():
        return 1
    return dt.datetime.fromordinal(ordinal).year


def extract_age(path):
    """Load IMDB metadata from ``imdb.mat`` and derive an age for every photo.

    Args:
        path: Directory that contains ``imdb.mat``.

    Returns:
        pandas.DataFrame with columns ``face_score``, ``full_path``,
        ``gender`` and ``age`` (photo year minus birth year). Rows with a
        missing gender, a face score of -inf, or an age outside 0..100 are
        removed, and a reproducible random half (``random_state=42``) of the
        remaining rows is returned.
    """
    mat = scipy.io.loadmat(os.path.join(path, 'imdb.mat'))
    instances = mat['imdb'][0, 0]
    face_score_data = instances['face_score'][0]
    dob_data = instances['dob'][0]
    photo_taken_data = instances['photo_taken'][0]
    filename_data = instances['full_path'][0]
    gender = instances['gender'][0]

    # Birth year per record; unparseable dates become year 1 (see helper).
    birth_years = np.array([_matlab_datenum_to_year(dob) for dob in dob_data])
    age_data = photo_taken_data - birth_years

    data = pd.DataFrame({'face_score': face_score_data, 'full_path': filename_data, 'gender': gender, 'age': age_data})
    data = data.dropna(subset=['gender'])
    data = data[data['face_score'] != -np.inf]
    # Also discards the sentinel birth year, which yields ages around 2000.
    data = data[(data['age'] >= 0) & (data['age'] <= 100)]
    data = data.sample(frac=0.5, random_state=42)
    return data

def process_images_parallel(image_paths, base_path, num_workers=6):
    """Load and preprocess many images concurrently with a thread pool.

    Args:
        image_paths: Iterable of path entries, each forwarded to
            ``process_image``.
        base_path: Root directory forwarded to ``process_image`` for every
            entry.
        num_workers: Maximum number of worker threads.

    Returns:
        numpy.ndarray built from the per-image results, in the same order as
        ``image_paths``.
    """
    def _load(relative_path):
        # Bind the shared base directory so the pool only maps over the paths.
        return process_image(relative_path, base_path)

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as pool:
        loaded = [image for image in pool.map(_load, image_paths)]
    return np.array(loaded)

def process_image(image_path, base_path):
    """Read one image as grayscale and resize it to 60x60 pixels.

    Args:
        image_path: Path entry relative to ``base_path``. Its string form is
            cleaned of surrounding square brackets and of single quotes
            before being joined to ``base_path``.
        base_path: Directory the relative path is resolved against.

    Returns:
        The resized grayscale image, or a 60x60 ``uint8`` array of zeros when
        ``cv.imread`` yields ``None`` for the file.
    """
    target_size = (60, 60)
    relative_path = str(image_path).strip("[]").replace("'", "")
    pixels = cv.imread(os.path.join(base_path, relative_path), cv.IMREAD_GRAYSCALE)
    if pixels is not None:
        return cv.resize(pixels, target_size)
    # Unreadable file: hand back a blank placeholder of the expected shape.
    return np.zeros(target_size, dtype=np.uint8)

# Dataset location: override with the IMDB_CROP_DIR environment variable so the
# notebook is not tied to a single machine; the original path is the fallback.
path = os.environ.get('IMDB_CROP_DIR', 'C:\\Users\\osmns\\Desktop\\imdb_crop')
data = extract_age(path)
data['age_interval'] = data['age'].map(class_labels_reassign)
image_paths = data['full_path'].tolist()

# process_images_parallel already returns an ndarray; no second copy is needed.
processed_images = process_images_parallel(image_paths, path)

# process_image substitutes an all-zero 60x60 placeholder for files it cannot
# read. Keeping those would pair blank images with real age labels, so drop
# them (and the matching metadata rows) before any resampling or training.
readable = processed_images.any(axis=(1, 2))
n_unreadable = int((~readable).sum())
if n_unreadable:
    print(f"Dropping {n_unreadable} unreadable or blank images")
data = data[readable]
processed_images = processed_images[readable]
age_intervals = data['age_interval']

# Flatten images so each one is a single feature vector (needed by SMOTE)
n_samples = processed_images.shape[0]
processed_images_flat = processed_images.reshape((n_samples, -1))
labels = np.asarray(age_intervals)

# Split BEFORE oversampling. SMOTE synthesises samples by interpolating between
# neighbouring images; resampling the whole dataset first would place synthetic
# near-duplicates of training images in the validation and test sets and
# inflate every reported metric. Stratifying keeps each age bracket represented
# in all three subsets despite the class imbalance.
X_train_flat, X_temp_flat, y_train_raw, y_temp_raw = train_test_split(
    processed_images_flat, labels, test_size=0.2, random_state=42, stratify=labels
)
X_val_flat, X_test_flat, y_val_raw, y_test_raw = train_test_split(
    X_temp_flat, y_temp_raw, test_size=0.5, random_state=42, stratify=y_temp_raw
)

# Balance the classes of the TRAINING set only; validation and test keep the
# real-world class distribution so they measure genuine generalisation.
smote = SMOTE(random_state=42, k_neighbors=3)
X_smote, y_smote = smote.fit_resample(X_train_flat, y_train_raw)

# Reshape the flat vectors back to single-channel 60x60 images
X_train = X_smote.reshape((X_smote.shape[0], 60, 60, 1))
X_val = X_val_flat.reshape((X_val_flat.shape[0], 60, 60, 1))
X_test = X_test_flat.reshape((X_test_flat.shape[0], 60, 60, 1))

# One-hot encode the labels
y_train = tf.keras.utils.to_categorical(y_smote, num_classes=9)
y_val = tf.keras.utils.to_categorical(y_val_raw, num_classes=9)
y_test = tf.keras.utils.to_categorical(y_test_raw, num_classes=9)

# Data augmentation: small random rotations, shifts, shears, zooms and
# horizontal flips applied on the fly to each training batch.
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

# datagen.fit(X_train) is intentionally not called: fit() only computes
# statistics for featurewise_center, featurewise_std_normalization and
# zca_whitening, none of which are enabled here, so the call would merely make
# a full float copy of the training set. Add the call back if one of those
# options is ever switched on.

# Model definition: three Conv2D + MaxPooling2D stages (32, 64, 128 filters)
# followed by a dense classifier head with dropout and a 9-way softmax output.
layers = tf.keras.layers

conv_stack = [
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(60, 60, 1)),
    layers.MaxPooling2D((2, 2)),
]
for n_filters in (64, 128):
    conv_stack += [
        layers.Conv2D(n_filters, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
    ]

classifier_head = [
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(9, activation='softmax'),
]

model = tf.keras.Sequential(conv_stack + classifier_head)

# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training configuration
batch_size = 32
epochs = 50

# Augmented batches are generated on the fly from the training arrays; one
# epoch covers as many full batches as fit into the training set.
train_batches = datagen.flow(X_train, y_train, batch_size=batch_size)
steps = len(X_train) // batch_size

history = model.fit(
    train_batches,
    steps_per_epoch=steps,
    epochs=epochs,
    validation_data=(X_val, y_val)
)

# Persist the trained model, then read it back from disk
model_file = 'age_recognition_2.h5'
model.save(model_file)
loaded_model = tf.keras.models.load_model(model_file)

# Score the reloaded model on the held-out test set
evaluation = loaded_model.evaluate(X_test, y_test)
test_loss, test_acc = evaluation
print(f"Test accuracy of the loaded model: {test_acc}")


In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

In [None]:
import matplotlib.pyplot as plt

# Per-epoch training curves recorded by model.fit
train_loss = history.history['loss']
val_loss = history.history['val_loss']
train_accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']

# Accuracy and loss side by side. The loss curves were previously extracted but
# never shown, although diverging train/validation loss is the clearest sign of
# overfitting.
fig, (ax_acc, ax_loss) = plt.subplots(ncols=2, figsize=(15, 7))

ax_acc.plot(train_accuracy, label='Train Accuracy', color='royalblue', marker='o', markersize=5)
ax_acc.plot(val_accuracy, label='Validation Accuracy', color='orangered', marker='o', markersize=5)
ax_acc.set_title('Accuracy per Epoch', fontsize=14)
ax_acc.set_ylabel('Accuracy', fontsize=14)

ax_loss.plot(train_loss, label='Train Loss', color='royalblue', marker='o', markersize=5)
ax_loss.plot(val_loss, label='Validation Loss', color='orangered', marker='o', markersize=5)
ax_loss.set_title('Loss per Epoch', fontsize=14)
ax_loss.set_ylabel('Loss', fontsize=14)

# Shared cosmetics for both panels
for panel in (ax_acc, ax_loss):
    panel.set_xlabel('Epochs', fontsize=14)
    panel.legend(fontsize=14)
    panel.tick_params(axis='both', labelsize=12)

fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, mean_squared_error
import numpy as np

# Recover integer class labels from the one-hot encoded test targets
y_test_labels = y_test.argmax(axis=1)

# Predicted class = index of the highest predicted probability
y_pred_probs = model.predict(X_test)
y_pred_labels = y_pred_probs.argmax(axis=1)

# Classification metrics; the per-class scores are averaged weighted by support
weighted = {'average': 'weighted'}
precision = precision_score(y_test_labels, y_pred_labels, **weighted)
recall = recall_score(y_test_labels, y_pred_labels, **weighted)
f1 = f1_score(y_test_labels, y_pred_labels, **weighted)
accuracy = accuracy_score(y_test_labels, y_pred_labels)

# RMSE is a regression metric; here it is computed on the class indices and
# reported for completeness.
mse = mean_squared_error(y_test_labels, y_pred_labels)
rmse = np.sqrt(mse)

def mean_absolute_percentage_error(y_true, y_pred):
    """Mean absolute percentage error, in percent, over non-zero targets.

    The percentage error is undefined where the true value is 0, so those
    positions are excluded instead of turning the whole result into inf/nan.

    Args:
        y_true: Array-like of true values.
        y_pred: Array-like of predicted values, broadcastable against
            ``y_true``.

    Returns:
        The mean of ``|y_true - y_pred| / |y_true|`` over positions where
        ``y_true != 0``, multiplied by 100; ``nan`` when no such position
        exists (including empty input).
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # Zero targets are masked out below; silence the warnings they would raise.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratios = np.abs((y_true - y_pred) / y_true)
    valid = np.broadcast_to(y_true != 0, ratios.shape)
    if not valid.any():
        return float('nan')
    return np.mean(ratios[valid]) * 100

mape = mean_absolute_percentage_error(y_test_labels, y_pred_labels)

# Metric report, one metric per line
report_lines = [
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1 Score: {f1}",
    f"Accuracy: {accuracy}",
    f"RMSE: {rmse}",
    f"MAPE: {mape}",
]
print("\n".join(report_lines))

import matplotlib.pyplot as plt

# Define a function to display images with their predictions
def display_predictions(images, true_labels, predictions, num_images=5):
    """Show a row of images titled with their true and predicted labels.

    Args:
        images: Indexable collection of arrays, each reshapeable to 60x60.
        true_labels: True label for each image.
        predictions: Predicted label for each image.
        num_images: Maximum number of images to display. Fewer are shown when
            any of the three inputs holds fewer items, instead of raising
            IndexError.
    """
    # Never index past the shortest input.
    count = min(num_images, len(images), len(true_labels), len(predictions))
    plt.figure(figsize=(15, 5))
    for i in range(count):
        plt.subplot(1, count, i + 1)
        plt.imshow(images[i].reshape(60, 60), cmap='gray')
        true_label = true_labels[i]
        predicted_label = predictions[i]
        plt.title(f"True: {true_label}, Pred: {predicted_label}")
        plt.axis('off')
    plt.show()

# Show the first five test images, each titled with its true and predicted
# class index.
display_predictions(X_test, y_test_labels, y_pred_labels, num_images=5)


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Convert y_test from one-hot encoded format to class labels
y_test_labels = np.argmax(y_test, axis=1)

# Predict on the test set and take the most probable class per sample
y_pred_probs = model.predict(X_test)
y_pred_labels = np.argmax(y_pred_probs, axis=1)

# Pin the label set to every class the model can output. Without `labels=`,
# a class missing from both the true and predicted labels is dropped from the
# matrix, which shifts rows/columns and mislabels the heatmap axes.
class_ids = np.arange(y_test.shape[1])
cm = confusion_matrix(y_test_labels, y_pred_labels, labels=class_ids)

# Heatmap with explicit tick labels so each cell maps to a known class index
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_ids, yticklabels=class_ids, ax=ax)
ax.set_title('Confusion Matrix')
ax.set_ylabel('Actual Labels')
ax.set_xlabel('Predicted Labels')

plt.show()