In [2]:
import os
import cv2
import numpy as np
from mtcnn import MTCNN
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
# Define the main folder containing the dataset
dataset_dir = 'FacialDataset'

# Set the image size and batch size
image_size = (256, 256)
batch_size = 32

# Create an ImageDataGenerator with data augmentation and preprocessing
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

detector = MTCNN()

def preprocess_image(file_path):
    image = cv2.imread(file_path)
    if image is None:
        print(f"Warning: Unable to read image {file_path}. It may be corrupt or the path may be incorrect.")
        os.remove(file_path)
        return

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = detector.detect_faces(image_rgb)
    
    if results:
        x1, y1, width, height = results[0]['box']
        x2, y2 = x1 + width, y1 + height
        face = image_rgb[y1:y2, x1:x2]
        face_array = cv2.resize(face, image_size)

        mask = np.zeros((image_size[0], image_size[1]), dtype="uint8")
        resized_x1, resized_y1, resized_x2, resized_y2 = get_resized_coordinates(x1, y1, width, height, image_rgb)
        cv2.rectangle(mask, (resized_x1, resized_y1), (resized_x2, resized_y2), 255, -1)

        blurred_image = cv2.GaussianBlur(face_array, (21, 21), 0)
        final_image = cv2.bitwise_and(blurred_image, blurred_image, mask=cv2.bitwise_not(mask))
        final_image += cv2.bitwise_and(face_array, face_array, mask=mask)

        final_image = cv2.cvtColor(final_image, cv2.COLOR_RGB2BGR)
        
        # Ensure the file has a valid image extension before writing
        file_extension = os.path.splitext(file_path)[1].lower()
        if file_extension not in ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']:
            print(f"Error: Unsupported file extension '{file_extension}' for file {file_path}.")
            return
        
        try:
            cv2.imwrite(file_path, final_image)
        except Exception as e:
            print(f"Exception occurred while saving image {file_path}: {e}")
    else:
        print(f"No face detected in image {file_path}. Removing it.")
        os.remove(file_path)

def get_resized_coordinates(x1, y1, width, height, original_image):
    resized_x1 = int(image_size[0] * x1 / original_image.shape[1])
    resized_y1 = int(image_size[1] * y1 / original_image.shape[0])
    resized_x2 = resized_x1 + int(image_size[0] * width / original_image.shape[1])
    resized_y2 = resized_y1 + int(image_size[1] * height / original_image.shape[0])
    return resized_x1, resized_y1, resized_x2, resized_y2

2024-01-14 23:40:28.038217: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-01-14 23:40:28.038412: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1 Pro

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



In [3]:
# for preprocessing
'''
for class_name in os.listdir(dataset_dir):
    class_path = os.path.join(dataset_dir, class_name)
    if os.path.isdir(class_path):
        for image_filename in os.listdir(class_path):
            image_path = os.path.join(class_path, image_filename)
            preprocess_image(image_path)
'''

'\nfor class_name in os.listdir(dataset_dir):\n    class_path = os.path.join(dataset_dir, class_name)\n    if os.path.isdir(class_path):\n        for image_filename in os.listdir(class_path):\n            image_path = os.path.join(class_path, image_filename)\n            preprocess_image(image_path)\n'

In [4]:
X, y = [], []
for class_name in os.listdir(dataset_dir):
    class_path = os.path.join(dataset_dir, class_name)
    if os.path.isdir(class_path):
        for image_filename in os.listdir(class_path):
            image_path = os.path.join(class_path, image_filename)
            if os.path.isfile(image_path):
                X.append(image_path)
                y.append(class_name)

In [5]:
from collections import Counter
class_distribution = Counter(y)

for class_name, count in class_distribution.items():
    print(f"Class '{class_name}': {count} images")

Class 'Square Facial': 837 images
Class 'Round Facial': 820 images
Class 'Oval Facial': 799 images
Class 'Heart Facial': 794 images
Class 'Diamond Facial': 98 images
Class 'Triangle Facial': 97 images
Class 'Oblong Facial': 793 images


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

train_generator = datagen.flow_from_dataframe(
    pd.DataFrame({'filename': X_train, 'class': y_train}),
    x_col='filename',
    y_col='class',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    seed=42,
    validate_filenames=False  # Disable filename validation for better performance
)

test_generator = datagen.flow_from_dataframe(
    pd.DataFrame({'filename': X_test, 'class': y_test}),
    x_col='filename',
    y_col='class',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    seed=42,
    validate_filenames=False
)


Found 3390 non-validated image filenames belonging to 7 classes.
Found 848 non-validated image filenames belonging to 7 classes.


# CNN

In [None]:
model = keras.Sequential([
    keras.layers.Conv2D(64, (3, 3), padding='same', activation='relu', input_shape=(256, 256, 3)),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(2, 2),

    keras.layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(2, 2),

    keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(2, 2),

    keras.layers.Conv2D(256, (3, 3), padding='same', activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.MaxPooling2D(2, 2),

    keras.layers.Flatten(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.BatchNormalization(),

    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(0.5),

    keras.layers.Dense(7, activation='softmax')
])


In [7]:
from sklearn.utils.class_weight import compute_class_weight

labels = list(set(y))
class_indices = train_generator.class_indices

y_integers = np.array([class_indices[class_name] for class_name in y])

class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_integers), y=y_integers)
class_weights_dict = {i : class_weights[i] for i in range(len(class_weights))}


In [8]:
def lr_schedule(epoch):
    initial_lr = 0.001
    if epoch < 5:
        return initial_lr
    else:
        return initial_lr * tf.math.exp(0.1 * (5 - epoch))

lr_scheduler = LearningRateScheduler(lr_schedule)
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


epochs = 50 
history = model.fit(
    train_generator,
    epochs=epochs,
    validation_data=test_generator,
    callbacks=[lr_scheduler, early_stopping],
    verbose=1,
    class_weight=class_weights_dict
)

test_loss, test_accuracy = model.evaluate(test_generator, verbose=1)
print(f"Test loss: {test_loss}")
print(f"Test accuracy: {test_accuracy}")

In [None]:
model.save('facial_classification_model.h5')

# PRETRAINED

In [13]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import LearningRateScheduler
import tensorflow as tf
import math
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [14]:
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

test_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input
)

train_generator = train_datagen.flow_from_dataframe(
    pd.DataFrame({'filename': X_train, 'class': y_train}),
    x_col='filename',
    y_col='class',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True,
    seed=42
)

test_generator = test_datagen.flow_from_dataframe(
    pd.DataFrame({'filename': X_test, 'class': y_test}),
    x_col='filename',
    y_col='class',
    target_size=image_size,
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
    seed=42
)

Found 3351 validated image filenames belonging to 7 classes.
Found 840 validated image filenames belonging to 7 classes.




In [15]:
def scheduler(epoch, lr):
    decay_rate = 0.1
    decay_step = 10
    if epoch % decay_step == 0 and epoch:
        return lr * math.exp(-decay_rate)
    return lr

lr_scheduler = LearningRateScheduler(scheduler)


In [17]:
# Load the ResNet50 base model
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(*image_size, 3))
base_model.trainable = False

# Build the model on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(len(set(y)), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model
initial_learning_rate = 0.001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=initial_learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=50,
    verbose=1,
    callbacks=[lr_scheduler],
    class_weight=class_weights_dict
)

# Fine-tuning: Unfreeze some layers and apply EarlyStopping
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

base_model.trainable = True
fine_tune_at = 100  # Example: fine-tune starting from layer 100
for layer in base_model.layers[:fine_tune_at]:
    layer.trainable = False

fine_tuning_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=fine_tuning_learning_rate),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

history = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=50,
    verbose=1,
    callbacks=[lr_scheduler, early_stopping],
    class_weight=class_weights_dict
)

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test loss: {test_loss}")
print(f"Test accuracy: {test_accuracy}")

Epoch 1/50


2024-01-15 00:13:29.168905: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2024-01-15 00:14:03.630807: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50


2024-01-15 00:44:41.212097: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.




2024-01-15 00:45:18.261479: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Test loss: 0.948584794998169
Test accuracy: 0.7547619342803955


In [21]:
from sklearn.metrics import classification_report


predictions = model.predict(test_generator)
predicted_labels = [tf.argmax(pred).numpy() for pred in predictions]
true_labels = test_generator.classes
report = classification_report(true_labels, predicted_labels)
print(report)


              precision    recall  f1-score   support

           0       0.75      0.18      0.29        17
           1       0.74      0.80      0.77       159
           2       0.85      0.86      0.85       159
           3       0.63      0.71      0.67       160
           4       0.79      0.76      0.77       164
           5       0.80      0.77      0.78       167
           6       0.33      0.14      0.20        14

    accuracy                           0.75       840
   macro avg       0.70      0.60      0.62       840
weighted avg       0.75      0.75      0.75       840



In [18]:
model.save("facial_modelling.h5")

  layer_config = serialize_layer_fn(layer)
