# Modeling:

### CNN Model - 10 Epochs with Early Stopping, no Data Augmentation
__________________________

### Imports

In [2]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np
import matplotlib.pyplot as plt
import random
from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

In [3]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# Every dataset path used further down starts with /content/drive, so this
# cell has to run first. `google.colab` is only importable on Colab.

from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Root folder of the training images on the mounted Drive.
# This path is later handed to flow_from_directory, which reports 6 classes for it.

training_data_dir = "/content/drive/MyDrive/GA Data Science Bootcamp/Projects/Capstone Project/Facial Recognition Data/Training/Training/"

In [5]:
# Rename the files sitting directly inside the training folder to simple
# sequential names (1.<ext>, 2.<ext>, ...). This was added because of a
# "FileNotFoundError" that kept appearing when running the models.
#
# The cell is safe to re-run:
#   * files whose name is already a plain number are left untouched,
#   * a target name that is already in use is never reused. os.rename silently
#     replaces an existing file on POSIX, which would delete an image.
#   * files are visited in sorted order, so the numbering is reproducible
#     (os.listdir returns entries in arbitrary order).
#
# NOTE(review): only regular files at the top level of the folder are renamed.
# flow_from_directory reads images from the class sub-folders, which this loop
# does not enter - confirm this is the intended scope.

# Setting the directory path where the files are located
training_directory_path = training_data_dir

# Every name currently present (files AND folders); none of them may be reused
taken_names = set(os.listdir(training_directory_path))

# Initialize a starting index
start_index = 1

# Iterating over a sorted snapshot of the directory content
for filename in sorted(taken_names):
    old_filepath = os.path.join(training_directory_path, filename)

    # Sub-folders (and anything else that is not a regular file) are skipped
    if not os.path.isfile(old_filepath):
        continue

    file_stem, file_extension = os.path.splitext(filename)

    # Already a purely numeric name (e.g. from an earlier run): keep it
    if file_stem.isdigit():
        continue

    # Advance to the first index whose name is still free
    new_filename = f"{start_index}{file_extension}"
    while new_filename in taken_names:
        start_index += 1
        new_filename = f"{start_index}{file_extension}"

    new_filepath = os.path.join(training_directory_path, new_filename)

    # Renaming the file and reserving its new name
    os.rename(old_filepath, new_filepath)
    taken_names.add(new_filename)

    # Incrementing the index
    start_index += 1

print("Files renamed successfully.")

Files renamed successfully.


In [7]:
# Preprocessing for the training images. The only transformation is the
# rescale factor 1/255, which maps 8-bit pixel values into the 0.0-1.0 range
# (no data augmentation is configured).
training_datagen = ImageDataGenerator(rescale=1./255)

# Stream the training images from disk, batch by batch, instead of loading
# the whole dataset into memory at once.
training_data_generator = training_datagen.flow_from_directory(
    directory=training_directory_path,
    class_mode='categorical',  # more than two classes, so not 'binary'
    batch_size=32,             # 32 images per batch
    target_size=(224, 224),    # every image is resized to 224 x 224
)

Found 28303 images belonging to 6 classes.


In [8]:
# Root folder of the testing images on the mounted Drive.
# This path is later handed to flow_from_directory, which reports 6 classes for it.

testing_data_dir = '/content/drive/MyDrive/GA Data Science Bootcamp/Projects/Capstone Project/Facial Recognition Data/Testing/Testing/'

In [16]:
# Rename the files sitting directly inside the testing folder to simple
# sequential names (1.<ext>, 2.<ext>, ...).
#
# The cell is safe to re-run:
#   * files whose name is already a plain number are left untouched,
#   * a target name that is already in use is never reused. os.rename silently
#     replaces an existing file on POSIX, which would delete an image.
#   * files are visited in sorted order, so the numbering is reproducible
#     (os.listdir returns entries in arbitrary order).
#
# NOTE(review): only regular files at the top level of the folder are renamed.
# flow_from_directory reads images from the class sub-folders, which this loop
# does not enter - confirm this is the intended scope.

testing_directory_path = testing_data_dir

# Every name currently present (files AND folders); none of them may be reused
taken_names = set(os.listdir(testing_directory_path))

start_index = 1

for filename in sorted(taken_names):
    old_filepath = os.path.join(testing_directory_path, filename)

    # Sub-folders (and anything else that is not a regular file) are skipped
    if not os.path.isfile(old_filepath):
        continue

    file_stem, file_extension = os.path.splitext(filename)

    # Already a purely numeric name (e.g. from an earlier run): keep it
    if file_stem.isdigit():
        continue

    # Advance to the first index whose name is still free
    new_filename = f"{start_index}{file_extension}"
    while new_filename in taken_names:
        start_index += 1
        new_filename = f"{start_index}{file_extension}"

    new_filepath = os.path.join(testing_directory_path, new_filename)

    os.rename(old_filepath, new_filepath)
    taken_names.add(new_filename)

    start_index += 1

print("Testing files renamed successfully.")

Testing files renamed successfully.


In [10]:
# Preprocessing for the testing images. The only transformation is the
# rescale factor 1/255, which maps 8-bit pixel values into the 0.0-1.0 range.
testing_datagen = ImageDataGenerator(rescale=1./255)

# Stream the testing images from disk in batches; this generator is used as
# the validation data during training.
testing_data_generator = testing_datagen.flow_from_directory(
    directory=testing_directory_path,
    class_mode='categorical',  # more than two classes, so not 'binary'
    batch_size=32,             # 32 images per batch
    target_size=(224, 224),    # every image is resized to 224 x 224
)

Found 7067 images belonging to 6 classes.


_________________________
## CNN with Early Stopping

In [12]:
# Small CNN: three Conv2D + MaxPooling2D stages with 32, 64 and 128 filters,
# followed by a dense classifier head with dropout.
#
# The input shape has to match what the data generators deliver. Both
# generators are configured with target_size=(224, 224) and the default RGB
# colour mode, so every batch has shape (batch, 224, 224, 3). The previous
# value (48, 48, 3) did not match those batches.
INPUT_SHAPE = (224, 224, 3)

early_stopping_model = Sequential()

# Feature extractor
early_stopping_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=INPUT_SHAPE))
early_stopping_model.add(MaxPooling2D((2, 2)))

early_stopping_model.add(Conv2D(64, (3, 3), activation='relu'))
early_stopping_model.add(MaxPooling2D((2, 2)))

early_stopping_model.add(Conv2D(128, (3, 3), activation='relu'))
early_stopping_model.add(MaxPooling2D((2, 2)))

# Classifier head; half of the dense units are dropped during training
early_stopping_model.add(Flatten())
early_stopping_model.add(Dense(128, activation='relu'))
early_stopping_model.add(Dropout(0.5))

early_stopping_model.add(Dense(6, activation='softmax'))  # 6 classes for emotions

# Compile the model. categorical_crossentropy pairs with the
# class_mode='categorical' labels produced by the generators.
early_stopping_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [13]:
# Early-stopping callback: watch the validation loss, halt training once it has
# gone 3 epochs without improving, and restore the weights of the best epoch.
early_stopping = EarlyStopping(
    restore_best_weights=True,
    patience=3,
    monitor='val_loss',
)

In [14]:
# Training the model with early stopping for at most 10 epochs.
#
# `history` is reset first: if fit() fails, the variable would otherwise be
# undefined on a fresh kernel, or still hold the result of an EARLIER run.
# After this cell, `history is None` means training did not complete.
history = None

try:
    history = early_stopping_model.fit(
        training_data_generator,
        steps_per_epoch=len(training_data_generator),  # one full pass over the training batches
        epochs=10,
        validation_data=testing_data_generator,
        validation_steps=len(testing_data_generator),  # one full pass over the testing batches
        callbacks=[early_stopping]
    )
except FileNotFoundError as e:
    # Best effort: report the missing file instead of aborting the notebook.
    # The model may be untrained or only partially trained at this point.
    print(f"FileNotFoundError: {e}")
    print("Skipping this step due to missing files in the dataset.")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10


As we can see above, adding more epochs improved the model's training accuracy (~0.70 after 7 epochs, vs. ~0.61 after 5 epochs in my previous model), but the testing accuracy begins to stagnate around 0.5. Additionally, the model becomes increasingly overfit as the number of epochs increases. Training stopped at epoch 7 because the early stopping callback (patience of 3) halts training once the validation loss has failed to improve for three consecutive epochs, which is what happened from epoch 5 to epoch 7.