In [1]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Input, Dropout, GlobalAveragePooling2D, Conv2D, BatchNormalization, Activation, MaxPooling2D, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K

# Parameters
IMAGE_SHAPE = (64, 64)  # Update based on your image dimensions
BATCH_SIZE = 32
test_dir = "FER2013/test"
train_dir = "FER2013/train"

# Data Augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1./255)

# Load Data
print("training_data")
training_data = tf.keras.preprocessing.image_dataset_from_directory(train_dir,
                                                                   label_mode="categorical",
                                                                   image_size=IMAGE_SHAPE,
                                                                   batch_size=BATCH_SIZE)

print("testing_data")
testing_data = tf.keras.preprocessing.image_dataset_from_directory(test_dir,
                                                                   label_mode="categorical",
                                                                   image_size=IMAGE_SHAPE,
                                                                   batch_size=BATCH_SIZE,
                                                                   shuffle=False)

# Model Architecture
no_of_classes = 7

model_1 = Sequential()

# 1st Convolutional Layer
model_1.add(Conv2D(64, (3, 3), strides=(2, 2), padding='same', input_shape=(64, 64, 3)))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 2nd Convolutional Layer
model_1.add(Conv2D(128, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 3rd Convolutional Layer
model_1.add(Conv2D(128, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 4th Convolutional Layer
model_1.add(Conv2D(256, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 5th Convolutional Layer
model_1.add(Conv2D(256, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# Global Average Pooling
model_1.add(GlobalAveragePooling2D())

# 1st Fully Connected Layer
model_1.add(Dense(256, kernel_regularizer=l2(0.001)))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(Dropout(0.5))

# 2nd Fully Connected Layer
model_1.add(Dense(512, kernel_regularizer=l2(0.001)))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(Dropout(0.5))

# Output Layer
model_1.add(Dense(no_of_classes, activation='softmax'))

# Compile the model with SGD optimizer and categorical crossentropy loss
opt = SGD(learning_rate=0.01, momentum=0.9)
model_1.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary to view the architecture
model_1.summary()

# Callbacks
checkpoint = ModelCheckpoint("./model.keras", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=20, verbose=1, mode='auto', restore_best_weights=True)
reduce_learningrate = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, verbose=1, min_delta=0.0001)

# Learning rate scheduler
def lr_schedule(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return float(K.eval(lr * tf.math.exp(-0.1)))

lr_scheduler = LearningRateScheduler(lr_schedule)

callbacks_list = [early_stopping, checkpoint, reduce_learningrate, lr_scheduler]

# Training
history_1 = model_1.fit(training_data,
                       epochs=10,
                       validation_data=testing_data,
                       callbacks=callbacks_list)

# Fine-tuning
final_epoch = 25
history_2 = model_1.fit(training_data,
                        epochs=final_epoch,
                        validation_data=testing_data,
                        callbacks=callbacks_list,
                        initial_epoch=history_1.epoch[-1])

# Comparison Function
def compare_historys(original_history, new_history, initial_epochs=10):
    acc = original_history.history["accuracy"]
    loss = original_history.history["loss"]
    val_acc = original_history.history["val_accuracy"]
    val_loss = original_history.history["val_loss"]

    total_acc = acc + new_history.history["accuracy"]
    total_loss = loss + new_history.history["loss"]
    total_val_acc = val_acc + new_history.history["val_accuracy"]
    total_val_loss = val_loss + new_history.history["val_loss"]

    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(total_acc, label='Training Accuracy')
    plt.plot(total_val_acc, label='Validation Accuracy')
    plt.plot([initial_epochs-1, initial_epochs-1], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(total_loss, label='Training Loss')
    plt.plot(total_val_loss, label='Validation Loss')
    plt.plot([initial_epochs-1, initial_epochs-1], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='upper right')
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()


training_data
Found 28709 files belonging to 7 classes.
testing_data
Found 7178 files belonging to 7 classes.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 156ms/step - accuracy: 0.2001 - loss: 2.8034
Epoch 1: val_accuracy improved from -inf to 0.21566, saving model to ./model.keras
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 179ms/step - accuracy: 0.2001 - loss: 2.8032 - val_accuracy: 0.2157 - val_loss: 2.3212 - learning_rate: 0.0100
Epoch 2/10
[1m897/898[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 116ms/step - accuracy: 0.2408 - loss: 2.3251
Epoch 2: val_accuracy improved from 0.21566 to 0.27988, saving model to ./model.keras
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m114s[0m 127ms/step - accuracy: 0.2409 - loss: 2.3249 - val_accuracy: 0.2799 - val_loss: 2.1420 - learning_rate: 0.0100
Epoch 3/10
[1m897/898[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 109ms/step - accuracy: 0.2722 - loss: 2.1202
Epoch 3: val_accuracy did not improve from 0.27988
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [2]:
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, Input, Dropout, GlobalAveragePooling2D, Conv2D, BatchNormalization, Activation, MaxPooling2D, Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.regularizers import l2
import tensorflow.keras.backend as K

In [3]:
test_dir = "FER2013/test"
train_dir = "FER2013/train"

IMAGE_SHAPE = (64,64)
BATCH_SIZE = 32

In [4]:
# Data Augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.3,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest')

test_datagen = ImageDataGenerator(rescale=1./255)

# Load Data
print("training_data")
training_data = tf.keras.preprocessing.image_dataset_from_directory(train_dir,
                                                                   label_mode="categorical",
                                                                   image_size=IMAGE_SHAPE,
                                                                   batch_size=BATCH_SIZE)

print("testing_data")
testing_data = tf.keras.preprocessing.image_dataset_from_directory(test_dir,
                                                                   label_mode="categorical",
                                                                   image_size=IMAGE_SHAPE,
                                                                   batch_size=BATCH_SIZE,
                                                                   shuffle=False)

training_data
Found 28709 files belonging to 7 classes.
testing_data
Found 7178 files belonging to 7 classes.


In [5]:
# Model Architecture
no_of_classes = 7

model_1 = Sequential()

# 1st Convolutional Layer
model_1.add(Conv2D(64, (3, 3), strides=(2, 2), padding='same', input_shape=(64, 64, 3)))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 2nd Convolutional Layer
model_1.add(Conv2D(128, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 3rd Convolutional Layer
model_1.add(Conv2D(128, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 4th Convolutional Layer
model_1.add(Conv2D(256, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# 5th Convolutional Layer
model_1.add(Conv2D(256, (3, 3), strides=(2, 2), padding='same'))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(MaxPooling2D(pool_size=(2, 2), padding='same'))
model_1.add(Dropout(0.25))

# Global Average Pooling
model_1.add(GlobalAveragePooling2D())

# 1st Fully Connected Layer
model_1.add(Dense(256, kernel_regularizer=l2(0.001)))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(Dropout(0.5))

# 2nd Fully Connected Layer
model_1.add(Dense(512, kernel_regularizer=l2(0.001)))
model_1.add(BatchNormalization())
model_1.add(Activation('relu'))
model_1.add(Dropout(0.5))

# Output Layer
model_1.add(Dense(no_of_classes, activation='softmax'))

# Compile the model with SGD optimizer and categorical crossentropy loss
opt = SGD(learning_rate=0.01, momentum=0.9)
model_1.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

# Model summary to view the architecture
model_1.summary()

In [6]:
# Callbacks
checkpoint = ModelCheckpoint("./model.keras", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=20, verbose=1, mode='auto', restore_best_weights=True)
reduce_learningrate = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, verbose=1, min_delta=0.0001)

# Learning rate scheduler
def lr_schedule(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return float(K.eval(lr * tf.math.exp(-0.1)))

lr_scheduler = LearningRateScheduler(lr_schedule)

callbacks_list = [early_stopping, checkpoint, reduce_learningrate, lr_scheduler]

In [7]:
# Training
history_1 = model_1.fit(training_data,
                       epochs=10,
                       validation_data=testing_data,
                       callbacks=callbacks_list)

# Fine-tuning
final_epoch = 35
history_2 = model_1.fit(training_data,
                        epochs=final_epoch,
                        validation_data=testing_data,
                        callbacks=callbacks_list,
                        initial_epoch=history_1.epoch[-1])

Epoch 1/10
[1m897/898[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 143ms/step - accuracy: 0.2008 - loss: 2.7812
Epoch 1: val_accuracy improved from -inf to 0.21649, saving model to ./model.keras
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m145s[0m 155ms/step - accuracy: 0.2008 - loss: 2.7809 - val_accuracy: 0.2165 - val_loss: 2.3131 - learning_rate: 0.0100
Epoch 2/10
[1m897/898[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 106ms/step - accuracy: 0.2325 - loss: 2.3598
Epoch 2: val_accuracy improved from 0.21649 to 0.25397, saving model to ./model.keras
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 111ms/step - accuracy: 0.2325 - loss: 2.3597 - val_accuracy: 0.2540 - val_loss: 2.1419 - learning_rate: 0.0100
Epoch 3/10
[1m898/898[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 106ms/step - accuracy: 0.2540 - loss: 2.1406
Epoch 3: val_accuracy improved from 0.25397 to 0.28295, saving model to ./model.keras
[1m898/898[0m [32m

In [18]:
# Comparison Function
def compare_historys(original_history, new_history, initial_epochs=10):
    acc = original_history.history["accuracy"]
    loss = original_history.history["loss"]
    val_acc = original_history.history["val_accuracy"]
    val_loss = original_history.history["val_loss"]

    total_acc = acc + new_history.history["accuracy"]
    total_loss = loss + new_history.history["loss"]
    total_val_acc = val_acc + new_history.history["val_accuracy"]
    total_val_loss = val_loss + new_history.history["val_loss"]

    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(total_acc, label='Training Accuracy')
    plt.plot(total_val_acc, label='Validation Accuracy')
    plt.plot([initial_epochs-1, initial_epochs-1], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='lower right')
    plt.title('Training and Validation Accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(total_loss, label='Training Loss')
    plt.plot(total_val_loss, label='Validation Loss')
    plt.plot([initial_epochs-1, initial_epochs-1], plt.ylim(), label='Start Fine Tuning')
    plt.legend(loc='upper right');
    plt.title('Training and Validation Loss')
    plt.xlabel('epoch')
    plt.show()

In [13]:
model_1.predict(testing_data)

[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 21ms/step


array([[0.20545146, 0.01499176, 0.32936966, ..., 0.12176564, 0.19856526,
        0.07817142],
       [0.14752784, 0.00555411, 0.17286675, ..., 0.18516755, 0.3967102 ,
        0.02476017],
       [0.28917265, 0.03488296, 0.08781228, ..., 0.24692614, 0.33204794,
        0.00326317],
       ...,
       [0.0192038 , 0.00104549, 0.14294662, ..., 0.00249135, 0.00316959,
        0.5689226 ],
       [0.06082296, 0.00638565, 0.25409034, ..., 0.09333109, 0.07694516,
        0.4909147 ],
       [0.19683813, 0.03087414, 0.10163438, ..., 0.23244217, 0.26966465,
        0.00816089]], dtype=float32)