### Library

In [5]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from keras.callbacks import ModelCheckpoint

### Read files

In [6]:
# Dataset layout: each split root holds one sub-folder per class.
path_train = './dataHappy/train/'
path_validation = './dataHappy/test/'

# Per-class folders of the training split.
path_train_c1 = path_train + 'normal/'
path_train_c2 = path_train + 'small_smile/'
path_train_c3 = path_train + 'smile/'
path_train_c4 = path_train + 'big_smile/'

# Per-class folders of the validation split (stored under "test" on disk).
path_val_c1 = path_validation + 'normal/'
path_val_c2 = path_validation + 'small_smile/'
path_val_c3 = path_validation + 'smile/'
path_val_c4 = path_validation + 'big_smile/'

In [7]:
# List the entries of every class folder so the class balance of each split
# can be inspected before training.
train_c1, train_c2, train_c3, train_c4 = (
    os.listdir(folder)
    for folder in (path_train_c1, path_train_c2, path_train_c3, path_train_c4)
)
val_c1, val_c2, val_c3, val_c4 = (
    os.listdir(folder)
    for folder in (path_val_c1, path_val_c2, path_val_c3, path_val_c4)
)

# Entry counts per class, in c1..c4 order.
print('Total train images train_c1, train_c2, train_c3, train_c4: ',
      *(len(entries) for entries in (train_c1, train_c2, train_c3, train_c4)))
print('Total validation images val_c1, val_c2, val_c3, val_c4: ',
      *(len(entries) for entries in (val_c1, val_c2, val_c3, val_c4)))

Total train images train_c1, train_c2, train_c3, train_c4:  3591 3741 3026 4101
Total validation images val_c1, val_c2, val_c3, val_c4:  897 654 756 1026


In [8]:
# Show the entries directly under the training root (one folder per class).
class_folders = os.listdir(path_train)
print(class_folders)

['normal', 'small_smile', 'smile', 'big_smile']


### Rotate and flip (offline augmentation)

In [9]:
import os
import cv2

def count_images_in_directory(directory):
    """Return how many entries of ``directory`` have an image-like name.

    An entry counts when its name ends with ``.jpg``, ``.jpeg`` or ``.png``
    (case-sensitive). Only names are checked; the entries are never opened.
    """
    image_suffixes = ('.jpg', '.jpeg', '.png')
    return sum(1 for entry in os.listdir(directory) if entry.endswith(image_suffixes))

# Prefix given to every file written by sinh_data; also used to recognise
# files produced by an earlier run.
_AUGMENTED_PREFIX = 'sinh_'


def _source_image_names(directory):
    """List image file names in ``directory`` that sinh_data did not generate."""
    return [
        name for name in os.listdir(directory)
        if name.endswith(('.jpg', '.jpeg', '.png'))
        and not name.startswith(_AUGMENTED_PREFIX)
    ]


def sinh_data(path, angle=20):
    """Write one rotated-and-mirrored copy of each image in the smaller classes.

    Every sub-directory of ``path`` is treated as one class. The class with
    the most source images is left untouched; for every other class each
    source image is rotated by ``angle`` degrees around its centre, flipped
    horizontally and saved next to the original as ``sinh_<name>``.

    Files whose name already starts with ``sinh_`` are neither counted nor
    used as sources, so running this again rewrites the same outputs instead
    of creating ``sinh_sinh_...`` copies or changing which class is largest.

    Parameters
    ----------
    path : str
        Directory that contains one sub-directory per class.
    angle : float, optional
        Rotation angle in degrees passed to ``cv2.getRotationMatrix2D``.

    Returns
    -------
    int
        Number of augmented images written.
    """
    class_dirs = [
        os.path.join(path, entry)
        for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    ]
    sources = {class_dir: _source_image_names(class_dir) for class_dir in class_dirs}

    # max() keeps the first directory on ties, like a strict ">" scan would.
    largest_dir = max(class_dirs, key=lambda class_dir: len(sources[class_dir]), default=None)

    written = 0
    for class_dir in class_dirs:
        # No size threshold is applied: a class can never hold more images
        # than the largest one, so every other class is augmented.
        if class_dir == largest_dir:
            continue

        for name in sources[class_dir]:
            source_path = os.path.join(class_dir, name)
            image = cv2.imread(source_path)
            if image is None:
                # cv2.imread signals an unreadable file by returning None.
                print(f'Skipping unreadable image: {source_path}')
                continue

            height, width = image.shape[:2]
            rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)
            rotated_image = cv2.warpAffine(image, rotation_matrix, (width, height))
            flipped_image = cv2.flip(rotated_image, 1)

            output_path = os.path.join(class_dir, f'{_AUGMENTED_PREFIX}{name}')
            cv2.imwrite(output_path, flipped_image)
            written += 1

    return written


# Run the augmentation on both splits. The loop iterates over the paths
# directly so the builtin `dir` is not shadowed for the rest of the session.
# NOTE(review): this also writes augmented copies into the validation split,
# which changes what validation accuracy measures - confirm that is intended.
for split_path in (path_train, path_validation):
    sinh_data(split_path)

### Rescale pixel values and create data generators

In [10]:
# Only transformation applied when loading: multiply pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0/255)

# Both splits are read with the same settings: images resized to 48x48,
# loaded as single-channel grayscale, labels in 'categorical' mode.
# batch_size is not set, so the flow_from_directory default applies.
flow_options = dict(
    class_mode='categorical',
    target_size=(48, 48),
    color_mode='grayscale',
)

train_generator = datagen.flow_from_directory(path_train, **flow_options)
validation_generator = datagen.flow_from_directory(path_validation, **flow_options)

Found 19929 images belonging to 4 classes.
Found 4606 images belonging to 4 classes.


### Build model

In [11]:
model = Sequential()

# Block 1: the only block whose first convolution uses the default padding,
# carries an L2 kernel penalty and is not followed by batch normalisation.
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(48, 48, 1),
                 data_format='channels_last', kernel_regularizer=l2(0.01)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
model.add(Dropout(0.2))

# Blocks 2-4: two (conv -> batch norm) pairs, then 2x2 max pooling and dropout.
for filters, drop_rate in ((128, 0.25), (256, 0.3), (512, 0.3)):
    for _ in range(2):
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu', padding='same'))
        model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
    model.add(Dropout(drop_rate))

model.add(Flatten())

# Classifier head: three dense layers, each followed by dropout.
for units, drop_rate in ((512, 0.35), (256, 0.35), (128, 0.4)):
    model.add(Dense(units, activation='relu'))
    model.add(Dropout(drop_rate))

# One softmax output per class (4 classes).
model.add(Dense(4, activation='softmax'))

In [12]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.optimizers import Adam

# Callback that writes to ./src/best_model.hdf5 each time val_accuracy reaches
# a new maximum. save_weights_only is not set, so the Keras default applies.
checkpoint = ModelCheckpoint(
    "./src/best_model.hdf5",
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=1,
)

# Adam optimiser with categorical cross-entropy loss; accuracy is tracked so
# the checkpoint callback has a val_accuracy value to monitor.
optimizer = Adam(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizer,
    metrics=['accuracy'],
)

In [13]:
# train_generator already yields complete batches, so no batch_size is passed:
# the Keras documentation says batch_size must not be specified for generator
# or Sequence inputs. To train with a different batch size, set batch_size in
# the flow_from_directory call that creates train_generator.
history = model.fit(
        train_generator,
        epochs=10,
        validation_data=validation_generator,
        callbacks=[checkpoint])

Epoch 1/10
Epoch 1: val_accuracy improved from -inf to 0.32827, saving model to ./src\best_model.hdf5
Epoch 2/10
Epoch 2: val_accuracy improved from 0.32827 to 0.33283, saving model to ./src\best_model.hdf5
Epoch 3/10
Epoch 3: val_accuracy improved from 0.33283 to 0.37060, saving model to ./src\best_model.hdf5
Epoch 4/10
Epoch 4: val_accuracy improved from 0.37060 to 0.38406, saving model to ./src\best_model.hdf5
Epoch 5/10
Epoch 5: val_accuracy improved from 0.38406 to 0.45376, saving model to ./src\best_model.hdf5
Epoch 6/10
Epoch 6: val_accuracy did not improve from 0.45376
Epoch 7/10
Epoch 7: val_accuracy improved from 0.45376 to 0.46570, saving model to ./src\best_model.hdf5
Epoch 8/10
Epoch 8: val_accuracy improved from 0.46570 to 0.57490, saving model to ./src\best_model.hdf5
Epoch 9/10
Epoch 9: val_accuracy did not improve from 0.57490
Epoch 10/10
Epoch 10: val_accuracy improved from 0.57490 to 0.60899, saving model to ./src\best_model.hdf5


In [14]:
# Export the network architecture as JSON. This file holds the layer
# configuration only; trained weights are written separately by the
# ModelCheckpoint callback to ./src/best_model.hdf5.

# Make sure the output folder exists so this cell also works on its own.
os.makedirs("./src", exist_ok=True)

model_json = model.to_json()
with open("./src/best_model.json",'w') as json_file:
    json_file.write(model_json)