In [1]:
from Preprocess import Preprocess
import cv2
import pickle
from keras.utils import image_dataset_from_directory
from tqdm import tqdm
import shutil
import os
os.environ["KERAS_BACKEND"] = "tensorflow"
import keras
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score, recall_score
from sklearn.metrics import f1_score, accuracy_score

In [2]:
# Locations of the raw dataset, the intermediate artifacts and the output folders.
# Paths are assembled with os.path.join so they resolve on any OS; on Windows the
# resulting strings are identical to the previous hard-coded backslash literals.
genki_dataset_dir = os.path.join(".", "GENKI-4K Face, Expression, and Pose Dataset")
train_pictures_path = os.path.join(genki_dataset_dir, "files")
cropped_pictures_dir = os.path.join(".", "cropped_pictures")
labels_file_path = os.path.join(genki_dataset_dir, "labels.txt")
haarcascade_frontalface_default_file = "haarcascade_frontalface_default.xml"
preprocess_class_pickle = "preprocess_class.pkl"
labeled_pictures_folder = "labeled_pictures"

In [3]:
# List every entry in the training-pictures folder. The pattern is built with
# os.path.join (portable separator) and the matches are sorted because glob
# returns them in arbitrary, OS-dependent order.
files = sorted(glob.glob(os.path.join(train_pictures_path, "*")))
df_train = pd.DataFrame({'full_path': files})
# Keep the bare file name (last path component) next to the full path.
file_name = df_train['full_path'].map(os.path.basename)
df_train['file_name'] = file_name
df_train

Unnamed: 0,full_path,file_name
0,".\GENKI-4K Face, Expression, and Pose Dataset\...",file0001.jpg
1,".\GENKI-4K Face, Expression, and Pose Dataset\...",file0002.jpg
2,".\GENKI-4K Face, Expression, and Pose Dataset\...",file0003.jpg
3,".\GENKI-4K Face, Expression, and Pose Dataset\...",file0004.jpg
4,".\GENKI-4K Face, Expression, and Pose Dataset\...",file0005.jpg
...,...,...
3995,".\GENKI-4K Face, Expression, and Pose Dataset\...",file3996.jpg
3996,".\GENKI-4K Face, Expression, and Pose Dataset\...",file3997.jpg
3997,".\GENKI-4K Face, Expression, and Pose Dataset\...",file3998.jpg
3998,".\GENKI-4K Face, Expression, and Pose Dataset\...",file3999.jpg


In [4]:
# ! pip install opencv-python --upgrade

## crop faces from pictures and save to cropped_pictures_dir

In [5]:
# Load the previously pickled preprocessing object from disk.
# SECURITY: pickle.load can execute arbitrary code while deserializing; only load
# a pickle file that was produced locally or comes from a trusted source.
# NOTE(review): presumably this is an instance of the `Preprocess` class imported
# in the first cell (the import would then be required for unpickling) — confirm.
with open(preprocess_class_pickle, 'rb') as f:
    preprocess_class = pickle.load(f)

# Expose the loaded object's attributes under notebook-level names.
cropped_pictures = preprocess_class.data
picture_labels = preprocess_class.labels
cropped_pictures_base_paths = preprocess_class.base_paths

In [6]:
# Sanity check: number of entries in the labels loaded from the pickle.
len(picture_labels)

4000

In [7]:
# Peek at the first few base paths loaded from the pickle.
cropped_pictures_base_paths[:5]

['file0001.jpg',
 'file0002.jpg',
 'file0003.jpg',
 'file0004.jpg',
 'file0005.jpg']

In [8]:
# Prefix every base file name with the cropped-pictures directory.
cropped_pictures_file_paths = list(
    map(
        lambda base_name: os.path.join(cropped_pictures_dir, base_name),
        cropped_pictures_base_paths,
    )
)
cropped_pictures_file_paths[:5]

['.\\cropped_pictures\\file0001.jpg',
 '.\\cropped_pictures\\file0002.jpg',
 '.\\cropped_pictures\\file0003.jpg',
 '.\\cropped_pictures\\file0004.jpg',
 '.\\cropped_pictures\\file0005.jpg']

In [9]:
# if os.path.exists(labeled_pictures_folder):
#     try:
#         shutil.rmtree(labeled_pictures_folder)
#     except OSError as e:
#         print(f"Error deleting folder: {e}")
# os.makedirs(labeled_pictures_folder)

# for label in set(picture_labels):
#     os.makedirs(fr'{labeled_pictures_folder}\{label}', exist_ok=True)

# # copy each image to its corresponding label folder
# for index,label in enumerate(picture_labels):
#     shutil.copy(cropped_pictures_file_paths[index], fr'{labeled_pictures_folder}\{label}')

In [10]:
# !gdown 1aqH0YQHn8pSpWmno8l3CZ8SGsLTRxKXq

In [11]:
# !unzip labeled_pictures.zip > /dev/null 2>&1

In [12]:
# Target size (in pixels) passed as `image_size` / `input_shape` further down.
max_width, max_height = 256, 256

max_width, max_height

(256, 256)

In [13]:
SEED = 42

# Training split: validation_split=0.1 with subset='training' keeps 90% of the
# images found under labeled_pictures_folder. Labels are inferred from the
# sub-directory names and returned one-hot encoded (label_mode='categorical').
train_dataset = image_dataset_from_directory(
    directory=rf'./{labeled_pictures_folder}/',
    labels='inferred',
    label_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    subset='training',
    validation_split=0.1,
    seed=SEED,  # the validation cell uses the same seed and validation_split
    batch_size=32,
    # Keras documents image_size as (height, width); the previous
    # (width, height) order only worked because both values are equal.
    image_size=(max_height, max_width),
    interpolation='bilinear'
)

class_names = train_dataset.class_names
print('Class names:', class_names)


Found 4000 files belonging to 2 classes.
Using 3600 files for training.
Class names: ['0', '1']


In [14]:
# Validation split: same directory, seed and validation_split as the training
# cell, but subset='validation' selects the held-out 10% of the images.
val_dataset = image_dataset_from_directory(
    directory=rf'./{labeled_pictures_folder}/',
    labels='inferred',
    label_mode='categorical',
    color_mode='rgb',
    shuffle=True,
    subset='validation',
    validation_split=0.1,
    seed=SEED,
    batch_size=32,
    # Keras documents image_size as (height, width); the previous
    # (width, height) order only worked because both values are equal.
    image_size=(max_height, max_width),
    interpolation='bilinear'
)

Found 4000 files belonging to 2 classes.
Using 400 files for validation.


In [15]:
# from keras.applications.resnet50 import preprocess_input
# from keras.applications.resnet_v2 import preprocess_input
from keras.applications.efficientnet import preprocess_input

# Run the model-specific preprocessing over the images of both splits;
# labels pass through untouched.
def _preprocess_batch(images, labels):
    """Apply `preprocess_input` to the image batch and keep the labels as they are."""
    return preprocess_input(images), labels


train_dataset_preprocess = train_dataset.map(_preprocess_batch)
val_dataset_preprocess = val_dataset.map(_preprocess_batch)

In [16]:
import plotly.express as px

def display_curves(history, metric):
    """Plot the per-epoch training curve of `metric` and, when present, its validation curve.

    Args:
        history: object exposing a `history` mapping from metric name to a
            per-epoch list of values (e.g. the return value of `model.fit`).
        metric: name of the metric to plot, e.g. 'accuracy' or 'loss'.

    Raises:
        KeyError: if `metric` itself is not present in `history.history`.

    The validation series `'val_' + metric` is optional: when training ran
    without validation data only the training curve is drawn instead of
    failing with a KeyError.
    """
    val_metric = 'val_' + metric
    curves = {metric: history.history[metric]}
    if val_metric in history.history:
        curves[val_metric] = history.history[val_metric]

    df = pd.DataFrame(curves)
    # Epochs are 1-based on the x axis, while the frame index starts at 0.
    fig = px.line(df, x=df.index + 1, y=list(curves))
    fig.update_layout(xaxis_title='Epochs', yaxis_title=metric)
    fig.show()

In [25]:
# ImageNet-pretrained EfficientNetB6 backbone without its classification top;
# pooling="max" applies global max pooling to the last convolutional output.
model = keras.applications.EfficientNetB6(
    include_top=False,
    weights="imagenet",
    # input_tensor=None,
    # input_shape is (height, width, channels); the previous (width, height, 3)
    # order only worked because both dimensions are equal.
    input_shape=(max_height, max_width, 3),
    pooling="max",
    # classes=2,
    # classifier_activation="softmax",
    name="efficientnetb6" # TODO
)

In [26]:
# Inspect the layers of the backbone (long output).
model.layers

[<InputLayer name=input_layer_1, built=True>,
 <Rescaling name=rescaling_2, built=True>,
 <Normalization name=normalization_1, built=True>,
 <Rescaling name=rescaling_3, built=True>,
 <ZeroPadding2D name=stem_conv_pad, built=True>,
 <Conv2D name=stem_conv, built=True>,
 <BatchNormalization name=stem_bn, built=True>,
 <Activation name=stem_activation, built=True>,
 <DepthwiseConv2D name=block1a_dwconv, built=True>,
 <BatchNormalization name=block1a_bn, built=True>,
 <Activation name=block1a_activation, built=True>,
 <GlobalAveragePooling2D name=block1a_se_squeeze, built=True>,
 <Reshape name=block1a_se_reshape, built=True>,
 <Conv2D name=block1a_se_reduce, built=True>,
 <Conv2D name=block1a_se_expand, built=True>,
 <Multiply name=block1a_se_excite, built=True>,
 <Conv2D name=block1a_project_conv, built=True>,
 <BatchNormalization name=block1a_project_bn, built=True>,
 <DepthwiseConv2D name=block1b_dwconv, built=True>,
 <BatchNormalization name=block1b_bn, built=True>,
 <Activation name=

In [27]:
# Total number of layers in the backbone; the fine-tuning cut-off index below is relative to this list.
len(model.layers)

668

In [28]:
# Number of trainable variables before any layer is frozen.
len(model.trainable_variables)

582

In [31]:
fine_tune_at = 300

# Layers whose index is below `fine_tune_at` are frozen; every layer from that
# index onwards is made trainable.
# NOTE(review): this also makes the BatchNormalization layers past the cut-off
# trainable; the Keras fine-tuning guide recommends keeping them frozen — confirm intended.
for layer_index, layer in enumerate(model.layers):
    layer.trainable = layer_index >= fine_tune_at

In [32]:
# Number of trainable variables after freezing the layers below `fine_tune_at`.
len(model.trainable_variables)

322

In [33]:
# Classification head stacked on the backbone output:
# Flatten -> [Dense(relu, he_normal) -> Dropout(0.3)] for 64 and 128 units -> Dense(2, softmax).
# NOTE: `model` is rebound to the combined network, so re-running this cell
# stacks another head on top of the previous one; rebuild the backbone first.
x = keras.layers.Flatten()(model.output)
for units in (64, 128):
    x = keras.layers.Dense(units, activation='relu', kernel_initializer="he_normal")(x)
    x = keras.layers.Dropout(0.3)(x)
x = keras.layers.Dense(2, activation='softmax')(x)
model = keras.models.Model(inputs=model.input, outputs=x)

In [34]:
# Print the architecture of the combined model (backbone + classification head).
model.summary()

In [39]:
# Adam optimizer with its default settings; categorical cross-entropy matches
# the one-hot labels (label_mode='categorical') and the 2-unit softmax output.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy', 'precision', 'recall'],  # TODO
)

In [None]:
# Stop once validation accuracy has not improved for 5 consecutive epochs and
# roll the model back to the weights of the best epoch.
early_stopping_cb = keras.callbacks.EarlyStopping(
    monitor='val_accuracy',
    patience=5,
    restore_best_weights=True,
)

# epochs=100 is only an upper bound; early stopping can end training sooner.
history = model.fit(
    train_dataset_preprocess,
    validation_data=val_dataset_preprocess,
    epochs=100,
    callbacks=[early_stopping_cb],
)

Epoch 1/100


In [30]:
# Persist the trained model to a .keras file in the working directory.
model.save("Smile_Detector_Model_EfficientNetB6.keras")

In [None]:
# model = keras.saving.load_model("Smile_Detector_Model.keras")

In [31]:
# Training vs. validation accuracy per epoch (requires `history` from the fit cell).
display_curves(history, 'accuracy')

In [32]:
# Training vs. validation loss per epoch (requires `history` from the fit cell).
display_curves(history, 'loss')

In [36]:
# Evaluate on the validation split after it was mapped through `preprocess_input`.
model.evaluate(val_dataset_preprocess)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 149ms/step - accuracy: 0.9300 - loss: 0.2791


[0.3784318268299103, 0.9325000047683716]

In [34]:
# Evaluate on the raw validation split, i.e. without the `preprocess_input` mapping.
# NOTE(review): the execution counts show this cell ran before the cell above,
# so the two scores may not come from the same model state — confirm.
model.evaluate(val_dataset)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 103ms/step - accuracy: 0.8946 - loss: 0.3873


[0.38524097204208374, 0.9100000262260437]