In [19]:
from PIL import Image
import os
from os import listdir

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px


import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

from sklearn.metrics import confusion_matrix , classification_report 
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import roc_curve, auc, roc_auc_score

from IPython.display import clear_output
import warnings
warnings.filterwarnings('ignore')

In [21]:
train_dir = "/kaggle/working/train"
test_dir = "/kaggle/working/test"


SEED = 12
IMG_HEIGHT = 125
IMG_WIDTH = 125
BATCH_SIZE = 32
EPOCHS = 30
FINE_TUNING_EPOCHS = 20
LR = 0.01
NUM_CLASSES = 8
EARLY_STOPPING_CRITERIA=4
CLASS_LABELS = ['anger', 'contempt', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

In [22]:
data = pd.read_csv("/kaggle/input/aidentify-project-data/AIdentify_filenames.csv")

In [23]:
emotions_list = data.emotion.unique()#.to_list()
emotions_dictionary = {}
for i in range(len(emotions_list)):
    emotions_dictionary[emotions_list[i]] = i
print(emotions_dictionary)
parent_dir = "/kaggle/working/"
os.mkdir(parent_dir+"/train")
os.mkdir(parent_dir+"/test")
for i in emotions_dictionary.keys():
    path = os.path.join(parent_dir+"/train", i)
    os.mkdir(path)
    print("Directory '% s' created" % i)
for i in emotions_dictionary.keys():
    path = os.path.join(parent_dir+"/test", i)
    os.mkdir(path)
    print("Directory '% s' created" % i)

{'anger': 0, 'contempt': 1, 'disgust': 2, 'fear': 3, 'happy': 4, 'neutral': 5, 'sad': 6, 'surprise': 7}


FileExistsError: [Errno 17] File exists: '/kaggle/working//train'

In [24]:
def load_images():
    imgs = []
    labels = []
    data_path = '/kaggle/input/aidentify-project-data/AIdentify_image_bag/AIdentify_image_bag/'
    i = 0
    for images_name in os.listdir(data_path):
        image = Image.open(data_path+images_name)
        image = image.resize((IMG_HEIGHT, IMG_WIDTH))
        image = image.convert('RGB')
        tmp = np.array(image)
        imgs.append((tmp,images_name))
        labels.append(data[data.image_filename == images_name].emotion.to_list()[0])
        if i % 1000 == 0:
            print(i)
        i += 1

    X = np.array(imgs)
    return X, labels    

In [25]:
X,y = load_images()

0
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000
19000
20000
21000
22000
23000
24000
25000
26000
27000
28000
29000
30000
31000
32000
33000
34000
35000
36000
37000
38000
39000
40000
41000
42000
43000


In [None]:
def split_train_test_val(X,y):    
    num_rows = len(y)


    # Finally we shuffle:
    p = np.random.permutation(len(X))
    #X, y = X[p], y[p]


    first_split = int(num_rows /8.)
    X_test, X_train = X[:first_split], X[first_split:]
    y_test, y_train = y[:first_split], y[first_split:]
    
    return X_train, y_train, X_test, y_test

In [None]:
X_train, y_train, X_test, y_test = split_train_test_val(X,y)

In [None]:
output_data_path = "/kaggle/working/train"
for i in range(len(X_train)):
    output_path = f"{output_data_path}/{y_train[i]}/{X_train[i][1]}"
    Image.fromarray(X_train[i][0]).save(output_path)

In [None]:
output_data_path = "/kaggle/working/test"
for i in range(len(X_test)):
    output_path = f"{output_data_path}/{y_test[i]}/{X_test[i][1]}"
    Image.fromarray(X_test[i][0]).save(output_path)

In [26]:
preprocess_fun = tf.keras.applications.densenet.preprocess_input

train_datagen = ImageDataGenerator(horizontal_flip=True,
                                   width_shift_range=0.1,
                                   height_shift_range=0.05,
                                   rescale = 1./255,
                                   validation_split = 0.2,
                                   preprocessing_function=preprocess_fun
                                  )
test_datagen = ImageDataGenerator(rescale = 1./255,
                                  validation_split = 0.2,
                                  preprocessing_function=preprocess_fun)

train_generator = train_datagen.flow_from_directory(directory = train_dir,
                                                    target_size = (IMG_HEIGHT ,IMG_WIDTH),
                                                    batch_size = BATCH_SIZE,
                                                    shuffle  = True , 
                                                    color_mode = "rgb",
                                                    class_mode = "categorical",
                                                    subset = "training",
                                                    seed = 12
                                                   )

validation_generator = test_datagen.flow_from_directory(directory = train_dir,
                                                         target_size = (IMG_HEIGHT ,IMG_WIDTH),
                                                         batch_size = BATCH_SIZE,
                                                         shuffle  = True , 
                                                         color_mode = "rgb",
                                                         class_mode = "categorical",
                                                         subset = "validation",
                                                         seed = 12
                                                        )

test_generator = test_datagen.flow_from_directory(directory = test_dir,
                                                   target_size = (IMG_HEIGHT ,IMG_WIDTH),
                                                    batch_size = BATCH_SIZE,
                                                    shuffle  = False , 
                                                    color_mode = "rgb",
                                                    class_mode = "categorical",
                                                    seed = 12
                                                  )

Found 30177 images belonging to 8 classes.
Found 7542 images belonging to 8 classes.
Found 5388 images belonging to 8 classes.


In [None]:
fig = px.bar(x = CLASS_LABELS,
             y = [list(train_generator.classes).count(i) for i in np.unique(train_generator.classes)] , 
             color = np.unique(train_generator.classes) ,
             color_continuous_scale="Emrld") 
fig.update_xaxes(title="Emotions")
fig.update_yaxes(title = "Number of Images")
fig.update_layout(showlegend = True,
    title = {
        'text': 'Train Data Distribution ',
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()

In [27]:
def feature_extractor(inputs):
    feature_extractor = tf.keras.applications.DenseNet169(input_shape=(IMG_HEIGHT,IMG_WIDTH, 3),
                                               include_top=False,
                                               weights="imagenet")(inputs)
    
    return feature_extractor

def classifier(inputs):
    x = tf.keras.layers.GlobalAveragePooling2D()(inputs)
    x = tf.keras.layers.Dense(256, activation="relu", kernel_regularizer = tf.keras.regularizers.l2(0.01))(x)
    x = tf.keras.layers.Dropout(0.3)(x)
    x = tf.keras.layers.Dense(1024, activation="relu", kernel_regularizer = tf.keras.regularizers.l2(0.01))(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    x = tf.keras.layers.Dense(512, activation="relu", kernel_regularizer = tf.keras.regularizers.l2(0.01))(x)
    x = tf.keras.layers.Dropout(0.5) (x)
    x = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax", name="classification")(x)
    
    return x

def final_model(inputs):
    densenet_feature_extractor = feature_extractor(inputs)
    classification_output = classifier(densenet_feature_extractor)
    
    return classification_output

def define_compile_model():
    
    inputs = tf.keras.layers.Input(shape=(IMG_HEIGHT ,IMG_WIDTH,3))
    classification_output = final_model(inputs) 
    model = tf.keras.Model(inputs=inputs, outputs = classification_output)
     
    model.compile(optimizer=tf.keras.optimizers.SGD(0.1), 
                loss='categorical_crossentropy',
                metrics = ['accuracy'])
  
    return model

In [28]:
model = define_compile_model()
clear_output()

# Feezing the feature extraction layers
model.layers[1].trainable = False

model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 125, 125, 3)]     0         
_________________________________________________________________
densenet169 (Functional)     (None, 4, 4, 1664)        12642880  
_________________________________________________________________
global_average_pooling2d_1 ( (None, 1664)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               426240    
_________________________________________________________________
dropout_3 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 1024)              263168    
_________________________________________________________________
dropout_4 (Dropout)          (None, 1024)              0   

In [29]:
earlyStoppingCallback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', 
                                                         patience=EARLY_STOPPING_CRITERIA,
                                                         verbose= 1 ,
                                                         restore_best_weights=True
                                                        )

history = model.fit(x = train_generator,
                    epochs = EPOCHS ,
                    validation_data = validation_generator , 
                    callbacks= [earlyStoppingCallback])

history = pd.DataFrame(history.history)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Restoring model weights from the end of the best epoch.
Epoch 00006: early stopping


In [30]:
# Un-Freezing the feature extraction layers for fine tuning 
model.layers[1].trainable = True

model.compile(optimizer=tf.keras.optimizers.SGD(0.001), #lower learning rate
                loss='categorical_crossentropy',
                metrics = ['accuracy'])

history_ = model.fit(x = train_generator,epochs = FINE_TUNING_EPOCHS ,validation_data = validation_generator)
history = history.append(pd.DataFrame(history_.history) , ignore_index=True)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [31]:
model.evaluate(test_generator)
preds = model.predict(test_generator)
y_preds = np.argmax(preds , axis = 1 )
y_test = np.array(test_generator.labels)



In [None]:
plt.plot(history);

In [32]:
model.save("model_DenseNet169")

2022-12-02 10:02:13.099977: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
