**Imports and Data Loading**

In [None]:
import numpy as np 
import pandas as pd 

import tensorflow as tf
from tensorflow import keras
from keras import backend as K 

from scipy.stats import chisquare
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import gc

from numpy.random import seed

# Pin the NumPy and TensorFlow random generators to the same fixed seed.
seed(42)
tf.random.set_seed(42)

import os

# Print the full path of every file found below /kaggle/input.
for input_path in (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
):
    print(input_path)

# Read the fer2013 CSV into a DataFrame.
data_file = "../input/facial-expression/fer2013/fer2013.csv"
data = pd.read_csv(data_file)

print(os.listdir("../input"))

**Preprocessing**

In [None]:
# Class balance check: count the samples per emotion label and compare the
# counts against a uniform distribution with a chi-squared test.
target_OHE = pd.get_dummies(data["emotion"], prefix="emotion")
target_count = target_OHE.sum()

print(target_count)
target_count.plot.bar()

print("Chi-Squared for initial Dataset")
# No f_exp is passed, so chisquare uses equal expected frequencies for all classes.
chisquare(target_count)

In [None]:
# Disabled experiment: the whole cell body is a single string literal, so none of it executes.
# Remove the enclosing triple quotes to restrict the dataset to four classes before preprocessing.
"""
#Subset for Hyperopt
#Keep only classes 2, 3, 4 and 6, grouped by class in that order.
#Built with pd.concat because DataFrame.append was removed in pandas 2.0.
ss_data= pd.concat([data.loc[data.emotion==i] for i in [2, 3, 4, 6]])

#Alternative: sample 3500 rows from every class except class 1, and 547 rows from class 1
#ss_data= pd.concat([data.loc[data.emotion==i].sample(547 if i==1 else 3500) for i in range(0, 7)])

#Relabel the kept classes to 0..3. Only the label column is remapped, so no
#other column can be altered by accident.
data= ss_data.assign(emotion=ss_data['emotion'].replace({2: 0, 3: 1, 4: 2, 6: 3}))

#Chi-Squared
target_OHE= pd.get_dummies(data.emotion, prefix='emotion')
target_count=target_OHE.sum()
print(target_count)
target_count.plot(kind='bar')

print("Chi-Squared for Dataset with 4 Classes")
chisquare(target_count) #f_exp to default/equally likely
"""

In [None]:
#Converting Data into Tensorflow proprietary Format

# Width of the one-hot label vectors; must match the 7-unit softmax output of the model.
NUM_CLASSES = 7

# 'pixels' holds one space-separated string per image: expand it to one column
# per pixel, strip any '%' characters, and scale the values by 1/255.
temp_gran_pixels= data['pixels'].str.split(" ", expand= True)
temp_gran_pixels= temp_gran_pixels.replace('%','',regex=True).astype('float')/255

data= data.join(temp_gran_pixels, how= 'left')
data= data.drop(columns=['pixels', 'Usage'])

# Hold out 15% of the rows as the test set. drop() already returns a new frame,
# so no extra copy of the full pixel table is made.
df_train_features, df_test_features, df_train_label, df_test_label= \
    train_test_split(data.drop(columns=['emotion']), data.emotion, test_size=0.15, random_state=42, shuffle= True)

# np.array() copies the data out of pandas; reshape each row to a 48x48 single-channel image.
df_train_features= np.array(df_train_features).reshape(-1, 48, 48, 1)
df_test_features= np.array(df_test_features).reshape(-1, 48, 48, 1)
df_train_label= np.array(df_train_label)
df_test_label= np.array(df_test_label)

# Release the large intermediate frames before training.
del data
del data_file
del target_OHE
gc.collect()

#One-Hot-Encode Labels
# num_classes is fixed explicitly: without it each split infers its width from its own
# largest label, so a split missing the highest class would get a narrower matrix.
df_train_label= tf.keras.utils.to_categorical(df_train_label, num_classes=NUM_CLASSES)
df_test_label= tf.keras.utils.to_categorical(df_test_label, num_classes=NUM_CLASSES)

**Final Model**

In [None]:
def _add_conv_stage(model, n_filters, **conv_kwargs):
    """Append one Conv2D -> BatchNorm -> ReLU -> MaxPooling stage to `model`.

    `conv_kwargs` is forwarded to the Conv2D layer (used for `input_shape` on the first stage).
    """
    model.add(tf.keras.layers.Conv2D(n_filters, (3, 3),
                                     padding='SAME',
                                     strides=(1, 1),
                                     use_bias=True,
                                     kernel_regularizer=keras.regularizers.l2(0.0001),
                                     **conv_kwargs))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Activation("relu"))
    model.add(tf.keras.layers.MaxPooling2D((2, 2), padding='SAME'))


model_two = tf.keras.Sequential()

# Input stage: 32 filters on the 48x48x1 images. No dropout directly after the input stage.
_add_conv_stage(model_two, 32, input_shape=(48, 48, 1))

# Hidden stages: 64 then 128 filters, each followed by BatchNorm + Dropout(0.5).
for n_filters in (64, 128):
    _add_conv_stage(model_two, n_filters)
    model_two.add(tf.keras.layers.BatchNormalization())
    model_two.add(tf.keras.layers.Dropout(rate=0.5))

# Classifier head: flatten, one 32-unit dense block, then a 7-way softmax.
model_two.add(tf.keras.layers.Flatten())
model_two.add(tf.keras.layers.Dense(32))
model_two.add(tf.keras.layers.BatchNormalization())
model_two.add(tf.keras.layers.Activation("relu"))
model_two.add(tf.keras.layers.Dropout(rate=0.5))
model_two.add(tf.keras.layers.Dense(7, activation=tf.nn.softmax))

model_two.summary()

model_two.compile(
    loss='categorical_crossentropy',
    optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
    metrics=['accuracy'],
)

#Train the model
EPOCHS = 8
BATCH_SIZE = 128

history = model_two.fit(
    df_train_features,
    df_train_label,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    # roughly 0.15/0.85; gives a per-epoch validation score to compare epochs
    validation_split=0.18,
    shuffle=True,
)

print('\nhistory dict:', history.history)

#Export the architecture to model.json and the weights to model.h5
model_json = model_two.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
model_two.save_weights("model.h5")
print("Saved model")

#Score the trained model on the held-out test split
test_loss, test_accuracy = model_two.evaluate(df_test_features, df_test_label)
print(f"Test Loss= {test_loss}")
print(f"Test Accuracy= {test_accuracy}")


# Accuracy and loss per epoch, one figure per metric.
# The second curve in each figure is the 'val_*' series, which Keras computes on the
# validation_split hold-out of the training data -- not on the test set -- so the
# legend labels it 'validation'.
for metric in ('accuracy', 'loss'):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# Reset Keras' global state through the same tf.keras API the model was built with.
tf.keras.backend.clear_session()

**Hyperparameter Optimization**

In [None]:
# Disabled hyperparameter search (Hyperopt, TPE). The whole cell body is a single string
# literal, so none of it executes; remove the enclosing triple quotes to run it.
# It reads df_train_features / df_train_label, so the preprocessing cell must have run first.
"""
from hyperopt import STATUS_OK, Trials, fmin, hp, tpe
import numpy as np

#Filter count per convolutional stage, indexed by stage number
filters= [32, 64, 128, 256, 512]

def optimize(params):
    #Given a search space (params) and the scoring function (score),
    #return the hyperparameters with the lowest loss found by TPE.
    space = params

    # Use the fmin function from Hyperopt to find the best hyperparameters (min, cause score is 1-accuracy)
    best = fmin(score,
                space,
                algo=tpe.suggest,
                max_evals=15
                #max_evals=50
               )

    return best

def score(params):
    #Train one model for the sampled params and return its loss for Hyperopt.
    print(params)
    temp_model= build_model(params)

    history= temp_model.fit(df_train_features,
                            df_train_label,
                            #epochs= 1,
                            epochs= int(params["hp_EPOCHS"]),
                            batch_size= int(params["hp_BATCH_SIZE"]),
                            #validation_split= 0.999,
                            validation_split= 0.18, #(0.15/0.85) #makes comparision of epochs more efficient
                            #validation_data= (df_test_features, df_test_label), #display val
                            shuffle=True)

    print('\nhistory dict:', history.history)

    #Rank the trial by the validation accuracy after the FINAL epoch; index 0 would
    #only reflect the first epoch and ignore the rest of the training run.
    val_accuracy= history.history['val_accuracy'][-1]
    loss= 1-float(val_accuracy)

    return {'loss': loss, 'status': STATUS_OK}

def build_model(params):
    #Assemble and compile a Sequential CNN from the sampled params.
    model_hype = tf.keras.Sequential()

    #INPUT & HIDDEN
    #hp.quniform yields floats, hence the int() cast; stage 0 is the input stage
    for i in range(0, (int(params["hp_HIDDEN_LAYERS"]) +1)):
        model_hype= build_layer(model_hype, params, i)

        if i != 0:
            model_hype= build_between(model_hype, params)

    #OUTPUT
    model_hype= build_output(model_hype, params)
    model_hype.compile(loss='categorical_crossentropy',
                  optimizer=tf.keras.optimizers.RMSprop(learning_rate= params["hp_LEARNING_RATE"]),
                  #learning_rate=params["hp_LEARNING_RATE"],
                  metrics=['accuracy'])

    model_hype.summary()

    return model_hype

def build_layer(model, params, nth_layer):
    #Append one convolutional stage (conv, optional second conv, batchnorm/activation, pooling).

    #hp_HIDDEN_LAYERS can reach 5, which gives 6 stages but only 5 filter widths exist;
    #stages beyond the list reuse the last width instead of raising IndexError.
    n_filters= filters[min(nth_layer, len(filters) - 1)]

    if nth_layer== 0:
        model.add(tf.keras.layers.Conv2D(#params["hp_FILTERS"][nth_layer],
                                         n_filters,
                                         params["hp_KERNEL_SIZE"],
                                         padding= params["hp_PADDING"],
                                         strides=params["hp_STRIDES"],
                                         use_bias= params["hp_USE_BIAS"],
                                         kernel_regularizer=keras.regularizers.l2(params["hp_KERNEL_REGULARIZER"]),
                                         input_shape=(48, 48, 1))) #48, 48, 1

    if nth_layer!= 0:
        model.add(tf.keras.layers.Conv2D(#params["hp_FILTERS"][nth_layer],
                                         n_filters,
                                         params["hp_KERNEL_SIZE"],
                                         padding= params["hp_PADDING"],
                                         strides=params["hp_STRIDES"],
                                         use_bias= params["hp_USE_BIAS"],
                                         kernel_regularizer=keras.regularizers.l2(params["hp_KERNEL_REGULARIZER"])))

    if params["hp_USE_SECOND_CONV2D"] is True:
        model.add(tf.keras.layers.Conv2D(#params["hp_FILTERS"][nth_layer],
                                         n_filters,
                                         params["hp_KERNEL_SIZE"],
                                         padding= params["hp_PADDING"],
                                         strides=params["hp_STRIDES"],
                                         use_bias= params["hp_USE_BIAS"],
                                         kernel_regularizer=keras.regularizers.l2(params["hp_KERNEL_REGULARIZER"])))

    #'and' is required here: '&' binds tighter than 'is', so the former
    #'a is True & b is False' compared against (True & b) and could never be true.
    if params["hp_USE_BATCHNORM"] is True and params["hp_BATCHNORM_FIRST"] is True:
        model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.Activation(params["hp_FUNCTION_TYPE"]))

    if params["hp_USE_BATCHNORM"] is True and params["hp_BATCHNORM_FIRST"] is False:
        model.add(tf.keras.layers.BatchNormalization())

    if params["hp_USE_POOLING"] is True:
        model.add(tf.keras.layers.MaxPooling2D(params["hp_POOL_SIZE"],
                                               padding= params["hp_PADDING"]))

    return model

def build_between(model, params):
    #Append the optional batchnorm/dropout layers placed between convolutional stages.
    if params["hp_USE_BATCHNORM_BTW"] is True:
        model.add(tf.keras.layers.BatchNormalization())

    if params["hp_USE_DROPOUT"] is True:
        model.add(tf.keras.layers.Dropout(rate=params["hp_DROPOUT_RATE"]))

    return model

def build_output(model, params):
    #Append the classifier head: flatten, optional dense block, 7-way softmax.
    model.add(tf.keras.layers.Flatten())

    if params["hp_USE_SECOND_DENSE"] is True:
        model.add(tf.keras.layers.Dense(params["hp_DENSE_DIMENSIONALITY"]))

    #Same precedence fix as in build_layer: 'and', not '&'
    if params["hp_USE_BATCHNORM"] is True and params["hp_BATCHNORM_FIRST"] is True:
        model.add(tf.keras.layers.BatchNormalization())

    model.add(tf.keras.layers.Activation(params["hp_LAST_ACTIVATION"]))

    if params["hp_USE_BATCHNORM"] is True and params["hp_BATCHNORM_FIRST"] is False:
        model.add(tf.keras.layers.BatchNormalization())

    if params["hp_USE_DROPOUT"] is True:
        model.add(tf.keras.layers.Dropout(rate=params["hp_DROPOUT_RATE"]))

    model.add(tf.keras.layers.Dense(7, activation= tf.nn.softmax))

    return model

#Initial Hyperparameter Boundaries
parameter_space= {
    'hp_HIDDEN_LAYERS': hp.quniform('hp_HIDDEN_LAYERS', 2, 5, 1), #scope


    #USE OF LAYERS
    'hp_USE_BATCHNORM': hp.choice('hp_USE_BATCHNORM', [True
                                                       #, False
                                                      ]),
    'hp_USE_POOLING': hp.choice('hp_USE_POOLING', [True
                                                   #, False
                                                  ]),
    'hp_USE_BATCHNORM_BTW': hp.choice('hp_USE_BATCHNORM_BTW', [True
                                                               #, False
                                                              ]),
    'hp_USE_DROPOUT': hp.choice('hp_USE_DROPOUT', [True
                                                   #, False
                                                  ]),

    'hp_USE_SECOND_CONV2D': hp.choice('hp_USE_SECOND_CONV2D', [True
                                                               #, False
                                                              ]),
    'hp_USE_SECOND_DENSE': hp.choice('hp_USE_SECOND_DENSE', [True, False
                                                            ]),

    #ORDER OF LAYERS
    'hp_BATCHNORM_FIRST': hp.choice('hp_BATCHNORM_FIRST', [True
                                                           #, False
                                                          ]),


    #CONV-2D-PARAMS
    #'hp_FILTERS': hp.choice('hp_FILTERS',
    #            [[32, 64, 128, 256, 512],
    #              [64, 128, 256, 512, 2014],
    #              [128, 256, 512, 1024, 2056]]),
    'hp_KERNEL_SIZE': hp.choice('hp_KERNEL_SIZE',
                                [
                                #(1, 1),
                                 (3, 3),
                                 #(5, 5)
                                ]),
    'hp_PADDING': hp.choice('hp_PADDING', ['SAME']),
    'hp_STRIDES': hp.choice('hp_STRIDES', [(1, 1)]), #Just one alternative, reducing output with Max2DPooling instead
    'hp_USE_BIAS': hp.choice('hp_USE_BIAS', [True,
                                             #False
                                            ]),
    'hp_KERNEL_REGULARIZER': hp.loguniform('hp_KERNEL_REGULARIZER', np.log(0.0001), np.log(0.1)),

    #BATCH-NORM-PARAMS
    #-

    #ACTIVATION-PARAMS
    'hp_FUNCTION_TYPE': hp.choice('hp_FUNCTION_TYPE', ['relu']),
    'hp_LAST_ACTIVATION': hp.choice('hp_LAST_ACTIVATION', ['relu', 'sigmoid']),

    #MAX-POOLING-2D-PARAMS
    'hp_POOL_SIZE': hp.choice('hp_POOL_SIZE', [(2, 2)]),

    #DROPOUT-PARAMS
    'hp_DROPOUT_RATE': hp.uniform('hp_DROPOUT_RATE', 0.3, 0.7),

    #FLATTEN-PARAMS

    #DENSE-PARAMS
    'hp_DENSE_DIMENSIONALITY': hp.choice('hp_DENSE_DIMENSIONALITY', [32, 64, 128]),

    #COMPILE-PARAMS
    #NOTE: 'hp_OMPITMIZER' is sampled but never read by build_model, which always uses RMSprop
    #'hp_OMPITMIZER': hp.choice('hp_OMPITMIZER', ['rmsprop', 'adam']),
    'hp_OMPITMIZER': hp.choice('hp_OMPITMIZER', [tf.keras.optimizers.RMSprop(), tf.keras.optimizers.Adam()]),

    #'hp_EPOCHS': hp.quniform('hp_EPOCHS', 5, 50, 1),
    'hp_EPOCHS': hp.choice('hp_EPOCHS', [6]),
    'hp_BATCH_SIZE': hp.choice('hp_BATCH_SIZE', [128]),
    'hp_LEARNING_RATE': hp.loguniform('hp_LEARNING_RATE', np.log(0.0001), np.log(0.1)),
    }

opt_hyperparams = optimize(parameter_space)
print(opt_hyperparams)
"""