In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Activation, Add
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
# Shape of one model input: 48x48 pixels with a single channel
# (the directory iterators below load the images as grayscale).
input_shape = 48, 48, 1




In [2]:
import os
import zipfile
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [3]:
# Root folder of the image dataset; it is expected to contain the
# 'train' and 'test' sub-directories read by the data generators.
dataset_path = '/kaggle/input/fer2013-cleaned-dataset/images1'

# Number of images per batch yielded by the directory iterators.
batch_size = 64

In [4]:
from keras.models import Sequential
import numpy as np
import tensorflow as tf
# Seed the NumPy and TensorFlow global random generators with the same value
# so that repeated runs start from the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [6]:
from keras.layers import Layer, Input, Conv2D, BatchNormalization, Activation, Add, MaxPooling2D, Flatten, Dense
from keras.models import Model

class SelfAttention(Layer):
    """Scaled dot-product self-attention over the spatial positions of a feature map.

    The input is projected by three 1x1 convolutions into queries and keys
    (``channels // 8`` filters each) and values (``channels`` filters). Every
    spatial position attends to every other position, and the attended values
    are reshaped back to the spatial layout of the input.

    The reshapes assume a channels-last ``(batch, height, width, channels)``
    input.

    Args:
        channels: Number of filters of the value projection, which is also the
            channel count of the output. Must be at least 8 so the query/key
            projections get at least one filter.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``, ...).

    Raises:
        ValueError: If ``channels`` is smaller than 8.
    """

    def __init__(self, channels, **kwargs):
        # Forward name/dtype/trainable etc. so the layer behaves like any
        # built-in layer and can be rebuilt from its config.
        super(SelfAttention, self).__init__(**kwargs)

        if channels // 8 < 1:
            raise ValueError(
                "SelfAttention needs channels >= 8 so that the query/key "
                "projections have at least one filter; got channels=%r" % (channels,)
            )
        self.channels = channels

        # Query, Key, and Value transformations
        self.W_q = Conv2D(channels // 8, (1, 1), padding='same')
        self.W_k = Conv2D(channels // 8, (1, 1), padding='same')
        self.W_v = Conv2D(channels, (1, 1), padding='same')

    def call(self, x):
        """Apply self-attention to ``x`` and return a tensor with ``channels`` channels."""
        q = self.W_q(x)
        k = self.W_k(x)
        v = self.W_v(x)

        # The 1x1 'same' projections keep the spatial size, so the dynamic
        # height/width of the input describe q, k and v as well.
        dims = tf.shape(x)
        height, width = dims[1], dims[2]
        positions = height * width
        qk_channels = self.channels // 8

        # Flatten the spatial grid so each position becomes one row.
        q = tf.reshape(q, [-1, positions, qk_channels])
        k = tf.reshape(k, [-1, positions, qk_channels])
        v = tf.reshape(v, [-1, positions, self.channels])

        # Scale by sqrt(key depth). Cast to the dtype of q (instead of a fixed
        # float32) so the division also works when the layer computes in
        # another float dtype, e.g. under a mixed-precision policy.
        scale = tf.math.sqrt(tf.cast(qk_channels, q.dtype))
        attention_weights = tf.nn.softmax(tf.matmul(q, k, transpose_b=True) / scale)

        # Weighted sum of the values for every query position.
        output = tf.matmul(attention_weights, v)

        # Reshape back to the original spatial dimensions
        output = tf.reshape(output, [-1, height, width, self.channels])

        return output

    def get_config(self):
        """Return the layer config, including ``channels``, for serialization."""
        config = super(SelfAttention, self).get_config()
        config.update({"channels": self.channels})
        return config

def residual_block_with_attention(x, filters, kernel_size=(3, 3), stride=(1, 1), padding='same'):
    """Residual block: conv -> BN -> ReLU -> self-attention -> conv -> BN, plus a shortcut.

    Args:
        x: Input feature-map tensor (channels-last).
        filters: Number of filters of both convolutions and of the attention layer.
        kernel_size: Kernel size of the two convolutions.
        stride: Stride of the first convolution (int or pair of ints).
        padding: Padding mode of the two convolutions.

    Returns:
        The output tensor of the block after the final ReLU.

    Notes:
        When ``stride`` is not 1 or the input does not already have ``filters``
        channels, the identity shortcut cannot be added to the main path, so it
        is replaced by a strided 1x1 convolution followed by batch
        normalization. With unit stride and matching channels the shortcut is
        the plain identity, exactly as before.
        With ``padding`` other than ``'same'`` the main path changes the spatial
        size and the final addition is still expected to fail, as it did before.
    """
    shortcut = x

    # First convolution layer (the only one that applies `stride`)
    x = Conv2D(filters, kernel_size, strides=stride, padding=padding)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

    # Self-Attention layer
    x_att = SelfAttention(filters)(x)

    # Combine original features and attention features
    x = Add()([x, x_att])

    # Second convolution layer
    x = Conv2D(filters, kernel_size, padding=padding)(x)
    x = BatchNormalization()(x)

    # Project the shortcut only when its shape would not match the main path.
    strides = (stride, stride) if isinstance(stride, int) else tuple(stride)
    if strides != (1, 1) or shortcut.shape[-1] != filters:
        shortcut = Conv2D(filters, (1, 1), strides=stride, padding='same')(shortcut)
        shortcut = BatchNormalization()(shortcut)

    # Add the shortcut to the output
    x = Add()([x, shortcut])
    x = Activation('relu')(x)

    return x

# Build the model with self-attention
def build_vgg_with_resnet_and_attention(input_shape, num_classes):
    """Assemble the (uncompiled) classifier.

    The network stacks four stages of 64, 128, 256 and 512 filters. Each stage
    is a 3x3 ReLU convolution, a residual block with self-attention, and a 2x2
    max-pooling. Two 512-unit ReLU dense layers and a softmax output follow.

    Args:
        input_shape: Shape of one input sample, passed to ``Input``.
        num_classes: Number of units of the softmax output layer.

    Returns:
        A ``Model`` mapping the input layer to the softmax output. The model
        is not compiled here.
    """
    input_layer = Input(shape=input_shape)

    # Convolutional stages: conv -> residual attention block -> pooling.
    features = input_layer
    for stage_filters in (64, 128, 256, 512):
        features = Conv2D(stage_filters, (3, 3), padding='same', activation='relu')(features)
        features = residual_block_with_attention(features, stage_filters)
        features = MaxPooling2D(pool_size=(2, 2))(features)

    # Classifier head: flatten, then two identical fully connected layers.
    features = Flatten()(features)
    for _ in range(2):
        features = Dense(512, activation='relu')(features)

    # Output layer
    output_layer = Dense(num_classes, activation='softmax')(features)

    return Model(inputs=input_layer, outputs=output_layer)

# Number of output classes (the data generators report 7 class folders).
num_classes = 7

# Instantiate the network; it is compiled in a later cell.
model = build_vgg_with_resnet_and_attention(
    input_shape=input_shape,
    num_classes=num_classes,
)

# Show the layer-by-layer architecture.
model.summary()


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 48, 48, 1)]          0         []                            
                                                                                                  
 conv2d_12 (Conv2D)          (None, 48, 48, 64)           640       ['input_2[0][0]']             
                                                                                                  
 conv2d_13 (Conv2D)          (None, 48, 48, 64)           36928     ['conv2d_12[0][0]']           
                                                                                                  
 batch_normalization_8 (Bat  (None, 48, 48, 64)           256       ['conv2d_13[0][0]']           
 chNormalization)                                                                           

In [7]:
# Both splits get the same preprocessing: pixel values are multiplied by 1/255.
pixel_scale = 1./255
train_data_generator = ImageDataGenerator(rescale=pixel_scale)
test_data_generator = ImageDataGenerator(rescale=pixel_scale)

# Options shared by the training and test directory iterators.
common_flow_kwargs = dict(
    color_mode='grayscale',
    target_size=input_shape[:2],
    batch_size=batch_size,
    class_mode='categorical',
)

train_dir = os.path.join(dataset_path, 'train')
test_dir = os.path.join(dataset_path, 'test')

# Training images are shuffled.
train_data = train_data_generator.flow_from_directory(
    train_dir, shuffle=True, **common_flow_kwargs
)

# Test images keep their directory order (shuffle=False).
test_data = test_data_generator.flow_from_directory(
    test_dir, shuffle=False, **common_flow_kwargs
)

Found 28044 images belonging to 7 classes.
Found 7177 images belonging to 7 classes.


In [8]:
# Read the sample counts from the directory iterators instead of hard-coding
# them, so they stay correct if the dataset on disk changes.
num_train_samples = train_data.samples
num_test_samples = test_data.samples

In [9]:
# Categorical cross-entropy matches the one-hot labels produced by
# class_mode='categorical'; accuracy is tracked during training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Stop training once the validation loss has not improved for 5 epochs.
# NOTE(review): restore_best_weights is left at its default, so the final
# weights may not be the best ones seen - confirm that is intended.
early_stopping = EarlyStopping(
    patience=5,
    monitor='val_loss',
)

In [11]:
# Train on the directory iterator.
# - `batch_size` is not passed: the iterator already yields batches of
#   `batch_size` images, and Keras documents that the argument must not be
#   given for generator / Sequence inputs.
# - `steps_per_epoch` uses the iterator length, so the final partial batch is
#   included instead of being left out by floor division.
# NOTE(review): test_data is also used as validation data and therefore drives
# early stopping; metrics later computed on test_data are not from a fully
# held-out set - consider a separate validation split.
history = model.fit(
    train_data,
    steps_per_epoch=len(train_data),
    epochs=10,
    callbacks=[early_stopping],
    validation_data=test_data
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Class probabilities for every test image; the test iterator was created
# with shuffle=False, so rows are compared against test_data.classes in order.
predictions = model.predict(test_data)
y_true = test_data.classes

# Predicted label = index of the highest probability in each row.
y_pred = np.argmax(predictions, axis=1)

# Overall accuracy plus support-weighted precision / recall / F1.
accuracy = accuracy_score(y_true, y_pred)
precision, recall, f1 = (
    scorer(y_true, y_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

# AUC is intentionally not reported for this multi-class problem.

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)

Accuracy: 0.5053643583670057
Precision: 0.5142106091318197
Recall: 0.5053643583670057
F1 Score: 0.4969658092571358
