In [1]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import load_img, ImageDataGenerator
from tensorflow.keras.models import Sequential, model_from_json
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D, LSTM, TimeDistributed
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm


In [2]:

# Directories for training and testing data
TRAIN_DIR = 'images/train'
TEST_DIR = 'images/validation'


In [3]:
# Function to create a dataframe for images and their labels
def createdataframe(dir):
    image_paths = []
    labels = []
    for label in os.listdir(dir):
        for imagename in os.listdir(os.path.join(dir, label)):
            image_paths.append(os.path.join(dir, label, imagename))
            labels.append(label)
        print(label, "completed")
    return image_paths, labels


In [4]:

# Create dataframe for training and testing data
train = pd.DataFrame()
train['image'], train['label'] = createdataframe(TRAIN_DIR)

test = pd.DataFrame()
test['image'], test['label'] = createdataframe(TEST_DIR)


angry completed
disgust completed
fear completed
happy completed
neutral completed
sad completed
surprise completed
angry completed
disgust completed
fear completed
happy completed
neutral completed
sad completed
surprise completed


In [5]:

# Function to extract features from images
def extract_features(images):
    features = []
    for image in tqdm(images):
        img = load_img(image, color_mode='grayscale')
        img = np.array(img)
        features.append(img)
    features = np.array(features)
    features = features.reshape(len(features), 48, 48, 1)
    return features

In [6]:

# Extract features for training and testing datasets
train_features = extract_features(train['image'])
test_features = extract_features(test['image'])

# Normalize the data by dividing by 255
x_train = train_features / 255.0
x_test = test_features / 255.0



100%|██████████| 28821/28821 [02:19<00:00, 207.09it/s]
100%|██████████| 7066/7066 [00:37<00:00, 189.82it/s]


In [7]:

# Label encoding and one-hot encoding the labels
le = LabelEncoder()
le.fit(train['label'])

y_train = le.transform(train['label'])
y_test = le.transform(test['label'])

y_train = to_categorical(y_train, num_classes=7)
y_test = to_categorical(y_test, num_classes=7)


In [8]:

# Function to create sequences for LSTM
def create_sequences(features, sequence_length):
    sequences = []
    for i in range(len(features) - sequence_length + 1):
        sequences.append(features[i:i + sequence_length])
    return np.array(sequences)


In [9]:

# Function to apply augmentation to each frame and then create sequences
def augment_and_create_sequences(features, labels, sequence_length, datagen, batch_size):
    augmented_features = []
    augmented_labels = []
    num_samples = len(features)

    for i in range(0, num_samples, batch_size):
        # Extract a batch
        batch_x = features[i:i + batch_size]
        batch_y = labels[i:i + batch_size]
        
        # Apply augmentation to each image in the batch
        for j in range(len(batch_x)):
            # Expand the dimensions to (1, 48, 48, 1) for augmentation
            image = batch_x[j].reshape(1, 48, 48, 1)
            aug_iter = datagen.flow(image, batch_size=1)
            aug_image = next(aug_iter)[0]  # Get the augmented image
            
            augmented_features.append(aug_image)
            augmented_labels.append(batch_y[j])
    
    # Convert to numpy arrays
    augmented_features = np.array(augmented_features)
    augmented_labels = np.array(augmented_labels)

    # Now create sequences from the augmented images
    x_seq = create_sequences(augmented_features, sequence_length)
    y_seq = augmented_labels[:len(x_seq)]
    
    return x_seq, y_seq

# Data Augmentation
datagen = ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

sequence_length = 5  # Adjust sequence length as per your data


In [10]:

# Apply augmentation and create sequences for training data
x_train_seq, y_train_seq = augment_and_create_sequences(x_train, y_train, sequence_length, datagen, batch_size=128)

# No data augmentation for the test set
x_test_seq = create_sequences(x_test, sequence_length)
y_test_seq = y_test[:len(x_test_seq)]


In [11]:
# CNN-LSTM model definition
model = Sequential()

# CNN layers (for spatial feature extraction)
model.add(TimeDistributed(Conv2D(128, kernel_size=(3, 3), activation='relu'), input_shape=(sequence_length, 48, 48, 1)))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
model.add(TimeDistributed(Dropout(0.4)))

model.add(TimeDistributed(Conv2D(256, kernel_size=(3, 3), activation='relu')))
model.add(TimeDistributed(MaxPooling2D(pool_size=(2, 2))))
model.add(TimeDistributed(Dropout(0.4)))

# Flatten the output of CNN
model.add(TimeDistributed(Flatten()))

# LSTM layer for temporal pattern recognition
model.add(LSTM(512, return_sequences=False))

# Dense layers
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))

# Output layer
model.add(Dense(7, activation='softmax'))


  super().__init__(**kwargs)


In [12]:
# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [13]:
# Train the model with augmented sequence data
model.fit(x_train_seq, y_train_seq, epochs=15, batch_size=128, validation_data=(x_test_seq, y_test_seq))


Epoch 1/15
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2213s[0m 10s/step - accuracy: 0.2364 - loss: 1.8320 - val_accuracy: 0.3352 - val_loss: 1.6148
Epoch 2/15
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1396s[0m 6s/step - accuracy: 0.3310 - loss: 1.6511 - val_accuracy: 0.3676 - val_loss: 1.5118
Epoch 3/15
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1264s[0m 6s/step - accuracy: 0.4054 - loss: 1.5102 - val_accuracy: 0.4919 - val_loss: 1.2712
Epoch 4/15
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1261s[0m 6s/step - accuracy: 0.5229 - loss: 1.2799 - val_accuracy: 0.3973 - val_loss: 1.9010
Epoch 5/15
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1234s[0m 5s/step - accuracy: 0.6929 - loss: 0.8609 - val_accuracy: 0.5347 - val_loss: 1.6833
Epoch 6/15
[1m226/226[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1219s[0m 5s/step - accuracy: 0.8833 - loss: 0.3455 - val_accuracy: 0.4884 - val_loss: 2.4511
Epoch 7/15
[1m

<keras.src.callbacks.history.History at 0x267b65e7ce0>

In [14]:
# Save the model
model_json = model.to_json()
with open("emotiondetector_lstm.json", 'w') as json_file:
    json_file.write(model_json)
model.save("emotiondetector_lstm.h5")




In [15]:

# Load the saved model and weights
json_file = open("emotiondetector_lstm.json", "r")
model_json = json_file.read()
json_file.close()
model = model_from_json(model_json)
model.load_weights("emotiondetector_lstm.h5")


In [16]:

# Prediction function
label = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

def ef(image):
    img = load_img(image, color_mode='grayscale')
    feature = np.array(img)
    feature = feature.reshape(1, 48, 48, 1)
    return feature / 255.0


In [17]:
# Example prediction
image = 'images/train/sad/42.jpg'
print("original image is of sad")
img = ef(image)
pred = model.predict(img)
pred_label = label[pred.argmax()]
print("model prediction is", pred_label)


original image is of sad


ValueError: Exception encountered when calling TimeDistributed.call().

[1mNegative dimension size caused by subtracting 3 from 1 for '{{node sequential_1/time_distributed_1/convolution}} = Conv2D[T=DT_FLOAT, data_format="NHWC", dilations=[1, 1, 1, 1], explicit_paddings=[], padding="VALID", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true](sequential_1/time_distributed_1/strided_slice, sequential_1/time_distributed_1/convolution/ReadVariableOp)' with input shapes: [1,48,1,1], [3,3,1,128].[0m

Arguments received by TimeDistributed.call():
  • inputs=tf.Tensor(shape=(1, 48, 48, 1, 1), dtype=float32)
  • training=False
  • mask=None