In [1]:
import os

import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras import layers, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report


In [2]:
# Switch Keras to the mixed_float16 dtype policy to reduce memory use
from tensorflow.keras.mixed_precision import set_global_policy
set_global_policy('mixed_float16')

# Turn on memory growth for every visible GPU to avoid OOM errors.
# An empty device list simply skips the loop; a RuntimeError raised while
# enabling memory growth is printed instead of being propagated.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    print(e)

print("GPU configuration set.")


INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 3060 Laptop GPU, compute capability 8.6
GPU configuration set.
GPU configuration set.


In [3]:
# Dataset directories
# The dataset root can be overridden with the FER_DATASET_DIR environment
# variable; when it is unset the original local path is used unchanged.
dataset_path = os.environ.get('FER_DATASET_DIR', r'D:\university\FER\fer_ckplus_kdef')
# os.path.join picks the separator of the running platform instead of a
# hard-coded backslash.
train_dir = os.path.join(dataset_path, 'train')
val_dir = os.path.join(dataset_path, 'val')
test_dir = os.path.join(dataset_path, 'test')

# Image size and batch size optimized for 6GB GPU
img_size = (64, 64)   # (height, width) passed as target_size to the generators
batch_size = 16       # Lower batch size to fit in memory
num_classes = 8       # Width of the final softmax layer


In [4]:
# Random transformations applied to training images only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation and test images are only rescaled, never augmented
val_test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by all three directory iterators
flow_options = dict(target_size=img_size, batch_size=batch_size, class_mode='categorical')

# Load data from directories
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

val_generator = val_test_datagen.flow_from_directory(val_dir, **flow_options)

# Shuffling is disabled for the test split so that predictions stay aligned
# with test_generator.classes when they are compared later in the notebook.
test_generator = val_test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_options)


Found 23650 images belonging to 8 classes.
Found 2631 images belonging to 8 classes.
Found 6573 images belonging to 8 classes.


In [5]:
# Load EfficientNetB4 base model
# The input shape is derived from img_size so the backbone always matches the
# target size used by the data generators instead of a duplicated literal.
efficientnet_base = EfficientNetB4(input_shape=(*img_size, 3), include_top=False, weights='imagenet')
efficientnet_base.trainable = False  # Freeze base model initially

# Vision Transformer-inspired feature extraction
def vit_layer(input_tensor):
    """Build the attention branch of the model on top of ``input_tensor``.

    The branch applies a 3x3 same-padded convolution with 64 filters, flattens
    the result, projects it to 128 units, reshapes those units into a
    sequence of 16 tokens of width 8, runs multi-head self-attention
    (8 heads, key_dim=8) over that sequence and averages over the token axis.

    Args:
        input_tensor: Keras tensor holding the image batch.

    Returns:
        Keras tensor produced by the final GlobalAveragePooling1D layer.

    NOTE(review): the Dense layer that follows Flatten carries 33,554,560
    parameters for a 64x64x3 input according to the model summary, which
    dominates this branch's size.
    """
    conv_maps = layers.Conv2D(64, (3, 3), padding="same", activation="relu")(input_tensor)
    flat = layers.Flatten()(conv_maps)
    embedding = layers.Dense(128, activation='relu')(flat)
    # 128 units -> 16 tokens x 8 features, the sequence layout attention needs
    tokens = layers.Reshape((16, 8))(embedding)
    # Self-attention: the token sequence serves as both query and value
    attended = layers.MultiHeadAttention(num_heads=8, key_dim=8)(tokens, tokens)
    return layers.GlobalAveragePooling1D()(attended)

# Input Layer
# Shape follows img_size so the model matches the generators' target size.
inputs = layers.Input(shape=(*img_size, 3))

# Extract features using EfficientNet and ViT
# The data generators rescale pixels to [0, 1], but Keras EfficientNet models
# include their own preprocessing and expect pixel values in [0, 255].
# The original range is therefore restored for the EfficientNet branch only;
# the attention branch keeps consuming the [0, 1] inputs.
eff_net_inputs = layers.Rescaling(255.0)(inputs)
# training=False keeps the frozen backbone in inference mode.
eff_net_features = efficientnet_base(eff_net_inputs, training=False)
eff_net_features = layers.GlobalAveragePooling2D()(eff_net_features)

vit_features = vit_layer(inputs)

# Concatenate both models' outputs
merged_features = layers.concatenate([eff_net_features, vit_features])

# Fully connected layers
x = layers.Dense(256, activation='relu')(merged_features)
x = layers.Dropout(0.5)(x)
x = layers.Dense(128, activation='relu')(x)
# The notebook enables the mixed_float16 policy; the softmax output is kept in
# float32 so the probabilities and the loss are computed at full precision.
x = layers.Dense(num_classes, activation='softmax', dtype='float32')(x)

# Define and compile the model
model = Model(inputs, x)

model.compile(optimizer=Adam(learning_rate=0.0001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 64, 64, 3)]  0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 64, 64, 64)   1792        ['input_2[0][0]']                
                                                                                                  
 flatten (Flatten)              (None, 262144)       0           ['conv2d[0][0]']                 
                                                                                                  
 dense (Dense)                  (None, 128)          33554560    ['flatten[0][0]']               

In [6]:
# Callbacks, both driven by the validation loss:
# stop after 5 epochs without improvement and restore the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
# multiply the learning rate by 0.2 after 3 stagnant epochs, never below 1e-6
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=1e-6,
)
training_callbacks = [early_stopping, reduce_lr]

# Train the model for at most 30 epochs
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=30,
    callbacks=training_callbacks,
)


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30

KeyboardInterrupt: 

In [None]:
# Score the model on the held-out test split; the result unpacks into the
# loss followed by the single compiled metric (accuracy).
test_metrics = model.evaluate(test_generator)
test_loss, test_accuracy = test_metrics
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


In [None]:
# Predict test set
y_pred = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred, axis=1)  # most probable class per image
y_true = test_generator.classes

# Class names ordered explicitly by their integer index so that row/column i
# of the matrix is always labelled with the class whose index is i.
class_labels = [
    name
    for name, _ in sorted(test_generator.class_indices.items(), key=lambda item: item[1])
]
# Passing the full label list keeps the matrix and the report at one entry per
# class even if some class appears in neither y_true nor the predictions;
# without it the matrix would shrink and no longer line up with class_labels.
label_ids = list(range(len(class_labels)))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred_classes, labels=label_ids)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_labels, yticklabels=class_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Classification report
print("Classification Report:")
print(classification_report(y_true, y_pred_classes, labels=label_ids, target_names=class_labels))


In [None]:
from tensorflow.keras.preprocessing import image

def predict_emotion(img_path):
    """Return the predicted class name for the image stored at ``img_path``.

    The image is loaded at ``img_size``, scaled by 1/255 (the same rescaling
    the data generators apply), wrapped in a batch of one and passed to the
    notebook-level ``model``.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        The key of ``train_generator.class_indices`` found at the position of
        the highest predicted score.
    """
    loaded = image.load_img(img_path, target_size=img_size)
    pixels = image.img_to_array(loaded) / 255.0
    # Add a leading batch axis: the model is called on a batch of one image
    batch = pixels[np.newaxis, ...]

    scores = model.predict(batch)
    # assumes the key order of class_indices follows the class indices — TODO confirm
    class_labels = list(train_generator.class_indices.keys())
    return class_labels[np.argmax(scores)]

# Example usage
# The sample path is built from test_dir so it follows the dataset location
# defined in the configuration cell instead of repeating the absolute path.
img_path = os.path.join(test_dir, 'happy', 'image1.jpg')
predicted_class = predict_emotion(img_path)
print(f"Predicted Emotion: {predicted_class}")
