In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,  GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.preprocessing import image
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import VGG16, ResNet50, MobileNetV2, EfficientNetV2B0
from tensorflow.keras.applications.efficientnet_v2 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import numpy as np
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
from pathlib import Path

In [None]:
# ! pip install kagglehub

In [None]:
# ! pip install streamlit

## Downloading Dataset

In [None]:
# import kagglehub

# # Download latest version
# path = kagglehub.dataset_download("jonathanoheix/face-expression-recognition-dataset")

# print("Path to dataset files:", path)

In [None]:
# Dataset layout: point BASE_DIR at the folder that holds the three splits.
BASE_DIR = Path('images')
TRAIN_DIR, VALID_DIR, SAMPLE_DIR = (
    BASE_DIR / split_name for split_name in ('train', 'validation', 'sample')
)

# Input geometry and batch size shared by the data generators and the model.
IMG_HEIGHT = IMG_WIDTH = 224
IMG_CHANNELS, BATCH_SIZE = 3, 32

# Report whether each expected split folder is present before anything reads from it.
print(
    f"Train directory exists: {TRAIN_DIR.exists()}",
    f"Valid directory exists: {VALID_DIR.exists()}",
    f"Sample directory exists: {SAMPLE_DIR.exists()}",
    sep="\n",
)

In [None]:
# Count images in each set
def count_images(directory, patterns=('*.jpg',)):
    """Count image files per class folder and in total.

    Args:
        directory: Folder (``str`` or ``Path``) whose immediate sub-folders are
            the classes. Files sitting directly in ``directory`` are ignored.
        patterns: Glob patterns that identify an image inside a class folder.
            The default keeps the original ``'*.jpg'``-only behaviour. A file
            matching several patterns is counted once per matching pattern,
            so pass non-overlapping patterns.

    Returns:
        ``(total, class_counts)`` where ``class_counts`` maps each class folder
        name to its number of matching files (inserted in sorted folder order)
        and ``total`` is the sum of those counts.

    Raises:
        FileNotFoundError: If ``directory`` does not exist.
    """
    root = Path(directory)  # accept plain strings as well as Path objects
    class_counts = {}

    for class_dir in sorted(root.iterdir()):
        if not class_dir.is_dir():
            continue
        # Count lazily instead of materialising every path in a list.
        class_counts[class_dir.name] = sum(
            1 for pattern in patterns for _ in class_dir.glob(pattern)
        )

    return sum(class_counts.values()), class_counts

# Tally each split. Train/validation are organised in class folders and go
# through count_images; the sample folder is flat, so its entries are counted directly.
(train_total, train_counts), (valid_total, valid_counts) = (
    count_images(TRAIN_DIR),
    count_images(VALID_DIR),
)
sample_total = len(os.listdir(SAMPLE_DIR))

print(
    f"Training images: {train_total}",
    f"Validation images: {valid_total}",
    f"Sample images: {sample_total}",
    f"\nNumber of classes (emotions): {len(train_counts)}",
    f"\nAll 7 Emotions: {list(train_counts)[:10]}",
    sep="\n",
)

In [None]:
# Per-class entry counts for the training split, in the order they are reported.
# os.listdir counts every entry in the folder, not only image files.
TRAIN_REPORT_ORDER = ('happy', 'sad', 'fear', 'disgust', 'angry', 'neutral', 'surprise')
train_file_counts = {
    emotion: len(os.listdir(os.path.join(TRAIN_DIR, emotion)))
    for emotion in TRAIN_REPORT_ORDER
}

# Keep the individual names available for any cell that still refers to them.
(train_happy, train_sad, train_fear, train_disgust,
 train_angry, train_neutral, train_surprise) = train_file_counts.values()

for emotion, n_files in train_file_counts.items():
    print(f"Training {emotion}: {n_files} images")
print(f"Total training images: {sum(train_file_counts.values())}")


In [None]:
# Per-class entry counts for the validation split, in the order they are reported.
# os.listdir counts every entry in the folder, not only image files.
VALID_REPORT_ORDER = ('happy', 'sad', 'fear', 'disgust', 'angry', 'neutral', 'surprise')
valid_file_counts = {
    emotion: len(os.listdir(os.path.join(VALID_DIR, emotion)))
    for emotion in VALID_REPORT_ORDER
}

# Keep the individual names available for any cell that still refers to them.
(valid_happy, valid_sad, valid_fear, valid_disgust,
 valid_angry, valid_neutral, valid_surprise) = valid_file_counts.values()

for emotion, n_files in valid_file_counts.items():
    print(f"Validation {emotion}: {n_files} images")
print(f"Total validation images: {sum(valid_file_counts.values())}")


## Visualisations

In [None]:
# Tabulate the per-class training counts (one row per class) and preview the
# five largest classes.
df = pd.DataFrame({
    'Class': list(train_counts.keys()),
    'Count': list(train_counts.values()),
})
df.sort_values('Count', ascending=False).head(5)

In [None]:
# Summary statistics of the per-class training image counts held in `df`.
df.describe()

In [None]:
# Visualize class distribution
def plot_class_distribution(class_counts, title="Class Distribution", n=7):
    """Draw a bar chart of the largest classes and print summary statistics.

    Args:
        class_counts: Mapping of class name -> number of images.
        title: Chart title; the number of bars actually drawn is appended in
            parentheses.
        n: Maximum number of classes to show, largest first.

    The printed mean/std/min/max describe only the classes that are plotted.
    """
    top = (
        pd.DataFrame(list(class_counts.items()), columns=['Class', 'Count'])
        .sort_values('Count', ascending=False)
        .head(n)
    )
    shown = len(top)
    positions = list(range(shown))

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.bar(positions, top['Count'], color='steelblue')
    ax.set_xlabel('Emotion', fontsize=12)
    ax.set_ylabel('Number of Images', fontsize=12)
    # Use the number of bars drawn: it is smaller than `n` when there are fewer classes.
    ax.set_title(f'{title} ({shown})', fontsize=14)
    ax.set_xticks(positions)
    # Centre the vertical labels on their ticks (right-aligning shifted them sideways).
    ax.set_xticklabels(top['Class'], rotation=90, ha='center', fontsize=8)
    fig.tight_layout()
    plt.show()

    print(f"Mean images per class: {top['Count'].mean():.2f}")
    print(f"Std images per class: {top['Count'].std():.2f}")
    print(f"Min images per class: {top['Count'].min()}")
    print(f"Max images per class: {top['Count'].max()}")

plot_class_distribution(train_counts, "Training Set Class Distribution")

In [None]:
# ! pip install plotly

In [None]:
# Total image count per class, largest first (displayed as the cell output).
df.groupby('Class')['Count'].sum().sort_values(ascending=False)

In [None]:
# Series of total image counts indexed by class name, largest first.
Class_Id_Dist_Total = df.groupby('Class')['Count'].sum().sort_values(ascending=False)
# Class_Id_Dist_Total = df['Class'].value_counts(sort=False)


import plotly.express as px
# Donut chart (hole=0.500) with one slice per class, sized by its image count.
fig=px.pie(Class_Id_Dist_Total,values= Class_Id_Dist_Total.values, names=Class_Id_Dist_Total.index,hole=0.500)
# NOTE(review): height/width are set on the annotation dict, not on the figure
# layout — confirm that sizing the annotation box is what was intended.
fig.update_layout(title='Data Distribution of Emotions Dataset',font_size=15,title_x=0.45,annotations=[dict(text='Emotions Dataset',font_size=12, showarrow=False,height=1000,width=1000)])
# Label each slice with its percentage share only.
fig.update_traces(textfont_size=15,textinfo='percent')
fig.show()

In [None]:
def plot_sample_images(directory, num_samples=12, figsize=(15, 10)):
    """Show one randomly chosen image from each of up to `num_samples` random classes.

    Args:
        directory: Path whose sub-folders are the classes. Only ``*.jpg`` files
            inside a class folder are candidates.
        num_samples: Upper bound on the number of classes to display.
        figsize: Figure size handed to matplotlib.

    The grid is four columns wide and gains rows as needed, so any number of
    selected classes fits. Slots without an image are left blank.
    """
    classes = sorted(d.name for d in directory.iterdir() if d.is_dir())
    if not classes:
        print(f"No class folders found in {directory}")
        return
    selected_classes = np.random.choice(classes, min(num_samples, len(classes)), replace=False)

    # Size the grid from the selection; a fixed 2x4 grid overflowed past 8 classes.
    n_cols = 4
    n_rows = max(1, (len(selected_classes) + n_cols - 1) // n_cols)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize, squeeze=False)
    axes = axes.ravel()

    # Blank every slot first so unused cells do not show empty tick frames.
    for ax in axes:
        ax.axis('off')

    for ax, class_name in zip(axes, selected_classes):
        images = list((directory / class_name).glob('*.jpg'))
        if not images:
            continue
        random_image = np.random.choice(images)
        img = load_img(random_image, target_size=(IMG_HEIGHT, IMG_WIDTH))
        ax.imshow(img)
        ax.set_title(class_name, fontsize=10)

    plt.tight_layout()
    plt.suptitle('Sample Emotion Images', fontsize=16, y=1.02)
    plt.show()

plot_sample_images(TRAIN_DIR)

In [None]:
# sns.heatmap(df, cmap='coolwarm')

In [None]:
# Location of the reference photo used for the pixel-level walkthrough below.
happy_image_path = str(SAMPLE_DIR / 'happy_img.webp')

print("Loading image from: " + happy_image_path)

In [None]:
# Read the sample photo with Pillow and report its basic metadata.
happy_image = Image.open(happy_image_path)

print(
    "Image loaded successfully!",
    f"Image format: {happy_image.format}",
    f"Image mode: {happy_image.mode}",
    f"Image size: {happy_image.size}",
    sep="\n",
)

plt.imshow(happy_image)

In [None]:
# Pixel data as a NumPy array. The indexing below assumes a 3-D array whose
# last axis holds at least three colour planes in R, G, B order.
happy_array = np.array(happy_image)

red_channel, green_channel, blue_channel = (
    happy_array[:, :, plane] for plane in range(3)
)

print(
    f"Red channel shape: {red_channel.shape}",
    f"Green channel shape: {green_channel.shape}",
    f"Blue channel shape: {blue_channel.shape}",
    sep="\n",
)

In [None]:
# Show the original photo next to each colour plane, one panel per entry.
fig, axes = plt.subplots(1, 4, figsize=(16, 4))

# (pixels, panel title, colormap); None keeps matplotlib's default rendering.
panels = (
    (happy_image, 'Original Image', None),
    (red_channel, 'Red Channel', 'Reds'),
    (green_channel, 'Green Channel', 'Greens'),
    (blue_channel, 'Blue Channel', 'Blues'),
)

for ax, (pixels, panel_title, colormap) in zip(axes, panels):
    ax.imshow(pixels, cmap=colormap)
    ax.set_title(panel_title)
    ax.axis('off')

plt.tight_layout()
plt.show()


In [None]:
# Raw intensities of a 5x5 window (rows 50-54, columns 50-54) of the red plane.
patch_window = slice(50, 55)
small_patch = happy_array[patch_window, patch_window, 0]

print("Pixel values from a 5x5 patch (Red channel):", small_patch, sep="\n")

## Data Preprocessing and Augmentation

In [None]:
# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=40,        # rotate by up to 40 degrees
    width_shift_range=0.2,    # shift horizontally by up to 20% of the width
    height_shift_range=0.2,   # shift vertically by up to 20% of the height
    shear_range=0.2,
    zoom_range=0.5,
    horizontal_flip=True,
    fill_mode='nearest',      # fill pixels exposed by a transform with the nearest value
)

# No `rescale=1./255` here: pixel scaling is left to `preprocessing_function`.
# NOTE(review): `preprocess_input` is imported from efficientnet_v2 while the
# model built later is MobileNetV2 — confirm the resulting input range is intended.
train_data = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    **augmentation_options,
)
train_data

In [None]:
# Validation images get the same preprocessing function as training images but
# no random augmentation.
val_data = ImageDataGenerator(preprocessing_function=preprocess_input)
val_data

In [None]:
# (height, width) every image is resized to; taken from the config constants so
# the generators and the model input cannot drift apart.
target_size = (IMG_HEIGHT, IMG_WIDTH)

In [None]:
# Stream augmented training batches from the class sub-folders of TRAIN_DIR.
# Labels are one-hot vectors (class_mode='categorical').
train_gen = train_data.flow_from_directory(
    TRAIN_DIR,
    target_size=target_size,
    batch_size=BATCH_SIZE,  # shared config constant instead of a repeated literal
    class_mode='categorical',
)


In [None]:
# Stream validation batches from the class sub-folders of VALID_DIR.
val_gen = val_data.flow_from_directory(
    VALID_DIR,
    target_size=target_size,
    batch_size=BATCH_SIZE,  # shared config constant instead of a repeated literal
    class_mode='categorical',
    # Keep file order fixed so predictions line up with `val_gen.classes`.
    shuffle=False,
)

In [None]:
# Mapping of class name -> integer label index used by the training generator.
train_gen.class_indices

In [None]:
# Stop once validation loss has not improved for 5 consecutive epochs and roll
# the model back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

In [None]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

# Class names in label-index order. Taken from the training generator because
# both the labels used below and the model's output positions follow its mapping.
class_names = list(train_gen.class_indices.keys())

# One weight per class index, so that under-represented classes weigh more in the loss.
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.arange(len(class_names)),
    y=train_gen.classes
)

# Keras takes class weights as {class_index: weight}.
class_weights = {index: float(weight) for index, weight in enumerate(balanced_weights)}

In [None]:
# MobileNetV2 pretrained on ImageNet, used as a frozen feature extractor.
INPUT_SHAPE = (IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)

base_model = MobileNetV2(
    include_top=False,
    weights='imagenet',
    input_shape=INPUT_SHAPE
)

base_model.trainable = False

model = Sequential([
    tf.keras.Input(shape=INPUT_SHAPE),
    # The generators use efficientnet_v2.preprocess_input, which leaves pixels
    # unchanged (0-255), whereas MobileNetV2's pretrained weights expect inputs
    # in [-1, 1]. Scaling inside the model also keeps the saved model
    # self-contained: callers pass raw pixel values.
    tf.keras.layers.Rescaling(1.0 / 127.5, offset=-1.0),
    base_model,
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    # One output per class found by the training generator.
    Dense(len(train_gen.class_indices), activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Train the classifier head for up to 10 epochs, validating after each epoch,
# with early stopping and per-class loss weights.
fit_options = dict(
    validation_data=val_gen,
    epochs=10,
    callbacks=[early_stopping],
    class_weight=class_weights,
)
history = model.fit(train_gen, **fit_options)

In [None]:
# # [AI-Generated]
# base_model.trainable = True

# for layer in base_model.layers[:-40]:
#     layer.trainable = False

# model.compile(
#     optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
#     loss='categorical_crossentropy',
#     metrics=['accuracy']
# )

# history2 = model.fit(
#     train_gen,
#     validation_data=val_gen,
#     epochs=20,
#     callbacks=[early_stopping]
# )


#### Visualisation of data augmentation

In [None]:
# Load the reference photo resized to the model's input size and display it.
sample_path_img = str(SAMPLE_DIR / 'happy_img.webp')
sample_img = image.load_img(sample_path_img, target_size=target_size)
sample_img

In [None]:
# Convert the photo to an array, then show the shape it has once a leading
# batch axis of length 1 is added. Note that `arr_img` holds the shape tuple,
# not the image data.
arr = image.img_to_array(sample_img)
arr_img = arr[np.newaxis, ...].shape
arr_img

### Training curves

In [None]:
# Plot every metric recorded in `history.history` on one set of axes.
pd.DataFrame(history.history).plot()

In [None]:
# Training vs. validation loss per epoch, labelled so the figure reads on its own.
fig, ax = plt.subplots()
ax.plot(history.history['val_loss'], label='validation loss')
ax.plot(history.history['loss'], label='training loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('Training and validation loss')
ax.legend()
plt.show()

In [None]:
# Training vs. validation accuracy per epoch, labelled so the figure reads on its own.
fig, ax = plt.subplots()
ax.plot(history.history['accuracy'], label='training accuracy')
ax.plot(history.history['val_accuracy'], label='validation accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Training and validation accuracy')
ax.legend()
plt.show()

In [None]:
# Write the trained model to 'emotion.keras' in the working directory.
model.save('emotion.keras')

In [None]:
from tensorflow.keras import models
# Reload the file saved above; from here on `model` is the deserialized copy,
# not the in-memory object that was trained.
model = models.load_model('emotion.keras')

In [None]:
import matplotlib.pyplot as plt
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from PIL import Image
import numpy as np

In [None]:
# Predict the emotion of every picture in the sample folder and show the results in a grid.
# Sorting makes the panel order reproducible (os.listdir order is arbitrary).
sample_images = sorted(
    f for f in os.listdir(SAMPLE_DIR)
    if f.endswith(('.jpg', '.jpeg', '.png', '.webp'))
)

# Output position -> label, in the training generator's index order.
class_labels = list(train_gen.class_indices)

# Size the grid from the number of pictures; a fixed 2x4 grid overflowed past 8 images.
n_cols = 4
n_rows = max(1, (len(sample_images) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 4 * n_rows), squeeze=False)
axes = axes.flatten()
for ax in axes:
    ax.axis('off')  # unused slots stay blank

for ax, img_name in zip(axes, sample_images):
    img_path = os.path.join(SAMPLE_DIR, img_name)
    img = image.load_img(img_path, target_size=target_size)
    img_array = np.expand_dims(image.img_to_array(img), axis=0)
    # Apply the same preprocessing function the data generators use. Dividing
    # by 255 here gave the model a value range it never saw during training.
    img_array = preprocess_input(img_array)

    pred = model.predict(img_array, verbose=0)

    class_index = np.argmax(pred[0])
    conf = pred[0][class_index] * 100
    label = class_labels[class_index]

    ax.imshow(img)
    ax.set_title(f'{label}\n({conf:.1f}%)confident')
plt.suptitle('Model Prediction on sample images')
plt.tight_layout()
plt.show()

In [None]:
# Re-run the model on `img_array`, which still holds the last image processed
# by the loop in the previous cell, and print the probabilities with their sum.
pred = model.predict(img_array)
print(pred, f"Sum: {pred[0].sum()}", sep="\n")

In [None]:
# map index → class name, in the training generator's index order
class_labels = list(train_gen.class_indices)

# Sorting makes the panel order reproducible (os.listdir order is arbitrary).
sample_images = sorted(
    f for f in os.listdir(SAMPLE_DIR)
    if f.endswith(('.jpg', '.jpeg', '.png', '.webp'))
)

# Size the grid from the number of pictures; a fixed 2x4 grid overflowed past 8 images.
n_cols = 4
n_rows = max(1, (len(sample_images) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 4 * n_rows), squeeze=False)
axes = axes.flatten()
for ax in axes:
    ax.axis('off')  # unused slots stay blank

for ax, img_name in zip(axes, sample_images):
    img_path = os.path.join(SAMPLE_DIR, img_name)
    # Resize to the size the model was built for; the earlier (150, 150) does
    # not match the 224x224 input the network expects.
    img = load_img(img_path, target_size=target_size)
    img_array = np.expand_dims(img_to_array(img), axis=0)
    # Apply the same preprocessing function the data generators use rather than
    # dividing by 255, so inference sees the value range used in training.
    img_array = preprocess_input(img_array)

    pred = model.predict(img_array, verbose=0)

    class_index = np.argmax(pred[0])
    conf = pred[0][class_index] * 100
    label = class_labels[class_index]

    ax.imshow(img)
    ax.set_title(f'{label}\n({conf:.1f}%)confident')
plt.suptitle('Model Prediction on sample images')
plt.tight_layout()
plt.show()

In [None]:
# Probabilities for the last sample image predicted in the loop above.
print(pred)

In [None]:
# Class probabilities for the validation set. `val_gen` was created with
# shuffle=False, so rows follow the order of `val_gen.classes`.
y_pred = model.predict(val_gen)

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay, classification_report

In [None]:
# Ground-truth integer labels of the validation images, as exposed by the generator.
y_true = val_gen.classes
y_true

In [None]:
# Predicted class index per image: position of the highest probability in each row.
y_pred2 = np.argmax(y_pred, axis=1)

In [None]:

# Per-class precision / recall / F1 on the validation set, to four decimals.
print("Classification Report:\n")
report_text = classification_report(y_true, y_pred2, target_names=class_names, digits=4)
print(report_text)

In [None]:
import numpy as np
from sklearn.metrics import ConfusionMatrixDisplay

# y_pred: model.predict(val_gen), shape = (num_samples, number of classes)
pred_classes = np.argmax(y_pred, axis=1)

# Reuse `class_names` (the generator's index-ordered labels, also used for the
# classification report) instead of another hand-typed copy of the list.
ConfusionMatrixDisplay.from_predictions(
    val_gen.classes,
    pred_classes,
    display_labels=class_names,
)
