# Import Libraries

In [18]:
# Standard library
import os

# Data manipulation
import pandas as pd
import numpy as np

# Machine learning utilities
from sklearn.model_selection import train_test_split
from sklearn.utils import compute_class_weight, resample
from sklearn.metrics import classification_report, confusion_matrix

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# TensorFlow and Keras
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Data Preparation

In [19]:
# Dataset folder layout: DATA_DIR contains one sub-folder per split
DATA_DIR = "data"
train_dir, test_dir = (
    os.path.join(DATA_DIR, split_name) for split_name in ("train", "test")
)

# Build a dataframe of (filepath, label) rows for one dataset split
def create_dataframe(data_dir):
    """Index a directory laid out as ``data_dir/<label>/<file>``.

    Parameters
    ----------
    data_dir : str
        Directory whose immediate sub-directories are the class labels.

    Returns
    -------
    pandas.DataFrame
        One row per regular file found directly inside a class directory,
        with columns ``filepath`` and ``label``. Rows are ordered by label
        and then by file name. The two columns are present even when no
        file is found.
    """
    data = []
    # os.listdir returns entries in arbitrary order; sort so the row order
    # (and therefore every seeded split downstream) is reproducible.
    for label in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, label)
        if not os.path.isdir(class_dir):
            continue
        for file in sorted(os.listdir(class_dir)):
            filepath = os.path.join(class_dir, file)
            # Skip nested directories: only regular files can be samples.
            if os.path.isfile(filepath):
                data.append({"filepath": filepath, "label": label})
    # Explicit columns keep df["label"] valid for an empty directory.
    return pd.DataFrame(data, columns=["filepath", "label"])

train_data = create_dataframe(train_dir)
test_data = create_dataframe(test_dir)

# Initial class distribution of each split
for header, frame in (
    ("Distribusi data train:", train_data),
    ("\nDistribusi data test:", test_data),
):
    print(header)
    print(frame['label'].value_counts())

Distribusi data train:
label
happy       7215
neutral     4965
sad         4830
fear        4097
angry       3995
surprise    3171
disgust      436
Name: count, dtype: int64

Distribusi data test:
label
happy       1774
sad         1247
neutral     1233
fear        1024
angry        958
surprise     831
disgust      111
Name: count, dtype: int64


In [20]:
# Map the 7 emotion classes onto 4 stress categories
stress_mapping = {
    "happy": "no_stress",
    "neutral": "weak_stress",
    "surprise": "weak_stress",
    "disgust": "medium_stress",
    "fear": "medium_stress",
    "sad": "strong_stress",
    "angry": "strong_stress"
}

# Relabel both dataframes in place.
# replace() leaves values that are not keys of the mapping untouched, so
# re-running this cell is a no-op; map() would turn the already-mapped
# labels into NaN on a second run.
stress_labels = set(stress_mapping.values())
for split_name, frame in (("train", train_data), ("test", test_data)):
    frame["label"] = frame["label"].replace(stress_mapping)
    # Fail loudly on class folders the mapping does not know about instead of
    # carrying unmapped labels into the split and the generators.
    unexpected = sorted(set(frame["label"].unique()) - stress_labels, key=str)
    if unexpected:
        raise ValueError(
            f"Unmapped labels in {split_name} data: {unexpected}"
        )

# Class distribution after the mapping
print("\nDistribusi data train setelah mapping:")
print(train_data["label"].value_counts())
print("\nDistribusi data test setelah mapping:")
print(test_data["label"].value_counts())


Distribusi data train setelah mapping:
label
strong_stress    8825
weak_stress      8136
no_stress        7215
medium_stress    4533
Name: count, dtype: int64

Distribusi data test setelah mapping:
label
strong_stress    2205
weak_stress      2064
no_stress        1774
medium_stress    1135
Name: count, dtype: int64


# Data Preprocessing

In [21]:
# Hold out 20% of the training dataframe for validation, stratified by label
split_options = dict(
    test_size=0.2,
    stratify=train_data["label"],
    random_state=42,
)
train_split, val_split = train_test_split(train_data, **split_options)

# Oversample the minority classes
def oversample_data(dataframe, label_col):
    """Balance the classes by randomly duplicating minority-class rows.

    Implemented with pandas/numpy only, so the function no longer needs a
    function-scope import of a package that the import cell does not list.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a ``filepath`` column and the ``label_col`` column.
    label_col : str
        Name of the column holding the class labels.

    Returns
    -------
    pandas.DataFrame
        Columns ``filepath`` and ``label`` with a fresh 0..n-1 index. The
        original rows come first, in their original order, followed by the
        rows drawn with replacement so that every class reaches the size of
        the largest class. An empty input yields an empty frame.
    """
    samples = (
        dataframe[["filepath", label_col]]
        .rename(columns={label_col: "label"})
        .reset_index(drop=True)
    )
    if samples.empty:
        return samples

    # One seeded generator shared by all classes keeps the result
    # deterministic while giving each class its own draw.
    rng = np.random.RandomState(42)
    target_size = int(samples["label"].value_counts().max())

    parts = [samples]
    # sort=False iterates classes in order of first appearance.
    for _, class_rows in samples.groupby("label", sort=False):
        deficit = target_size - len(class_rows)
        if deficit > 0:
            parts.append(
                class_rows.sample(n=deficit, replace=True, random_state=rng)
            )
    return pd.concat(parts, ignore_index=True)

train_split_balanced = oversample_data(train_split, "label")

# Class distribution after oversampling
balanced_counts = train_split_balanced['label'].value_counts()
print("\nDistribusi data setelah oversampling:")
print(balanced_counts)


Distribusi data setelah oversampling:
label
weak_stress      7060
no_stress        7060
medium_stress    7060
strong_stress    7060
Name: count, dtype: int64


In [22]:
# Image settings shared by the three generators
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Training generator: rescale pixels to [0, 1] and apply random augmentation
train_datagen = ImageDataGenerator(
    rescale=1.0/255.0,
    rotation_range=25,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.15,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest"
)

# Validation and test images are only rescaled, never augmented
val_datagen = ImageDataGenerator(rescale=1.0/255.0)
test_datagen = ImageDataGenerator(rescale=1.0/255.0)

# Training batches are shuffled; the seed makes the shuffle reproducible
train_generator = train_datagen.flow_from_dataframe(
    train_split_balanced,
    x_col="filepath",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    seed=42
)

# shuffle=False keeps batch order aligned with val_generator.classes, which
# the evaluation cell compares against model.predict(val_generator).
# With the default shuffle=True those two orders do not match.
val_generator = val_datagen.flow_from_dataframe(
    val_split,
    x_col="filepath",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)

test_generator = test_datagen.flow_from_dataframe(
    test_data,
    x_col="filepath",
    y_col="label",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
    shuffle=False
)


Found 28240 validated image filenames belonging to 4 classes.
Found 5742 validated image filenames belonging to 4 classes.
Found 7178 validated image filenames belonging to 4 classes.


# Model Building

In [23]:
# Plain CNN: four Conv2D + MaxPooling2D stages followed by a dense classifier.
# The input shape is declared with an explicit Input layer; passing
# input_shape to the first Conv2D of a Sequential model is what triggers the
# Keras warning captured in this cell's output.
model = models.Sequential([
    layers.Input(shape=(224, 224, 3)),

    # Convolutional stage 1
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Convolutional stage 2
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Convolutional stage 3
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Convolutional stage 4
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),

    # Flatten the feature maps for the dense layers
    layers.Flatten(),

    # Fully connected layers
    layers.Dense(512, activation='relu'),
    layers.Dense(256, activation='relu'),

    # Output layer: one softmax unit per stress category (4 classes)
    layers.Dense(4, activation='softmax'),
])

# Compile the model
model.compile(optimizer=optimizers.Adam(learning_rate=0.0001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Model Training

In [None]:
# Training callbacks, both driven by the validation loss
early_stopping = callbacks.EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True,
)
checkpoint = callbacks.ModelCheckpoint(
    "test_model.keras",
    monitor="val_loss",
    save_best_only=True,
)
training_callbacks = [early_stopping, checkpoint]

# Train the model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=50,
    callbacks=training_callbacks,
)

# Evaluation

In [None]:
# Evaluate the model on the validation data
val_loss, val_accuracy = model.evaluate(val_generator)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Collect predictions and true labels batch by batch so the two stay aligned
# even when the generator shuffles. Comparing model.predict(val_generator)
# with val_generator.classes is only valid for an unshuffled generator.
y_prob_batches, y_true_batches = [], []
for batch_index in range(len(val_generator)):
    batch_images, batch_labels = val_generator[batch_index]
    y_prob_batches.append(model.predict_on_batch(batch_images))
    y_true_batches.append(batch_labels)

# Predicted classes
y_pred = np.concatenate(y_prob_batches)
y_pred_classes = np.argmax(y_pred, axis=1)

# True classes (labels arrive one-hot encoded because class_mode="categorical")
y_true = np.argmax(np.concatenate(y_true_batches), axis=1)

# Class names ordered by their integer index, as a concrete list
class_names = sorted(val_generator.class_indices, key=val_generator.class_indices.get)
class_ids = list(range(len(class_names)))

# Confusion matrix (labels= fixes the matrix size even if a class is absent)
conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=class_ids)

# Confusion matrix heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True Labels")
plt.show()

# === Classification Report ===
print("Classification Report:")
print(classification_report(y_true, y_pred_classes, labels=class_ids, target_names=class_names))

# === Plot History (accuracy and loss) ===

# One panel per metric, each showing the training and validation curves
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

panels = (
    (accuracy_ax, "accuracy", "Accuracy"),
    (loss_ax, "loss", "Loss"),
)
for ax, metric_key, metric_name in panels:
    ax.plot(history.history[metric_key], label=f"Training {metric_name}")
    ax.plot(history.history[f"val_{metric_key}"], label=f"Validation {metric_name}")
    ax.set_title(f"Training vs Validation {metric_name}")
    ax.set_xlabel("Epochs")
    ax.set_ylabel(metric_name)
    ax.legend()

plt.tight_layout()
plt.show()