In [None]:
import os

# --- 1. ENTER YOUR DETAILS HERE ---
# Replace these with your actual username and key from the API token
os.environ['KAGGLE_USERNAME'] = "nnidhishree"
os.environ['KAGGLE_KEY'] = "KGAT_c8fe61958553f254371c0af7e7592f8e"

# --- 2. VERIFY LOGIN ---
# This command checks if it works. It should list datasets.
!kaggle datasets list | head -n 5

ref                                                           title                                                     size  lastUpdated                 downloadCount  voteCount  usabilityRating  
------------------------------------------------------------  --------------------------------------------------  ----------  --------------------------  -------------  ---------  ---------------  
wardabilal/spotify-global-music-dataset-20092025              Spotify Global Music Dataset (2009–2025)               1289021  2025-11-11 09:43:05.933000           5872        120  1.0              
sadiajavedd/students-academic-performance-dataset             Students_Academic_Performance_Dataset                     8907  2025-10-23 04:16:35.563000          11451        288  1.0              
kundanbedmutha/instagram-analytics-dataset                    Instagram Analytics Dataset                            1090208  2025-11-19 09:28:48.650000           1184         29  1.0              


In [None]:
# 1. Download the RAF-DB dataset (Clean version)
!kaggle datasets download -d nishchalchandel/raf-db-face-emotion-dataset

# 2. Unzip it
!unzip -q raf-db-face-emotion-dataset.zip -d raf_db_dataset

# 3. Print a confirmation
import os
if os.path.exists("raf_db_dataset"):
    print("Success! RAF-DB dataset is downloaded and ready.")
else:
    print("Something went wrong with the unzip.")

Dataset URL: https://www.kaggle.com/datasets/nishchalchandel/raf-db-face-emotion-dataset
License(s): MIT
raf-db-face-emotion-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)
replace raf_db_dataset/test/angry/test_0037_aligned.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: y
replace raf_db_dataset/test/angry/test_0042_aligned.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: Success! RAF-DB dataset is downloaded and ready.


In [None]:
# ==========================================
#        TRAINING PIPELINE (RUN THIS)
# ==========================================
import os
import zipfile
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import EfficientNetB0
from sklearn.utils.class_weight import compute_class_weight

# --- 1. PREPARE DATA ---
# Extract the archive only when the target folder is not there yet.
ZIP_FILE = "raf-db-face-emotion-dataset.zip"
EXTRACT_DIR = "raf_db_dataset"

if not os.path.exists(EXTRACT_DIR):
    print(f"Unzipping {ZIP_FILE}...")
    with zipfile.ZipFile(ZIP_FILE, 'r') as zip_ref:
        zip_ref.extractall(EXTRACT_DIR)
else:
    print("Folder already exists. Skipping unzip.")

# Auto-detect the correct subfolder path: some archives wrap the splits in an
# extra "RAF-DB" directory.
BASE_DIR = EXTRACT_DIR
if "RAF-DB" in os.listdir(EXTRACT_DIR):
    BASE_DIR = os.path.join(EXTRACT_DIR, "RAF-DB")

print(f"Targeting data in: {BASE_DIR}")
TRAIN_DIR = os.path.join(BASE_DIR, "train")
TEST_DIR = os.path.join(BASE_DIR, "test")

# Fail early with a readable message: an existing but incomplete EXTRACT_DIR
# passes the check above and would otherwise surface as a loader error.
for split_dir in (TRAIN_DIR, TEST_DIR):
    if not os.path.isdir(split_dir):
        raise FileNotFoundError(
            f"Expected dataset folder '{split_dir}' is missing. "
            f"Delete '{EXTRACT_DIR}' and re-run this cell to extract {ZIP_FILE} again."
        )

# --- 2. LOAD DATA ---
IMG_SIZE = 224
BATCH_SIZE = 32

# tf.keras.utils.image_dataset_from_directory is the supported entry point;
# the tf.keras.preprocessing alias used previously is deprecated.
print("\nLoading Training Data...")
train_ds = tf.keras.utils.image_dataset_from_directory(
    TRAIN_DIR,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    shuffle=True
)

# The "test" split is what gets passed to fit() as validation data below.
print("Loading Validation Data...")
val_ds = tf.keras.utils.image_dataset_from_directory(
    TEST_DIR,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    label_mode='categorical',
    shuffle=False
)

# --- 3. CLASS WEIGHTS (CRITICAL FOR RAF-DB) ---
print("\nComputing class weights to fix imbalance...")
# One pass over the training set turns the one-hot labels back into class ids.
train_labels = np.concatenate(
    [np.argmax(y.numpy(), axis=1) for _, y in train_ds]
)

present_classes = np.unique(train_labels)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_labels
)
# Key each weight by its real class id (not by its position in the array) so a
# class with no samples cannot shift every later weight onto the wrong label.
# Plain int/float values also keep the printed dict readable.
class_weight_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}
print(f"Weights Applied: {class_weight_dict}")

# --- 4. BUILD MODEL ---
def build_model(num_classes=7, dropout_rate=0.4):
    """Build the augmentation -> frozen EfficientNetB0 -> classifier-head model.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults to
            7, matching the hard-coded value this function used before.
        dropout_rate: Dropout rate applied to the pooled features before the
            output layer. Defaults to the previous fixed value of 0.4.

    Returns:
        An uncompiled Keras model named "Emotion_EfficientNet" whose input is
        an (IMG_SIZE, IMG_SIZE, 3) image tensor.
    """
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # Augmentation: Helps model generalize to webcam images
    x = layers.RandomFlip("horizontal")(inputs)
    x = layers.RandomRotation(0.15)(x)
    x = layers.RandomContrast(0.1)(x)
    x = layers.RandomZoom(0.1)(x)

    # EfficientNetB0 (Pre-trained)
    # include_top=False removes the generic ImageNet classifier.
    # Passing input_tensor=x wires the backbone directly onto the augmented
    # images, so its layers become part of this model's own graph.
    base = EfficientNetB0(include_top=False, weights="imagenet", input_tensor=x)
    base.trainable = False  # Freeze initially; only the head below is trained

    # Custom Emotion Head
    x = base.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)

    return models.Model(inputs, outputs, name="Emotion_EfficientNet")

# Instantiate the network described by build_model().
model = build_model()

# --- 5. COMPILE (WITH FOCAL LOSS IF AVAILABLE) ---
# Use the focal loss when this TF/Keras build provides it, otherwise fall back
# to plain categorical cross-entropy.
if hasattr(tf.keras.losses, "CategoricalFocalCrossentropy"):
    loss_fn = tf.keras.losses.CategoricalFocalCrossentropy(alpha=0.25, gamma=2.0)
    print("Using Focal Loss (Optimal for this dataset)")
else:
    loss_fn = 'categorical_crossentropy'
    print("Using Standard CrossEntropy (Focal Loss not found in this TF version)")

# --- 6. PHASE 1: TRAIN HEAD ---
# Short warm-up with the backbone frozen, so only the new head learns.
print("\n--- Phase 1: Training Top Layers (Warmup) ---")
model.compile(
    loss=loss_fn,
    optimizer=optimizers.Adam(1e-3),
    metrics=['accuracy'],
)
model.fit(
    train_ds,
    epochs=5,
    validation_data=val_ds,
    class_weight=class_weight_dict,
)



Folder already exists. Skipping unzip.
Targeting data in: raf_db_dataset

Loading Training Data...
Found 12271 files belonging to 7 classes.
Loading Validation Data...
Found 1535 files belonging to 7 classes.

Computing class weights to fix imbalance...
Weights Applied: {0: np.float64(2.4865248226950354), 1: np.float64(2.4449093444909344), 2: np.float64(6.238434163701068), 3: np.float64(0.3673512154233026), 4: np.float64(0.6945324881141046), 5: np.float64(0.884460141271443), 6: np.float64(1.3589147286821706)}
Using Focal Loss (Optimal for this dataset)

--- Phase 1: Training Top Layers (Warmup) ---
Epoch 1/5
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 95ms/step - accuracy: 0.2389 - loss: 0.5310 - val_accuracy: 0.4124 - val_loss: 0.2674
Epoch 2/5
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 88ms/step - accuracy: 0.3560 - loss: 0.3644 - val_accuracy: 0.4463 - val_loss: 0.2388
Epoch 3/5
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

<keras.src.callbacks.history.History at 0x7c81e0cf4b00>

In [None]:
# --- 7. PHASE 2: FINE-TUNING (CORRECTED) ---
print("\n--- Phase 2: Fine-Tuning EfficientNet ---")

# 1. Unfreeze the backbone, but leave BatchNormalization layers alone.
# `model.trainable = True` would also switch every backbone BatchNormalization
# layer back to training mode, replacing its pre-trained moving statistics with
# small-batch estimates; the Keras EfficientNet fine-tuning guide advises
# keeping those layers frozen. Skipping BatchNormalization here leaves each one
# as Phase 1 left it: frozen inside EfficientNet, trainable in the custom head.
for layer in model.layers:
    if not isinstance(layer, layers.BatchNormalization):
        layer.trainable = True

# 2. CRITICAL: Recompile with a very low learning rate
# Changes to `trainable` only take effect after compile(), and a large step
# size would wreck the pre-trained weights that were just unfrozen.
model.compile(optimizer=optimizers.Adam(1e-5), loss=loss_fn, metrics=['accuracy'])

# 3. Resume Training
# Stop once val_loss stalls for 5 epochs (restoring the best weights) and cut
# the learning rate to a fifth after 2 stalled epochs.
callbacks_list = [
    callbacks.EarlyStopping(patience=5, restore_best_weights=True, monitor='val_loss'),
    callbacks.ReduceLROnPlateau(factor=0.2, patience=2, monitor='val_loss')
]

history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    class_weight=class_weight_dict,
    callbacks=callbacks_list
)

# --- 8. SAVE ---
model.save("raf_db_model.keras")
print("\nSUCCESS! Model saved as 'raf_db_model.keras'")


--- Phase 2: Fine-Tuning EfficientNet ---
Epoch 1/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 101ms/step - accuracy: 0.4279 - loss: 0.2808 - val_accuracy: 0.4860 - val_loss: 0.2221 - learning_rate: 1.0000e-05
Epoch 2/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 89ms/step - accuracy: 0.4351 - loss: 0.2779 - val_accuracy: 0.4938 - val_loss: 0.2198 - learning_rate: 1.0000e-05
Epoch 3/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 91ms/step - accuracy: 0.4465 - loss: 0.2730 - val_accuracy: 0.4977 - val_loss: 0.2183 - learning_rate: 1.0000e-05
Epoch 4/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 89ms/step - accuracy: 0.4472 - loss: 0.2707 - val_accuracy: 0.5036 - val_loss: 0.2168 - learning_rate: 1.0000e-05
Epoch 5/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 88ms/step - accuracy: 0.4323 - loss: 0.2676 - val_accuracy: 0.5075 - val_loss: 0.2166 - learning_rate: 1.0000e

In [None]:
# --- 8. SAVE ---
# Write the current in-memory model to disk as a .keras archive.
model.save(filepath="raf_db_model.keras")
print("\nSUCCESS! Model saved as 'raf_db_model.keras'")


SUCCESS! Model saved as 'raf_db_model.keras'


In [None]:
# ==========================================
#      PHASE 3: AGGRESSIVE FINE-TUNING
# ==========================================
print("--- Phase 3: Aggressive Fine-Tuning (Maximizing Accuracy) ---")

# 1. Load the model saved after Phase 2 so training resumes from it instead
#    of starting from zero.
model = tf.keras.models.load_model('raf_db_model.keras')

# 2. Unfreeze everything EXCEPT the backbone's BatchNormalization layers
#    Setting `model.trainable = True` would also put the EfficientNet
#    BatchNormalization layers into training mode; the Keras EfficientNet
#    fine-tuning guide advises keeping them frozen while the other weights
#    are tuned. The head's own BatchNormalization stays trainable.
#    NOTE(review): the head layer is taken to be the LAST BatchNormalization in
#    model.layers (it sits after the backbone) -- re-check if the head changes.
batchnorm_layers = [
    layer for layer in model.layers
    if isinstance(layer, tf.keras.layers.BatchNormalization)
]
head_batchnorm = batchnorm_layers[-1] if batchnorm_layers else None
for layer in model.layers:
    is_backbone_batchnorm = (
        isinstance(layer, tf.keras.layers.BatchNormalization)
        and layer is not head_batchnorm
    )
    layer.trainable = not is_backbone_batchnorm

# 3. SWITCH TO STANDARD LOSS & HIGHER LR
#    Plain cross-entropy replaces the loss stored in the checkpoint, and the
#    learning rate goes up to 1e-4.
model.compile(
    optimizer=optimizers.Adam(learning_rate=1e-4), # Higher speed
    loss='categorical_crossentropy',               # Standard loss
    metrics=['accuracy']
)

# 4. Train WITHOUT Class Weights
#    Deliberate trade-off: without class weights the frequent classes dominate
#    the loss, which is meant to lift overall accuracy ("Happy", "Neutral") at
#    the expense of the rare classes. Check per-class metrics before relying on
#    this model for the minority emotions.
history_aggressive = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=15,
    # class_weight=class_weight_dict is intentionally not passed here
    callbacks=[
        callbacks.EarlyStopping(patience=4, restore_best_weights=True, monitor='val_accuracy'),
        callbacks.ReduceLROnPlateau(factor=0.5, patience=2, min_lr=1e-6, monitor='val_loss')
    ]
)



--- Phase 3: Aggressive Fine-Tuning (Maximizing Accuracy) ---
Epoch 1/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 97ms/step - accuracy: 0.4794 - loss: 1.4167 - val_accuracy: 0.5837 - val_loss: 1.1867 - learning_rate: 1.0000e-04
Epoch 2/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 93ms/step - accuracy: 0.5248 - loss: 1.2955 - val_accuracy: 0.5915 - val_loss: 1.1399 - learning_rate: 1.0000e-04
Epoch 3/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 88ms/step - accuracy: 0.5333 - loss: 1.2785 - val_accuracy: 0.5948 - val_loss: 1.1254 - learning_rate: 1.0000e-04
Epoch 4/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 89ms/step - accuracy: 0.5501 - loss: 1.2377 - val_accuracy: 0.5896 - val_loss: 1.1193 - learning_rate: 1.0000e-04
Epoch 5/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 94ms/step - accuracy: 0.5421 - loss: 1.2396 - val_accuracy: 0.5961 - val_loss: 1.1130 - lear

In [None]:
# 5. Save the high-accuracy version
# Stored under its own file name so the Phase 2 checkpoint is not overwritten.
model.save(filepath="raf_db_aggressive.keras")
print("\nDONE. This model should have much higher raw accuracy.")


DONE. This model should have much higher raw accuracy.


In [None]:
from google.colab import files
import os

# Checkpoint files written by the earlier save cells
model_files = ["raf_db_model.keras", "raf_db_aggressive.keras"]

print("Checking for saved models...")

# Offer every checkpoint that exists on disk as a browser download and
# remember which ones were found.
downloaded = []
for filename in model_files:
    if not os.path.exists(filename):
        print(f"Could not find {filename} (maybe it wasn't saved yet).")
        continue
    size_mb = os.path.getsize(filename) / 1e6
    print(f"Downloading {filename} ({size_mb:.2f} MB)...")
    files.download(filename)
    downloaded.append(filename)

found_any = bool(downloaded)
if not found_any:
    print("\nNo models found! Did you run the 'model.save()' line?")

Checking for saved models...
Downloading raf_db_model.keras (17.20 MB)...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading raf_db_aggressive.keras (17.20 MB)...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import os
# Show the top-level entries of the extracted dataset folder.
dataset_entries = os.listdir("raf_db_dataset")
print("Contents of raf_db_dataset:", dataset_entries)

Contents of raf_db_dataset: ['test', 'validation', 'train']


In [None]:
# ==========================================
#      BALANCED TRAINING (FIXED PATHS)
# ==========================================
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks
from tensorflow.keras.applications import EfficientNetB0
from sklearn.utils.class_weight import compute_class_weight
import numpy as np
import os

# 1. CLEANUP
# Resets Keras' global state so layer names and graphs from earlier cells do
# not pile up.
tf.keras.backend.clear_session()
print("GPU Memory Cleared.")

# 2. SETUP PATHS
BASE_DIR = "raf_db_dataset"
IMG_SIZE = 224
BATCH_SIZE = 32


def _load_split(split_name, shuffle):
    """Load one split folder under BASE_DIR as a batched, one-hot image dataset."""
    # tf.keras.utils is the supported home of this loader; the
    # tf.keras.preprocessing alias used previously is deprecated.
    return tf.keras.utils.image_dataset_from_directory(
        os.path.join(BASE_DIR, split_name),
        image_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        label_mode='categorical',
        shuffle=shuffle,
    )


def _class_folders(split_name):
    """Return the sorted, non-hidden sub-folder names of a split ([] if absent)."""
    split_dir = os.path.join(BASE_DIR, split_name)
    if not os.path.isdir(split_dir):
        return []
    return sorted(
        entry for entry in os.listdir(split_dir)
        if not entry.startswith(".") and os.path.isdir(os.path.join(split_dir, entry))
    )


print(f"Loading Data from {BASE_DIR}...")
train_ds = _load_split("train", shuffle=True)

# The dataset folder holds 'train', 'validation' and 'test'. Early stopping and
# learning-rate scheduling are driven by the validation data, so feeding them
# the test split makes the reported test score optimistic. Use the dedicated
# 'validation' folder when it has the same class folders as 'train', and keep
# 'test' untouched for a final evaluation; otherwise fall back to 'test'.
VAL_SPLIT = "test"
if _class_folders("validation") == list(train_ds.class_names):
    VAL_SPLIT = "validation"
else:
    print("No usable 'validation' folder found; falling back to 'test' for validation.")
print(f"Validating on the '{VAL_SPLIT}' split.")
val_ds = _load_split(VAL_SPLIT, shuffle=False)

# 3. COMPUTE CLASS WEIGHTS
print("Computing weights...")
# One pass over the training set turns the one-hot labels back into class ids.
train_labels = np.concatenate(
    [np.argmax(y.numpy(), axis=1) for _, y in train_ds]
)

present_classes = np.unique(train_labels)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=present_classes,
    y=train_labels
)
# Key each weight by its real class id (not by its position in the array) so a
# class with no samples cannot shift every later weight onto the wrong label.
class_weight_dict = {
    int(class_id): float(weight)
    for class_id, weight in zip(present_classes, class_weights)
}
print(f"Weights: {class_weight_dict}")

# 4. BUILD MODEL (Balanced Augmentation)
def build_model(num_classes=7, dropout_rate=0.3):
    """Build the fully trainable EfficientNetB0 classifier for the balanced run.

    Args:
        num_classes: Number of units in the softmax output layer. Defaults to
            7, matching the hard-coded value this function used before.
        dropout_rate: Dropout rate applied to the pooled features before the
            output layer. Defaults to the previous fixed value of 0.3.

    Returns:
        An uncompiled Keras model named "EfficientNet_Balanced" whose input is
        an (IMG_SIZE, IMG_SIZE, 3) image tensor.
    """
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    # MODERATE AUGMENTATION
    # Flip + Slight Rotation/Contrast. No Zoom (to preserve faces).
    x = layers.RandomFlip("horizontal")(inputs)
    x = layers.RandomRotation(0.1)(x)
    x = layers.RandomContrast(0.1)(x)

    # EfficientNetB0 (Unfrozen): the whole backbone is trained from the first
    # epoch, unlike the frozen warm-up used by the earlier pipeline.
    base = EfficientNetB0(include_top=False, weights="imagenet", input_tensor=x)
    base.trainable = True

    x = base.output
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(dropout_rate)(x)  # Moderate dropout

    outputs = layers.Dense(num_classes, activation="softmax")(x)
    return models.Model(inputs, outputs, name="EfficientNet_Balanced")

# Instantiate the fully trainable network.
model = build_model()

# 5. COMPILE
# Plain cross-entropy; the class imbalance is handled by class_weight in fit().
model.compile(
    loss='categorical_crossentropy',
    optimizer=optimizers.Adam(learning_rate=1e-4),
    metrics=['accuracy'],
)

# 6. TRAIN
print("\nStarting Balanced Training...")
# Stop after 5 epochs without a val_accuracy gain and restore the best weights.
stop_early = callbacks.EarlyStopping(
    monitor='val_accuracy', patience=5, restore_best_weights=True
)
# Cut the learning rate to a fifth after 3 epochs without a val_loss gain.
shrink_lr = callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3)

history = model.fit(
    train_ds,
    epochs=15,
    validation_data=val_ds,
    class_weight=class_weight_dict,
    callbacks=[stop_early, shrink_lr],
)



GPU Memory Cleared.
Loading Data from raf_db_dataset...
Found 12271 files belonging to 7 classes.
Found 1535 files belonging to 7 classes.
Computing weights...
Weights: {0: np.float64(2.4865248226950354), 1: np.float64(2.4449093444909344), 2: np.float64(6.238434163701068), 3: np.float64(0.3673512154233026), 4: np.float64(0.6945324881141046), 5: np.float64(0.884460141271443), 6: np.float64(1.3589147286821706)}

Starting Balanced Training...
Epoch 1/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 358ms/step - accuracy: 0.2219 - loss: 2.4424 - val_accuracy: 0.4313 - val_loss: 1.5609 - learning_rate: 1.0000e-04
Epoch 2/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 347ms/step - accuracy: 0.4333 - loss: 1.6381 - val_accuracy: 0.5381 - val_loss: 1.3034 - learning_rate: 1.0000e-04
Epoch 3/15
[1m384/384[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 349ms/step - accuracy: 0.5261 - loss: 1.3370 - val_accuracy: 0.6241 - val_loss: 1.0771 - 

In [None]:
# 7. SAVE
# Write the balanced-run model to its own .keras archive.
model.save(filepath="raf_db_balanced.keras")
print("Saved as 'raf_db_balanced.keras'")

Saved as 'raf_db_balanced.keras'


In [None]:
from google.colab import files
import os

# Checkpoint written by the balanced training run
filename = "raf_db_balanced.keras"

# Report a missing file first; otherwise print its size and start the download.
if not os.path.exists(filename):
    print(f"Error: Could not find {filename}. Did the training finish and save?")
else:
    size_mb = os.path.getsize(filename) / 1e6
    print(f"Found {filename} ({size_mb:.2f} MB). Downloading...")
    files.download(filename)

Found raf_db_balanced.keras (49.42 MB). Downloading...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>