# Training Emotion Classification Model

This notebook will guide you through training the emotion classification model for student engagement analysis.

## Step 1: Setup Environment

First, make sure you have all required packages installed. Run this cell to install them:

In [None]:
# Install required packages
!pip install tensorflow pandas numpy matplotlib seaborn scikit-learn opencv-python

## Step 2: Import Required Libraries

Now let's import all the libraries we'll need:

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Seed NumPy's and TensorFlow's global random generators with the same fixed
# value so that repeated runs of the notebook start from the same random state
np.random.seed(42)
tf.random.set_seed(42)

## Step 3: Load and Prepare Data

Download the [Emotion Recognition Dataset](https://www.kaggle.com/datasets/sujaykapadnis/emotion-recognition-dataset/data) from Kaggle and extract it. 

The dataset should have a structure like this:
```
dataset/
    train/
        angry/
        happy/
        neutral/
        sad/
        surprise/
```

Now let's load the data:

In [None]:
def create_dataframe(data_dir, emotions=('angry', 'happy', 'neutral', 'sad', 'surprise')):
    """Build a DataFrame of image paths and integer emotion labels.

    Every emotion is expected to be a sub-directory of ``data_dir``. The label
    assigned to an image is the position of its emotion in ``emotions``.

    Args:
        data_dir: Directory that directly contains one sub-directory per emotion.
        emotions: Ordered class names; the index of a name is its label.

    Returns:
        A DataFrame with one row per image and two columns: 'image_path'
        (path to the file) and 'emotion' (integer class label).

    Raises:
        FileNotFoundError: If one of the emotion sub-directories does not exist.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_paths = []
    labels = []

    for label, emotion in enumerate(emotions):
        class_dir = os.path.join(data_dir, emotion)
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order (and therefore the seeded train/validation split) reproducible.
        for filename in sorted(os.listdir(class_dir)):
            # Compare case-insensitively so files such as "IMG_01.JPG" are not
            # silently skipped.
            if filename.lower().endswith(image_extensions):
                image_paths.append(os.path.join(class_dir, filename))
                labels.append(label)

    return pd.DataFrame({'image_path': image_paths, 'emotion': labels})

# Create DataFrame
# data_dir must be the folder that directly contains the per-emotion
# sub-folders (angry/, happy/, ...), because create_dataframe looks for
# "<data_dir>/<emotion>".
data_dir = "path/to/dataset/train"  # Replace with your dataset path
df = create_dataframe(data_dir)

# Display sample of the data
# shape is (number of images found, 2): one column for the path, one for the label
print("Dataset shape:", df.shape)
df.head()

## Step 4: Create and Compile Model

We'll use MobileNetV2 as our base model and add custom layers for emotion classification:

In [None]:
def create_model(input_shape=(224, 224, 3), num_classes=5, learning_rate=0.001):
    """Create and compile a MobileNetV2-based emotion classifier.

    The network is an ImageNet-pretrained MobileNetV2 without its top layers,
    frozen, followed by a small trainable classification head.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        num_classes: Number of emotion classes, i.e. units in the softmax
            output layer. Must match the width of the one-hot labels.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras Model.
    """
    # Base model - MobileNetV2
    base_model = MobileNetV2(
        weights='imagenet',  # Use pre-trained weights
        include_top=False,   # Don't include the classification layers
        input_shape=input_shape
    )

    # Freeze base model layers so only the new head is trained
    base_model.trainable = False

    # Add classification head
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dropout(0.5)(x)  # Prevent overfitting
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax')(x)  # One unit per class

    model = Model(inputs=base_model.input, outputs=outputs)

    # Compile model; categorical cross-entropy pairs with one-hot labels
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Create model
# Uses create_model's default input shape of (224, 224, 3)
model = create_model()
# Print the layer-by-layer summary of the assembled network
model.summary()

## Step 5: Prepare Data Generator

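We'll load every image into memory, apply the MobileNetV2 preprocessing, one-hot encode the labels, and split off a validation set. Note that this keeps the whole dataset in RAM at once: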

In [None]:
def preprocess_data(df, input_shape=(224, 224), num_classes=5):
    """Load all images listed in ``df`` and one-hot encode their labels.

    Args:
        df: DataFrame with an 'image_path' column (path of each image file)
            and an 'emotion' column (integer class label).
        input_shape: Target (height, width) every image is resized to.
        num_classes: Width of the one-hot label matrix. It is fixed rather
            than inferred from the data so the labels keep matching the
            model's output layer even if the highest class has no images.

    Returns:
        A tuple ``(X, y)``: ``X`` is a float32 array of shape
        (len(df), height, width, 3) holding the MobileNetV2-preprocessed
        images, ``y`` is the one-hot label array of shape
        (len(df), num_classes).
    """
    total = len(df)
    # Pre-allocate the result instead of collecting a list and copying it with
    # np.array afterwards, which would briefly hold the dataset twice.
    # load_img defaults to RGB, hence the 3 channels.
    X = np.empty((total, *input_shape, 3), dtype=np.float32)
    labels = []

    # Count rows by position: the DataFrame index may be non-contiguous or
    # non-numeric (e.g. after filtering), so it is not a reliable counter.
    rows = zip(df['image_path'], df['emotion'])
    for count, (image_path, emotion) in enumerate(rows, start=1):
        # Load and preprocess image
        img = tf.keras.preprocessing.image.load_img(
            image_path,
            target_size=input_shape
        )
        img_array = tf.keras.preprocessing.image.img_to_array(img)
        X[count - 1] = tf.keras.applications.mobilenet_v2.preprocess_input(img_array)
        labels.append(emotion)

        # Print progress
        if count % 1000 == 0:
            print(f"Processed {count} images")

    y = tf.keras.utils.to_categorical(labels, num_classes=num_classes)

    return X, y

# Preprocess data
# Loads every image referenced by df into memory as one array
print("Preprocessing training data...")
X, y = preprocess_data(df)

# Split data
# y is one-hot encoded at this point, so stratification is done on the
# one-hot rows.
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.2,  # Use 20% for validation
    random_state=42,  # For reproducibility
    stratify=y  # Maintain class distribution
)

## Step 6: Train the Model

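Now let's train the model, using callbacks to save the best checkpoint, stop early when validation loss stops improving, and lower the learning rate when progress stalls: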

In [None]:
# Setup callbacks for better training
# NOTE: the checkpoint tracks val_accuracy while early stopping and the
# learning-rate schedule track val_loss, so the file on disk and the weights
# restored in memory may come from different epochs.
callbacks = [
    # Save the best model
    ModelCheckpoint(
        'best_model.h5',  # Save model to this file
        monitor='val_accuracy',  # Watch validation accuracy
        save_best_only=True,  # Only save if better than previous
        mode='max',  # Higher accuracy is better
        verbose=1
    ),
    # Stop if not improving
    EarlyStopping(
        monitor='val_loss',  # Watch validation loss
        patience=10,  # Wait 10 epochs for improvement
        restore_best_weights=True,  # Use best weights when done
        verbose=1
    ),
    # Reduce learning rate when stuck
    ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.1,  # Reduce LR by 90%
        patience=5,  # Wait 5 epochs before reducing
        min_lr=1e-6,  # Don't go below this LR
        verbose=1
    )
]

# Train model
# The returned history object is kept for plotting the per-epoch metrics
# afterwards; the validation split is evaluated each epoch and feeds the
# val_* metrics the callbacks monitor.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,  # Maximum number of epochs
    batch_size=32,  # Process 32 images at a time
    callbacks=callbacks,
    verbose=1  # Show progress
)

## Step 7: Visualize Training Results

Let's plot the training history:

In [None]:
def plot_training_results(history):
    """Show training and validation curves for accuracy and loss side by side.

    Args:
        history: Object returned by ``model.fit``; its ``history`` dict must
            contain 'accuracy', 'val_accuracy', 'loss' and 'val_loss'.
    """
    # Apply the seaborn look before the figure is created
    sns.set_style("whitegrid")
    sns.set_palette("husl")

    plt.figure(figsize=(15, 5))

    # One entry per panel:
    # (history key, panel title, y-axis label, training legend, validation legend)
    panels = [
        ('accuracy', 'Model Accuracy', 'Accuracy',
         'Training Accuracy', 'Validation Accuracy'),
        ('loss', 'Model Loss', 'Loss',
         'Training Loss', 'Validation Loss'),
    ]

    recorded = history.history
    for position, (key, title, y_label, train_label, val_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(recorded[key], label=train_label)
        plt.plot(recorded['val_' + key], label=val_label)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Plot results
# Uses the history object returned by model.fit in the training step
plot_training_results(history)

## Step 8: Save the Model

The ModelCheckpoint callback has already saved the model as 'best_model.h5' during training; the file holds the model from the epoch with the highest validation accuracy. You can now copy this file to your Student Engagement Analysis project directory.

To verify the model was saved:

In [None]:
import os

# File written by the ModelCheckpoint callback during training
model_path = 'best_model.h5'

if not os.path.exists(model_path):
    print("Error: Model file not found!")
else:
    # Report the size in mebibytes
    size_mb = os.path.getsize(model_path) / (1024*1024)
    print("Model saved successfully!")
    print(f"Model file size: {size_mb:.2f} MB")