In [185]:
import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.utils.class_weight import compute_class_weight

In [186]:
class EmotionDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.tensor(features, dtype=torch.float32)
        self.labels = torch.tensor(labels, dtype=torch.long)
    
    def __len__(self):
        return len(self.labels)
    
    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

In [187]:
def load_ravdess_data():
    # emotion_files = {        
    #     'neutral': "C:/Nini/Capstone/CSV_Files/Facial data/Neutral.xlsx",
    #     'calm': "C:/Nini/Capstone/CSV_Files/Facial data/Calm.xlsx",
    #     'happy': "C:/Nini/Capstone/CSV_Files/Facial data/Happy.xlsx",
    #     'sad': "C:/Nini/Capstone/CSV_Files/Facial data/Sad.xlsx",
    #     'angry': "C:/Nini/Capstone/CSV_Files/Facial data/Angry.xlsx",
    #     'fearful': "C:/Nini/Capstone/CSV_Files/Facial data/Fearful.xlsx",
    #     'disgust': "C:/Nini/Capstone/CSV_Files/Facial data/Disgusted.xlsx",
    #     'surprised': "C:/Nini/Capstone/CSV_Files/Facial data/Surprised.xlsx"
    # }

    # all_data = []
    
    # for emotion, file_path in emotion_files.items():
    #     try:
    #         df = pd.read_excel(file_path)
    #         df['emotion'] = emotion
    #         all_data.append(df)
    #         print(f"Loaded {emotion} data: {len(df)} samples")
    #     except Exception as e:
    #         print(f"Error loading {emotion} file: {str(e)}")
    
    # combined_df = pd.concat(all_data, ignore_index=True)
    
    # feature_columns = ['face1_face_x', 'face_y', 'face_width', 'face_height', 'face_aspect_ratio',
    #                    'left_eye_x', 'left_eye_y', 'left_eye_width', 'left_eye_height',
    #                    'right_eye_x', 'right_eye_y', 'right_eye_width', 'right_eye_height',
    #                    'eye_separation', 'mouth_x', 'mouth_y', 'mouth_width', 'mouth_height',
    #                    'mouth_aspect_ratio', 'avg_intensity', 'intensity_variance']
    
    # print("\nDataset Statistics:")
    # print(f"Total samples: {len(combined_df)}")
    # print("\nSamples per emotion:")
    # print(combined_df['emotion'].value_counts())
    
    # X = combined_df[feature_columns].values
    
    # label_encoder = LabelEncoder()
    # y = label_encoder.fit_transform(combined_df['emotion'])
    
    # return X, y, label_encoder
    train_data = pd.read_excel("C:/Nini/Capstone/CSV_Files/Facial data/Train.xlsx")
    feature_columns = ['face_x', 'face_y', 'face_width', 'face_height', 'face_aspect_ratio',
                        'left_eye_x', 'left_eye_y', 'left_eye_width', 'left_eye_height',
                        'right_eye_x', 'right_eye_y', 'right_eye_width', 'right_eye_height',
                        'eye_separation', 'mouth_x', 'mouth_y', 'mouth_width', 'mouth_height',
                        'mouth_aspect_ratio', 'avg_intensity', 'intensity_variance']
    X_train = train_data[feature_columns].values
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(train_data['emotion'])

    test_data = pd.read_excel("C:/Nini/Capstone/CSV_Files/Facial data/Test.xlsx")
    print(train_data.isnull().sum())
    print(train_data[train_data.isnull().any(axis=1)])
    # print(test_data[feature_columns].dtypes)
    print(test_data.head())
    X_test = test_data[feature_columns].values
    y_test = label_encoder.fit_transform(test_data['emotion'])

    print(X_train.dtype)
    print(X_test.dtype)
    print(y_train.dtype)
    print(y_test.dtype)
    return X_train, X_test, y_train, y_test, label_encoder

    
    

In [188]:
# train_data = pd.read_excel("C:/Nini/Capstone/CSV_Files/Facial data/Train.xlsx")
# feature_columns = ['face_x', 'face_y', 'face_width', 'face_height', 'face_aspect_ratio',
#                        'left_eye_x', 'left_eye_y', 'left_eye_width', 'left_eye_height',
#                        'right_eye_x', 'right_eye_y', 'right_eye_width', 'right_eye_height',
#                        'eye_separation', 'mouth_x', 'mouth_y', 'mouth_width', 'mouth_height',
#                        'mouth_aspect_ratio', 'avg_intensity', 'intensity_variance']
# X_train = train_data[feature_columns].values
# label_encoder = LabelEncoder()
# y_train = label_encoder.fit_transform(train_data['emotion'])


In [189]:
# test_data = pd.read_excel("C:/Nini/Capstone/CSV_Files/Facial data/Test.xlsx")
# X_test = test_data[feature_columns].values
# y_test = label_encoder.fit_transform(train_data['emotion'])


In [190]:
class EmotionLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=192, num_classes=8, dropout_rate=0.3):
        super(EmotionLSTM, self).__init__()
        
        # Simplified architecture - one bidirectional LSTM
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True, bidirectional=True)
        self.dropout = nn.Dropout(dropout_rate)
        
        # Simple dense layers
        self.bn = nn.BatchNorm1d(hidden_size*2)
        self.fc1 = nn.Linear(hidden_size*2, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        # LSTM layer
        lstm_out, _ = self.lstm(x)
        
        # Get last time step or squeeze
        if lstm_out.size(1) == 1:
            lstm_out = lstm_out.squeeze(1)
        else:
            lstm_out = lstm_out[:, -1, :]
        
        # Apply batch normalization
        lstm_out = self.bn(lstm_out)
        lstm_out = self.dropout(lstm_out)
        
        # Dense layers
        x = self.fc1(lstm_out)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        
        return x

# # class EmotionLSTM(nn.Module):
# # #     def __init__(self, input_size, hidden_size=128, num_layers=2, num_classes=8, dropout=0.5):
# # #         super(EmotionLSTM, self).__init__()
        
# # #         self.lstm = nn.LSTM(
# # #             input_size=input_size,
# # #             hidden_size=hidden_size,
# # #             num_layers=num_layers,
# # #             batch_first=True,
# # #             dropout=dropout
# # #         )
        
# # #         self.fc1 = nn.Linear(hidden_size, 64)
# # #         self.dropout = nn.Dropout(dropout)
# # #         self.fc2 = nn.Linear(64, num_classes)
        
# # #     def forward(self, x):
# # #         # x shape: (batch_size, seq_length, input_size)
# # #         out, _ = self.lstm(x)  # output from all time steps
# # #         out = out[:, -1, :]    # Take output of the last time step

# # #         out = F.relu(self.fc1(out))
# # #         out = self.dropout(out)
# # #         out = self.fc2(out)
# # #         return out

# # # class EmotionLSTM(nn.Module):
# # #     def __init__(self, input_size, hidden_size=128, num_layers=2, num_classes=6, dropout=0.3):
# # #         super(EmotionLSTM, self).__init__()
        
# # #         self.lstm = nn.LSTM(
# # #             input_size=input_size,
# # #             hidden_size=hidden_size,
# # #             num_layers=num_layers,
# # #             batch_first=True,
# # #             dropout=dropout
# # #         )
        
# # #         self.fc1 = nn.Linear(hidden_size, 64)
# # #         self.dropout = nn.Dropout(dropout)
# # #         self.fc2 = nn.Linear(64, num_classes)
        
# # #     def forward(self, x):
# # #         # x shape: (batch_size, seq_length, input_size)
# # #         out, _ = self.lstm(x)  # output from all time steps
# # #         out = out[:, -1, :]    # Take output of the last time step

# # #         out = F.relu(self.fc1(out))
# # #         out = self.dropout(out)
# # #         out = self.fc2(out)
# # #         return out

# # class EmotionLSTM(nn.Module):
# #     def __init__(self, input_size, hidden_size=256, num_layers=2, num_classes=8, dropout=0.5, bidirectional=True):
# #         super(EmotionLSTM, self).__init__()
        
# #         # Make LSTM bidirectional for better temporal feature understanding
# #         self.lstm = nn.LSTM(
# #             input_size=input_size,
# #             hidden_size=hidden_size,
# #             num_layers=num_layers,
# #             batch_first=True,
# #             dropout=dropout if num_layers > 1 else 0,
# #             bidirectional=bidirectional
# #         )
        
# #         # Adjust for bidirectional output
# #         lstm_output_size = hidden_size * 2 if bidirectional else hidden_size
        
# #         # Add batch normalization for better training stability
# #         self.batch_norm = nn.BatchNorm1d(lstm_output_size)
        
# #         # Improved fully connected layers
# #         self.fc1 = nn.Linear(lstm_output_size, 128)
# #         self.dropout1 = nn.Dropout(dropout)
# #         self.fc2 = nn.Linear(128, 64)
# #         self.dropout2 = nn.Dropout(dropout)
# #         self.fc3 = nn.Linear(64, num_classes)
        
# #     def forward(self, x):
# #         # x shape: (batch_size, seq_length, input_size)
# #         out, _ = self.lstm(x)
        
# #         # Get output from last time step
# #         if self.lstm.bidirectional:
# #             # Combine forward and backward last outputs
# #             out = out[:, -1, :]
# #         else:
# #             out = out[:, -1, :]
            
# #         # Apply batch normalization
# #         out = self.batch_norm(out)
        
# #         # First dense layer with activation and dropout
# #         out = F.relu(self.fc1(out))
# #         out = self.dropout1(out)
        
# #         # Second dense layer with activation and dropout
# #         out = F.relu(self.fc2(out))
# #         out = self.dropout2(out)
        
# #         # Output layer
# #         out = self.fc3(out)
        
# #         return out

# class EmotionLSTM(nn.Module):
#     def __init__(self, input_size, num_classes=8):
#         super(EmotionLSTM, self).__init__()

#         self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=128, batch_first=True)
#         self.bn1 = nn.BatchNorm1d(1)
#         self.dropout1 = nn.Dropout(0.3)

#         self.lstm2 = nn.LSTM(input_size=128, hidden_size=96, batch_first=True)
#         self.bn2 = nn.BatchNorm1d(1)
#         self.dropout2 = nn.Dropout(0.3)

#         self.lstm3 = nn.LSTM(input_size=96, hidden_size=64, batch_first=True)
#         self.bn3 = nn.BatchNorm1d(64)
#         self.dropout3 = nn.Dropout(0.3)

#         self.fc1 = nn.Linear(64, 128)
#         self.bn4 = nn.BatchNorm1d(128)
#         self.dropout4 = nn.Dropout(0.2)

#         self.fc2 = nn.Linear(128, 64)
#         self.bn5 = nn.BatchNorm1d(64)
#         self.dropout5 = nn.Dropout(0.2)

#         self.output = nn.Linear(64, num_classes)

#     def forward(self, x):
#         # x: (batch, 1, input_size)
#         x, _ = self.lstm1(x)
#         x = self.bn1(x)
#         x = self.dropout1(x)

#         x, _ = self.lstm2(x)
#         x = self.bn2(x)
#         x = self.dropout2(x)

#         x, _ = self.lstm3(x)
#         x = x[:, -1, :]  # get last time step
#         x = self.bn3(x)
#         x = self.dropout3(x)

#         x = F.relu(self.fc1(x))
#         x = self.bn4(x)
#         x = self.dropout4(x)

#         x = F.relu(self.fc2(x))
#         x = self.bn5(x)
#         x = self.dropout5(x)

#         x = self.output(x)
#         return F.softmax(x, dim=1)

# class EmotionLSTM(nn.Module):
#     def __init__(self, input_size, num_classes=8):
#         super(EmotionLSTM, self).__init__()

#         self.lstm1 = nn.LSTM(input_size=input_size, hidden_size=128, batch_first=True)
#         self.bn1 = nn.BatchNorm1d(1)
#         self.dropout1 = nn.Dropout(0.3)

#         self.lstm2 = nn.LSTM(input_size=128, hidden_size=96, batch_first=True)
#         self.bn2 = nn.BatchNorm1d(1)
#         self.dropout2 = nn.Dropout(0.3)

#         self.lstm3 = nn.LSTM(input_size=96, hidden_size=64, batch_first=True)
#         self.bn3 = nn.BatchNorm1d(64)
#         self.dropout3 = nn.Dropout(0.3)

#         self.fc1 = nn.Linear(64, 128)
#         self.bn4 = nn.BatchNorm1d(128)
#         self.dropout4 = nn.Dropout(0.2)

#         self.fc2 = nn.Linear(128, 64)
#         self.bn5 = nn.BatchNorm1d(64)
#         self.dropout5 = nn.Dropout(0.2)

#         self.output = nn.Linear(64, num_classes)

#     def forward(self, x):
#         # x: (batch, 1, input_size)
#         x, _ = self.lstm1(x)
#         x = self.bn1(x)
#         x = self.dropout1(x)

#         x, _ = self.lstm2(x)
#         x = self.bn2(x)
#         x = self.dropout2(x)

#         x, _ = self.lstm3(x)
#         x = x[:, -1, :]  # get last time step
#         x = self.bn3(x)
#         x = self.dropout3(x)

#         x = F.relu(self.fc1(x))
#         x = self.bn4(x)
#         x = self.dropout4(x)

#         x = F.relu(self.fc2(x))
#         x = self.bn5(x)
#         x = self.dropout5(x)

#         x = self.output(x)
#         return F.softmax(x, dim=1)

In [191]:
class EmotionDetectionPyTorch:
    def __init__(self, X_train,X_test, y_train, y_test, label_encoder):
        # Split the data
        self.X_train, self.X_test, self.y_train, self.y_test = X_train, X_test, y_train, y_test
        
        # Standardize features
        self.scaler = StandardScaler()
        self.X_train_scaled = self.scaler.fit_transform(self.X_train)
        self.X_test_scaled = self.scaler.transform(self.X_test)
        
        # Reshape for LSTM (samples, sequence_length, features)
        self.X_train_reshaped = self.X_train_scaled.reshape(
            (self.X_train_scaled.shape[0], 1, self.X_train_scaled.shape[1]))
        self.X_test_reshaped = self.X_test_scaled.reshape(
            (self.X_test_scaled.shape[0], 1, self.X_test_scaled.shape[1]))
        
        self.label_encoder = label_encoder
        self.num_classes = len(label_encoder.classes_)
        
        # Set device
        # self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.device = torch.device("cpu" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {self.device}")
        # Create datasets and dataloaders
        self.train_dataset = EmotionDataset(self.X_train_reshaped, self.y_train)
        self.test_dataset = EmotionDataset(self.X_test_reshaped, self.y_test)
        
        self.results = {}
        
        print(f"\nTraining set shape: {self.X_train.shape}")
        print(f"Testing set shape: {self.X_test.shape}")

    def create_dataloaders(self, batch_size=32):
        """Create DataLoaders for train and test sets"""
        train_loader = DataLoader(
            self.train_dataset, 
            batch_size=batch_size, 
            shuffle=True
        )
        
        test_loader = DataLoader(
            self.test_dataset,
            batch_size=batch_size,
            shuffle=False
        )
        
        return train_loader, test_loader
    
    def create_lstm_model(self):
        """Create LSTM model"""
        input_dim = self.X_train.shape[1]  # Number of features
        hidden_dim = 128
        model = EmotionLSTM(input_size=self.X_train.shape[1])
        model.to(self.device)
        return model
    
    def train_model(self, model, train_loader, test_loader, epochs=1000, learning_rate=0.001):
        """Train the PyTorch model"""
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        
        # For tracking metrics
        train_losses = []
        train_accs = []
        val_losses = []
        val_accs = []
        
        print(f"\nTraining LSTM model...")
        for epoch in range(epochs):
            # Training phase
            model.train()
            running_loss = 0.0
            correct = 0
            total = 0
            
            for inputs, labels in train_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                
                # Zero the parameter gradients
                optimizer.zero_grad()
                
                # Forward pass
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                
                # Backward pass and optimize
                loss.backward()
                optimizer.step()
                
                running_loss += loss.item() * inputs.size(0)
                
                # Calculate accuracy
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            
            epoch_loss = running_loss / len(train_loader.dataset)
            epoch_acc = correct / total
            train_losses.append(epoch_loss)
            train_accs.append(epoch_acc)
            
            # Validation phase
            model.eval()
            val_running_loss = 0.0
            val_correct = 0
            val_total = 0
            
            with torch.no_grad():
                for inputs, labels in test_loader:
                    inputs, labels = inputs.to(self.device), labels.to(self.device)
                    
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)
                    
                    val_running_loss += loss.item() * inputs.size(0)
                    
                    _, predicted = torch.max(outputs.data, 1)
                    val_total += labels.size(0)
                    val_correct += (predicted == labels).sum().item()
            
            val_epoch_loss = val_running_loss / len(test_loader.dataset)
            val_epoch_acc = val_correct / val_total
            val_losses.append(val_epoch_loss)
            val_accs.append(val_epoch_acc)
            
            # Print statistics
            if (epoch + 1) % 5 == 0 or epoch == 0:
                print(f"Epoch {epoch+1}/{epochs} - "
                      f"Loss: {epoch_loss:.4f} - Acc: {epoch_acc:.4f} - "
                      f"Val Loss: {val_epoch_loss:.4f} - Val Acc: {val_epoch_acc:.4f}")
        
        history = {
            'loss': train_losses,
            'accuracy': train_accs,
            'val_loss': val_losses,
            'val_accuracy': val_accs
        }
        
        return model, history
    
    def evaluate_model(self, model, test_loader):
        """Evaluate the model on test data"""
        model.eval()
        all_preds = []
        all_labels = []
        
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                
                all_preds.extend(preds.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
        
        # Convert indices to emotion labels
        pred_emotions = self.label_encoder.inverse_transform(all_preds)
        true_emotions = self.label_encoder.inverse_transform(all_labels)
        
        # Calculate metrics
        conf_matrix = confusion_matrix(true_emotions, pred_emotions)
        class_report = classification_report(true_emotions, pred_emotions)
        
        return conf_matrix, class_report, pred_emotions, true_emotions
    
    def plot_confusion_matrix(self, conf_matrix, title, emotion_labels):
        """Plot confusion matrix"""
        plt.figure(figsize=(10, 8))
        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                   xticklabels=emotion_labels, yticklabels=emotion_labels)
        plt.title(title)
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.show()

    def plot_training_history(self, history, title):
        """Plot training history"""
        plt.figure(figsize=(12, 4))
        
        plt.subplot(1, 2, 1)
        plt.plot(history['accuracy'], label='Training Accuracy')
        plt.plot(history['val_accuracy'], label='Validation Accuracy')
        plt.title(f'{title} - Accuracy')
        plt.legend()
        
        plt.subplot(1, 2, 2)
        plt.plot(history['loss'], label='Training Loss')
        plt.plot(history['val_loss'], label='Validation Loss')
        plt.title(f'{title} - Loss')
        plt.legend()
        
        plt.tight_layout()
        plt.show()

    def run_experiment(self, model_name="LSTM", batch_size=32, epochs=1000, learning_rate=0.001):
        """Run a complete experiment"""
        # Create dataloaders
        train_loader, test_loader = self.create_dataloaders(batch_size)
        
        # Create model
        model = self.create_lstm_model()
        
        # Train model
        trained_model, history = self.train_model(
            model, train_loader, test_loader, epochs, learning_rate
        )
        
        # Evaluate model
        conf_matrix, class_report, pred_emotions, true_emotions = self.evaluate_model(
            trained_model, test_loader
        )
        
        # Store results
        self.results[model_name] = {
            'model': trained_model,
            'history': history,
            'confusion_matrix': conf_matrix,
            'classification_report': class_report,
            'predictions': pred_emotions,
            'true_labels': true_emotions
        }
        
        # Plot results
        self.plot_training_history(history, model_name)
        self.plot_confusion_matrix(
            conf_matrix, f'{model_name} Confusion Matrix', 
            self.label_encoder.classes_
        )
        
        print(f"\n{model_name} Classification Report:")
        print(class_report)
        
        return trained_model
    
    def print_comparison(self):
        """Print comparison of different models"""
        print("\nModel Comparison Results")
        print("=" * 50)
        
        summary_data = []
        
        for model_name, result in self.results.items():
            print(f"\n{model_name} Results")
            print("-" * 30)
            print("\nClassification Report:")
            print(result['classification_report'])
            
            val_acc = result['history']['val_accuracy'][-1]
            val_loss = result['history']['val_loss'][-1]
            
            summary_data.append({
                'Model': model_name,
                'Validation Accuracy': val_acc,
                'Validation Loss': val_loss
            })
        
        summary_df = pd.DataFrame(summary_data)
        print("\nSummary of Results:")
        print(summary_df.to_string(index=False))

    def save_model(self, model, path='/Users/smriti/Desktop/emotion_lstm_model.pth'):
        """Save the PyTorch model"""
        torch.save(model.state_dict(), path)
        print(f"Model saved to {path}")

    def load_model(self, path='/Users/smriti/Desktop/emotion_lstm_model.pth'):
        """Load a saved PyTorch model"""
        model = self.create_lstm_model()
        model.load_state_dict(torch.load(path, map_location=self.device))
        model.eval()
        return model

In [192]:
if __name__ == "__main__":
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    X_train, X_test, y_train, y_test, label_encoder = load_ravdess_data()
    
    emotion_detector = EmotionDetectionPyTorch(X_train, X_test, y_train, y_test, label_encoder)
    
    model = emotion_detector.run_experiment()  
    
    emotion_detector.save_model(model)
    
    emotion_detector.print_comparison()
    

video_name            0
frame                 0
face_x                0
face_y                0
face_width            0
face_height           0
face_aspect_ratio     0
left_eye_x            0
left_eye_y            0
left_eye_width        0
left_eye_height       0
right_eye_x           0
right_eye_y           0
right_eye_width       0
right_eye_height      0
eye_separation        0
mouth_x               0
mouth_y               0
mouth_width           0
mouth_height          0
mouth_aspect_ratio    0
avg_intensity         0
intensity_variance    0
emotion               0
dtype: int64
Empty DataFrame
Columns: [video_name, frame, face_x, face_y, face_width, face_height, face_aspect_ratio, left_eye_x, left_eye_y, left_eye_width, left_eye_height, right_eye_x, right_eye_y, right_eye_width, right_eye_height, eye_separation, mouth_x, mouth_y, mouth_width, mouth_height, mouth_aspect_ratio, avg_intensity, intensity_variance, emotion]
Index: []

[0 rows x 24 columns]
                 video_name  f

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
