# Complete Models for Visual Emotion Recognition

This notebook contains all model implementations for visual emotion recognition, consolidating functionality from the src/models directory.

## Models Included:
1. **CNN Baseline Model** - Simple CNN for emotion recognition on 48x48 grayscale images
2. **CNN Transfer Learning** - VGG16/VGG19/AlexNet based transfer learning
3. **Improved CNN Transfer Learning** - ResNet50/101, DenseNet121 or VGG16 backbones with an enhanced classifier head
4. **Custom Loss Functions** - Label smoothing cross entropy and focal loss
5. **Model Ensemble** - Combining multiple models
6. **Model Factory Functions** - Helper functions for creating the models above


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Select the compute device: CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

## 1. CNN Baseline Model

In [None]:
class CNNBaseline(nn.Module):
    """
    Simple CNN baseline model for emotion recognition.

    The network stacks four Conv-BatchNorm-ReLU-MaxPool-Dropout2d blocks
    (32, 64, 128 and 256 channels) followed by a three-layer fully connected
    classifier. It takes single-channel (grayscale) square images, 48x48 by
    default.
    """

    def __init__(self, num_classes=7, input_size=48, dropout_rate=0.5):
        """
        Initialize the baseline CNN model.

        Args:
            num_classes (int): Number of emotion classes
            input_size (int): Input image size (assumed square)
            dropout_rate (float): Dropout rate used in the classifier
        """
        super(CNNBaseline, self).__init__()

        self.num_classes = num_classes
        self.input_size = input_size
        self.dropout_rate = dropout_rate

        # Convolutional feature extractor. Every block keeps the spatial size
        # in the convolution (padding=1) and halves it in the max-pool.
        layers = []
        in_channels = 1
        for out_channels in (32, 64, 128, 256):
            layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
                nn.Dropout2d(0.25),
            ]
            in_channels = out_channels
        self.features = nn.Sequential(*layers)

        # Flattened feature size, derived from the actual layers so that the
        # classifier input adapts to `input_size`.
        self.feature_size = self._calculate_feature_size()

        # Classifier
        self.classifier = nn.Sequential(
            nn.Linear(self.feature_size, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(512, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, num_classes)
        )

        self._initialize_weights()

    def _calculate_feature_size(self):
        """
        Calculate the flattened feature size after the convolutional layers.

        The probe runs in eval mode so that it does not update the BatchNorm
        running statistics, and uses a zero tensor so that it does not consume
        random numbers. The previous train/eval mode is restored afterwards.

        Returns:
            int: Number of features per sample produced by `self.features`
        """
        was_training = self.features.training
        self.features.eval()
        try:
            with torch.no_grad():
                probe = torch.zeros(1, 1, self.input_size, self.input_size)
                return self.features(probe).numel()
        finally:
            self.features.train(was_training)

    def _initialize_weights(self):
        """
        Initialize model weights.

        Conv layers use Kaiming-normal (fan_out, ReLU), BatchNorm layers are
        set to the identity transform, and Linear layers use N(0, 0.01).
        All biases are zeroed.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """
        Forward pass through the model.

        Args:
            x (torch.Tensor): Input of shape (batch_size, 1, input_size, input_size)

        Returns:
            torch.Tensor: Logits of shape (batch_size, num_classes)
        """
        x = self.features(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

    def get_num_params(self):
        """Get number of trainable parameters."""
        return sum(p.numel() for p in self.parameters() if p.requires_grad)


# Test the baseline model
print("Testing CNN Baseline Model...")
baseline_model = CNNBaseline(num_classes=7, input_size=48)
print(f"Model parameters: {baseline_model.get_num_params():,}")

# Test forward pass.
# The shape check runs in eval mode without autograd so that the random batch
# does not update the BatchNorm running statistics and no graph is built.
# The model is switched back to training mode afterwards.
test_input = torch.randn(4, 1, 48, 48)
baseline_model.eval()
with torch.no_grad():
    output = baseline_model(test_input)
baseline_model.train()
print(f"Input shape: {test_input.shape}")
print(f"Output shape: {output.shape}")

## 2. CNN Transfer Learning Model

In [None]:
class CNNTransferLearning(nn.Module):
    """
    CNN Transfer Learning model using pre-trained backbones for emotion recognition.

    This model uses the convolutional part of a torchvision architecture
    (VGG16, VGG19 or AlexNet) as feature extractor and adds a custom
    fully connected classifier for emotion classification.
    """

    # Flattened size of each backbone's feature map after its adaptive average
    # pool; this is the input width of the custom classifier.
    _BACKBONE_OUT_FEATURES = {'vgg16': 25088, 'vgg19': 25088, 'alexnet': 9216}

    def __init__(self, num_classes=7, backbone='vgg16', pretrained=True, freeze_backbone=False):
        """
        Initialize the transfer learning model.

        Args:
            num_classes (int): Number of emotion classes
            backbone (str): Pre-trained model to use ('vgg16', 'vgg19', 'alexnet')
            pretrained (bool): Whether to use pre-trained weights
            freeze_backbone (bool): Whether to freeze backbone weights during training

        Raises:
            ValueError: If `backbone` is not one of the supported names
        """
        super(CNNTransferLearning, self).__init__()

        if backbone not in self._BACKBONE_OUT_FEATURES:
            raise ValueError(f"Unsupported backbone: {backbone}. "
                           f"Supported: {list(self._BACKBONE_OUT_FEATURES)}")

        self.backbone_name = backbone
        self.num_classes = num_classes
        self.pretrained = pretrained  # remembered so print_model_info reports the truth
        self.frozen = freeze_backbone

        # Load the backbone. The torchvision constructors share their names
        # with the supported backbone strings.
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; the old keyword is kept here for compatibility.
        self.backbone = getattr(models, backbone)(pretrained=pretrained)
        backbone_out_features = self._BACKBONE_OUT_FEATURES[backbone]

        # All supported backbones expose the same `features` / `avgpool` pair.
        # NOTE(review): `self.backbone` also keeps torchvision's original
        # classifier registered although forward() never uses it, so those
        # weights are included in the parameter counts. Removing it would
        # change the state_dict keys, so it is left untouched.
        self.features = self.backbone.features
        self.avgpool = self.backbone.avgpool

        # Freeze backbone if requested
        if freeze_backbone:
            self._freeze_backbone()

        # Create custom classifier for emotion recognition
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(backbone_out_features, 4096),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(4096, 1024),
            nn.ReLU(True),
            nn.Dropout(0.3),
            nn.Linear(1024, 256),
            nn.ReLU(True),
            nn.Dropout(0.2),
            nn.Linear(256, num_classes)
        )

        # Initialize classifier weights
        self._initialize_classifier()

    def _freeze_backbone(self):
        """Freeze all parameters of the convolutional feature extractor."""
        for param in self.features.parameters():
            param.requires_grad = False
        print(f"Backbone ({self.backbone_name}) weights frozen")
        self.frozen = True

    def _unfreeze_backbone(self):
        """Unfreeze all parameters of the convolutional feature extractor."""
        for param in self.features.parameters():
            param.requires_grad = True
        print(f"Backbone ({self.backbone_name}) weights unfrozen")
        self.frozen = False

    def _initialize_classifier(self):
        """Initialize classifier weights using Xavier initialization."""
        for module in self.classifier.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """
        Forward pass through the model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, 3, 224, 224)

        Returns:
            torch.Tensor: Output logits of shape (batch_size, num_classes)
        """
        # Extract features using pre-trained backbone
        x = self.features(x)
        x = self.avgpool(x)

        # Flatten features
        x = torch.flatten(x, 1)

        # Classify emotions
        x = self.classifier(x)

        return x

    def unfreeze_backbone(self):
        """Public method to unfreeze backbone for fine-tuning."""
        self._unfreeze_backbone()

    def freeze_backbone(self):
        """Public method to freeze backbone layers."""
        self._freeze_backbone()

    def get_num_params(self, trainable_only=True):
        """
        Get number of parameters in the model.

        Args:
            trainable_only (bool): If True, count only trainable parameters

        Returns:
            int: Number of parameters
        """
        if trainable_only:
            return sum(p.numel() for p in self.parameters() if p.requires_grad)
        else:
            return sum(p.numel() for p in self.parameters())

    def get_backbone_params(self):
        """Get number of parameters in the convolutional feature extractor."""
        return sum(p.numel() for p in self.features.parameters())

    def get_classifier_params(self):
        """Get number of parameters in the classifier."""
        return sum(p.numel() for p in self.classifier.parameters())

    def print_model_info(self):
        """Print detailed information about the model."""
        print(f"\n{'='*50}")
        print(f"CNN Transfer Learning Model Information")
        print(f"{'='*50}")
        print(f"Backbone: {self.backbone_name}")
        print(f"Pretrained: {'Yes' if self.pretrained else 'No'}")
        print(f"Frozen: {self.frozen}")
        print(f"Number of classes: {self.num_classes}")
        print(f"Total parameters: {self.get_num_params(trainable_only=False):,}")
        print(f"Trainable parameters: {self.get_num_params(trainable_only=True):,}")
        print(f"Backbone parameters: {self.get_backbone_params():,}")
        print(f"Classifier parameters: {self.get_classifier_params():,}")

        if self.frozen:
            print(f"Training strategy: Feature extraction (backbone frozen)")
        else:
            print(f"Training strategy: Fine-tuning (all layers trainable)")


# Test the transfer learning model
print("\nTesting CNN Transfer Learning Model...")
transfer_model = CNNTransferLearning(num_classes=7, backbone='vgg16', pretrained=True, freeze_backbone=False)
transfer_model.print_model_info()

# Test forward pass.
# The shape check runs in eval mode without autograd: Dropout is disabled and
# no activations are kept for a backward pass that never happens. The model is
# switched back to training mode afterwards.
test_input = torch.randn(4, 3, 224, 224)
transfer_model.eval()
with torch.no_grad():
    output = transfer_model(test_input)
transfer_model.train()
print(f"\nInput shape: {test_input.shape}")
print(f"Output shape: {output.shape}")

## 3. Improved CNN Transfer Learning with Advanced Features

In [None]:
class SpatialAttention(nn.Module):
    """
    Spatial attention gate.

    A 1x1 convolution collapses the channels into a single map, a sigmoid
    squashes it into (0, 1), and the input is rescaled by that map
    (broadcast across all channels).
    """

    def __init__(self, in_channels):
        """
        Args:
            in_channels (int): Number of channels of the incoming feature map
        """
        super().__init__()
        self.conv = nn.Conv2d(in_channels, 1, kernel_size=1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return `x` weighted by its per-location attention map."""
        gate = self.sigmoid(self.conv(x))
        return x * gate


class ImprovedCNNTransferLearning(nn.Module):
    """
    Improved CNN Transfer Learning model with enhanced architecture and features.

    Compared with the basic transfer learning model this class offers:
    - ResNet50/ResNet101/DenseNet121/VGG16 backbones
    - A deeper classifier head with batch normalization and tapered dropout
    - Optional spatial attention on the last convolutional feature map
    """

    def __init__(self, num_classes=7, backbone='resnet50', pretrained=True,
                 freeze_backbone=False, dropout_rate=0.5, use_attention=False):
        """
        Initialize the improved transfer learning model.

        Args:
            num_classes (int): Number of emotion classes
            backbone (str): Pre-trained model to use
                ('resnet50', 'resnet101', 'densenet121', 'vgg16')
            pretrained (bool): Whether to use pre-trained weights
            freeze_backbone (bool): Whether to freeze backbone weights during training
            dropout_rate (float): Base dropout rate for the classifier head
            use_attention (bool): Whether to apply spatial attention to the
                backbone's final feature map before pooling

        Raises:
            ValueError: If `backbone` is not one of the supported names
        """
        super(ImprovedCNNTransferLearning, self).__init__()

        self.backbone_name = backbone
        self.num_classes = num_classes
        self.frozen = freeze_backbone
        self.dropout_rate = dropout_rate
        self.use_attention = use_attention

        # Load the pre-trained backbone and strip its original head.
        # `attention_channels` is the channel count of the last feature map,
        # i.e. the tensor the attention gate operates on.
        if backbone in ('resnet50', 'resnet101'):
            self.backbone = getattr(models, backbone)(pretrained=pretrained)
            backbone_out_features = self.backbone.fc.in_features
            self.backbone.fc = nn.Identity()  # Remove final FC layer
            attention_channels = backbone_out_features
        elif backbone == 'densenet121':
            self.backbone = models.densenet121(pretrained=pretrained)
            backbone_out_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
            attention_channels = backbone_out_features
        elif backbone == 'vgg16':
            self.backbone = models.vgg16(pretrained=pretrained)
            self.features = self.backbone.features
            self.avgpool = self.backbone.avgpool
            # VGG16 ends with a 512-channel map pooled to 7x7 (= 25088 features).
            attention_channels = 512
            backbone_out_features = attention_channels * 7 * 7
        else:
            raise ValueError(f"Unsupported backbone: {backbone}")

        # Freeze backbone if requested
        if freeze_backbone:
            self._freeze_backbone()

        # The attention gate is created for every backbone whenever requested,
        # so that forward() can rely on it being present.
        if use_attention:
            self.attention = SpatialAttention(attention_channels)

        # Create enhanced classifier
        if backbone == 'vgg16':
            self.classifier = self._create_vgg_classifier(backbone_out_features)
        else:
            self.classifier = self._create_resnet_classifier(backbone_out_features)

        # Initialize classifier weights
        self._initialize_classifier()

    def _create_resnet_classifier(self, in_features):
        """Create the classifier head used for ResNet/DenseNet backbones."""
        return nn.Sequential(
            nn.BatchNorm1d(in_features),
            nn.Dropout(self.dropout_rate),
            nn.Linear(in_features, 1024),
            nn.ReLU(True),
            nn.BatchNorm1d(1024),
            nn.Dropout(self.dropout_rate * 0.8),
            nn.Linear(1024, 512),
            nn.ReLU(True),
            nn.BatchNorm1d(512),
            nn.Dropout(self.dropout_rate * 0.6),
            nn.Linear(512, 256),
            nn.ReLU(True),
            nn.BatchNorm1d(256),
            nn.Dropout(self.dropout_rate * 0.4),
            nn.Linear(256, self.num_classes)
        )

    def _create_vgg_classifier(self, in_features):
        """Create the classifier head used for the VGG backbone."""
        return nn.Sequential(
            nn.Dropout(self.dropout_rate),
            nn.Linear(in_features, 4096),
            nn.ReLU(True),
            nn.BatchNorm1d(4096),
            nn.Dropout(self.dropout_rate),
            nn.Linear(4096, 1024),
            nn.ReLU(True),
            nn.BatchNorm1d(1024),
            nn.Dropout(self.dropout_rate * 0.6),
            nn.Linear(1024, 256),
            nn.ReLU(True),
            nn.BatchNorm1d(256),
            nn.Dropout(self.dropout_rate * 0.4),
            nn.Linear(256, self.num_classes)
        )

    def _freeze_backbone(self):
        """Freeze backbone parameters."""
        for param in self.backbone.parameters():
            param.requires_grad = False
        print(f"Backbone ({self.backbone_name}) weights frozen")

    def _initialize_classifier(self):
        """Initialize classifier weights (Kaiming for Linear, identity for BatchNorm)."""
        for module in self.classifier.modules():
            if isinstance(module, nn.Linear):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                nn.init.zeros_(module.bias)
            elif isinstance(module, nn.BatchNorm1d):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)

    def _extract_features(self, x):
        """
        Run the backbone and return one flat feature vector per sample.

        Without attention, ResNet/DenseNet backbones are called directly
        (their head was replaced by nn.Identity). With attention, the backbone
        is unrolled so the gate can be applied to the last convolutional
        feature map before global pooling.
        """
        if self.backbone_name == 'vgg16':
            x = self.features(x)
            if self.use_attention:
                x = self.attention(x)
            x = self.avgpool(x)
            return torch.flatten(x, 1)

        if not self.use_attention:
            return self.backbone(x)

        net = self.backbone
        if self.backbone_name == 'densenet121':
            # Mirrors torchvision's DenseNet forward up to the pooling step.
            x = F.relu(net.features(x), inplace=True)
            x = self.attention(x)
            x = F.adaptive_avg_pool2d(x, (1, 1))
        else:
            # Mirrors torchvision's ResNet forward up to the pooling step.
            x = net.maxpool(net.relu(net.bn1(net.conv1(x))))
            for stage in (net.layer1, net.layer2, net.layer3, net.layer4):
                x = stage(x)
            x = self.attention(x)
            x = net.avgpool(x)
        return torch.flatten(x, 1)

    def forward(self, x):
        """
        Forward pass through the model.

        Args:
            x (torch.Tensor): Batch of RGB images, shape (batch_size, 3, H, W)

        Returns:
            torch.Tensor: Logits of shape (batch_size, num_classes)
        """
        x = self._extract_features(x)
        x = self.classifier(x)
        return x

    def get_num_params(self, trainable_only=True):
        """Get number of parameters in the model (trainable only by default)."""
        if trainable_only:
            return sum(p.numel() for p in self.parameters() if p.requires_grad)
        else:
            return sum(p.numel() for p in self.parameters())


# Test the improved model
print("\nTesting Improved CNN Transfer Learning Model...")
improved_model = ImprovedCNNTransferLearning(
    num_classes=7, 
    backbone='resnet50', 
    pretrained=True, 
    freeze_backbone=False,
    dropout_rate=0.5,
    use_attention=True
)
print(f"Model parameters: {improved_model.get_num_params():,}")

# Test forward pass.
# The shape check runs in eval mode without autograd so that the random batch
# does not overwrite the pretrained BatchNorm running statistics and no graph
# is built. The model is switched back to training mode afterwards.
test_input = torch.randn(4, 3, 224, 224)
improved_model.eval()
with torch.no_grad():
    output = improved_model(test_input)
improved_model.train()
print(f"Input shape: {test_input.shape}")
print(f"Output shape: {output.shape}")

## 4. Custom Loss Functions

In [None]:
class LabelSmoothingCrossEntropy(nn.Module):
    """
    Cross entropy with label smoothing.

    The loss blends the usual negative log-likelihood of the target class
    (weight `1 - smoothing`) with the mean negative log-probability over all
    classes (weight `smoothing`), then averages over the batch.
    """

    def __init__(self, smoothing=0.1):
        """
        Args:
            smoothing (float): Weight given to the uniform-target term
        """
        super().__init__()
        self.smoothing = smoothing

    def forward(self, pred, target):
        """
        Args:
            pred (torch.Tensor): Logits of shape (batch_size, num_classes)
            target (torch.Tensor): Class indices of shape (batch_size,)

        Returns:
            torch.Tensor: Scalar loss averaged over the batch
        """
        log_probs = F.log_softmax(pred, dim=-1)

        # Negative log-likelihood of the true class, one value per sample.
        target_term = -log_probs.gather(dim=-1, index=target.unsqueeze(1))
        target_term = target_term.squeeze(1)

        # Cross entropy against a uniform distribution over the classes.
        uniform_term = -log_probs.mean(dim=-1)

        keep = 1. - self.smoothing
        per_sample = keep * target_term + self.smoothing * uniform_term
        return per_sample.mean()


class FocalLoss(nn.Module):
    """
    Focal loss for class-imbalanced classification.

    The per-sample cross entropy is scaled by `alpha * (1 - p_t) ** gamma`,
    where `p_t` is the predicted probability of the true class, so that
    well-classified samples contribute less to the batch mean.
    """

    def __init__(self, alpha=1, gamma=2):
        """
        Args:
            alpha: Global scaling factor applied to every sample
            gamma: Focusing exponent; 0 recovers (scaled) cross entropy
        """
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, pred, target):
        """
        Args:
            pred (torch.Tensor): Logits of shape (batch_size, num_classes)
            target (torch.Tensor): Class indices of shape (batch_size,)

        Returns:
            torch.Tensor: Scalar loss averaged over the batch
        """
        per_sample_ce = F.cross_entropy(pred, target, reduction='none')
        # Cross entropy is -log(p_t), so exponentiating its negative gives p_t.
        true_class_prob = torch.exp(-per_sample_ce)
        modulation = (1 - true_class_prob) ** self.gamma
        weighted = self.alpha * modulation * per_sample_ce
        return weighted.mean()


# Compare the custom losses against plain cross entropy on random data
print("Testing Custom Loss Functions...")

# Loss modules under comparison
ce_loss = nn.CrossEntropyLoss()
ls_loss = LabelSmoothingCrossEntropy(smoothing=0.1)
focal_loss = FocalLoss(alpha=1, gamma=2)

# Dummy batch: logits for 10 samples over 7 classes, plus random class labels
pred = torch.randn(10, 7)
target = torch.randint(low=0, high=7, size=(10,))

# Report each loss value on the same batch
print(f"Cross Entropy Loss: {ce_loss(pred, target).item():.4f}")
print(f"Label Smoothing Loss: {ls_loss(pred, target).item():.4f}")
print(f"Focal Loss: {focal_loss(pred, target).item():.4f}")

## 5. Model Ensemble

In [None]:
class ModelEnsemble(nn.Module):
    """
    Ensemble of multiple models for improved performance.

    Each member's logits are turned into class probabilities, the
    probabilities are combined as a weighted sum, and the log of that mixture
    is returned. The forward pass runs under `torch.no_grad()`, so the
    ensemble is meant for inference only.
    """

    def __init__(self, models, weights=None):
        """
        Initialize model ensemble.

        Args:
            models (list): List of trained models
            weights (list): Optional weights for each model; defaults to
                equal weights. The values are used as given (not normalized).

        Raises:
            ValueError: If `models` is empty or `weights` does not contain
                exactly one entry per model
        """
        super(ModelEnsemble, self).__init__()
        members = list(models)
        if not members:
            raise ValueError("ModelEnsemble requires at least one model")
        self.models = nn.ModuleList(members)

        if weights is None:
            self.weights = [1.0 / len(members)] * len(members)
        else:
            # Copy so later changes to the caller's list do not leak in.
            self.weights = list(weights)
            if len(self.weights) != len(members):
                # zip() in forward() would otherwise silently drop the surplus.
                raise ValueError(
                    f"Expected {len(members)} weights (one per model), "
                    f"got {len(self.weights)}"
                )

        print(f"Ensemble created with {len(members)} models")
        print(f"Weights: {self.weights}")

    def forward(self, x):
        """
        Forward pass through ensemble.

        Args:
            x (torch.Tensor): Input batch accepted by every member model

        Returns:
            torch.Tensor: Log of the weighted probability mixture, shape
                (batch_size, num_classes)
        """
        outputs = []

        with torch.no_grad():
            for model, weight in zip(self.models, self.weights):
                output = model(x)
                outputs.append(weight * F.softmax(output, dim=1))

        ensemble_output = torch.stack(outputs).sum(dim=0)
        # Log-probabilities of the mixture (epsilon guards against log(0)).
        return torch.log(ensemble_output + 1e-8)


# Example of creating an ensemble
print("\nCreating Model Ensemble...")

# Create multiple models for ensemble
model1 = CNNTransferLearning(num_classes=7, backbone='vgg16')
model2 = ImprovedCNNTransferLearning(num_classes=7, backbone='resnet50')

# Create ensemble
ensemble = ModelEnsemble([model1, model2], weights=[0.4, 0.6])

# The members are freshly constructed and therefore in training mode. Switch
# the whole ensemble to eval mode so that Dropout is disabled and BatchNorm
# uses (and does not update) its running statistics during inference.
ensemble.eval()

# Test ensemble
test_input = torch.randn(4, 3, 224, 224)
ensemble_output = ensemble(test_input)
print(f"Ensemble output shape: {ensemble_output.shape}")

## 6. Model Factory Functions

In [None]:
def create_cnn_baseline(num_classes=7, input_size=48, dropout_rate=0.5, device='cpu'):
    """
    Build a CNNBaseline, place it on `device` and report its size.

    Args:
        num_classes (int): Number of emotion classes
        input_size (int): Input image size
        dropout_rate (float): Dropout rate
        device (str): Device to move model to

    Returns:
        CNNBaseline: Initialized model
    """
    settings = {
        'num_classes': num_classes,
        'input_size': input_size,
        'dropout_rate': dropout_rate,
    }
    net = CNNBaseline(**settings).to(device)
    param_count = net.get_num_params()
    print(f"CNN Baseline created with {param_count:,} parameters")
    return net


def create_cnn_transfer(num_classes=7, backbone='vgg16', pretrained=True, 
                       freeze_backbone=False, device='cpu'):
    """
    Build a CNNTransferLearning model, place it on `device` and print its summary.

    Args:
        num_classes (int): Number of emotion classes
        backbone (str): Pre-trained backbone to use
        pretrained (bool): Whether to use pre-trained weights
        freeze_backbone (bool): Whether to freeze backbone weights
        device (str): Device to move model to

    Returns:
        CNNTransferLearning: Initialized model
    """
    settings = {
        'num_classes': num_classes,
        'backbone': backbone,
        'pretrained': pretrained,
        'freeze_backbone': freeze_backbone,
    }
    net = CNNTransferLearning(**settings).to(device)
    net.print_model_info()
    return net


def create_improved_model(num_classes=7, backbone='resnet50', pretrained=True,
                         freeze_backbone=False, dropout_rate=0.5, 
                         use_attention=False, device='cpu'):
    """
    Build an ImprovedCNNTransferLearning model, place it on `device` and report its size.

    Args:
        num_classes (int): Number of emotion classes
        backbone (str): Pre-trained backbone to use
        pretrained (bool): Whether to use pre-trained weights
        freeze_backbone (bool): Whether to freeze backbone weights
        dropout_rate (float): Dropout rate for regularization
        use_attention (bool): Whether to add attention mechanism
        device (str): Device to move model to

    Returns:
        ImprovedCNNTransferLearning: Initialized model
    """
    settings = {
        'num_classes': num_classes,
        'backbone': backbone,
        'pretrained': pretrained,
        'freeze_backbone': freeze_backbone,
        'dropout_rate': dropout_rate,
        'use_attention': use_attention,
    }
    net = ImprovedCNNTransferLearning(**settings).to(device)
    trainable = net.get_num_params()
    print(f"Improved model created with {trainable:,} parameters")
    return net


# Demonstrate the factory helpers
print("\nTesting Model Factory Functions...")

# One model per factory, all placed on the CPU
baseline = create_cnn_baseline(
    num_classes=7,
    input_size=48,
    device='cpu',
)
transfer = create_cnn_transfer(
    num_classes=7,
    backbone='vgg16',
    device='cpu',
)
improved = create_improved_model(
    num_classes=7,
    backbone='resnet50',
    use_attention=True,
    device='cpu',
)

print("\nAll models created successfully!")

## Summary

This notebook provides a complete collection of model architectures for visual emotion recognition:

1. **CNN Baseline**: Simple CNN for grayscale 48x48 images
2. **CNN Transfer Learning**: VGG16/VGG19/AlexNet based transfer learning
3. **Improved Transfer Learning**: ResNet50/101, DenseNet121 or VGG16 backbones with an enhanced classifier head
4. **Custom Loss Functions**: Label smoothing and focal loss
5. **Model Ensemble**: Combining multiple models
6. **Factory Functions**: Easy model creation utilities

All models are self-contained within this notebook and can be used independently without requiring the src folder structure.