# In [None]:
import torch
import torch.nn as nn
import numpy as np

# Define the parallel model with CNN and Transformer
class ParallelModel(nn.Module):
    """Two parallel CNN branches plus a Transformer encoder branch.

    The three branch embeddings are concatenated and fed to a single linear
    classifier. The layer sizes below imply a single-channel input of shape
    ``(batch, 1, 40, width)``: the Transformer uses ``d_model=40`` on the
    height axis, and the classifier expects 512 features per CNN branch
    (64 channels x 8 positions, i.e. ``width // 32 == 8``).
    """

    def __init__(self, num_emotions=8):
        super().__init__()

        # Transformer branch: shrink the last axis by 4 before encoding.
        self.transformer_maxpool = nn.MaxPool2d(kernel_size=[1, 4], stride=[1, 4])
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=40,
            nhead=4,
            dim_feedforward=512,
            dropout=0.4,
            activation='relu',
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=4)

        # Two CNN branches with the same architecture but separate weights.
        self.conv2Dblock1 = self._make_conv_stack()
        self.conv2Dblock2 = self._make_conv_stack()

        # Classifier over [cnn1 (512) | cnn2 (512) | transformer (40)].
        self.fc1_linear = nn.Linear(512 * 2 + 40, num_emotions)
        self.softmax_out = nn.Softmax(dim=1)

    @staticmethod
    def _make_conv_stack():
        """Build one CNN branch: three Conv-BatchNorm-ReLU-MaxPool-Dropout stages."""
        stages = []
        channels_in = 1
        # (output channels, max-pool size) for each stage.
        for channels_out, pool in ((16, 2), (32, 4), (64, 4)):
            stages.extend(
                [
                    nn.Conv2d(channels_in, channels_out, kernel_size=3, stride=1, padding=1),
                    nn.BatchNorm2d(channels_out),
                    nn.ReLU(),
                    nn.MaxPool2d(kernel_size=pool, stride=pool),
                    nn.Dropout(p=0.3),
                ]
            )
            channels_in = channels_out
        return nn.Sequential(*stages)

    def forward(self, x):
        """Run all three branches on ``x`` and classify the fused embedding.

        Returns:
            tuple: ``(logits, probabilities)``, where the probabilities are
            the softmax of the logits over dim 1.
        """
        # CNN branches: flatten everything except the batch axis.
        cnn_embeddings = [
            torch.flatten(branch(x), start_dim=1)
            for branch in (self.conv2Dblock1, self.conv2Dblock2)
        ]

        # Transformer branch: pool, drop the channel axis, then move the last
        # axis to the front so the encoder sees (sequence, batch, features).
        pooled = self.transformer_maxpool(x).squeeze(1)
        sequence = pooled.permute(2, 0, 1)
        encoded = self.transformer_encoder(sequence)
        transformer_embedding = encoded.mean(dim=0)  # average over the sequence axis

        # Fuse the embeddings and classify.
        fused = torch.cat([*cnn_embeddings, transformer_embedding], dim=1)
        output_logits = self.fc1_linear(fused)
        return output_logits, self.softmax_out(output_logits)

# Load model and weights.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted
# source (recent PyTorch releases also accept weights_only=True to restrict
# what may be deserialized).
# map_location='cpu' lets a checkpoint that was saved from a GPU run be restored
# on a machine without CUDA; the model is built and used on the CPU here.
model = ParallelModel(num_emotions=8)
model.load_state_dict(
    torch.load('binaries/parallel_all_you_wantFINAL-429.pkl', map_location='cpu')
)
model.eval()  # inference mode: disables dropout, BatchNorm uses running statistics

# Test the model on your MFCC data
def test_model(mfcc_data):
    """Classify one MFCC feature matrix with the module-level ``model``.

    Args:
        mfcc_data: 2D array-like of MFCC features for a single audio sample
            (anything ``torch.as_tensor`` accepts, e.g. a NumPy array).

    Returns:
        int: Index of the highest-probability emotion class. The same value
        is also printed.

    Raises:
        ValueError: If ``mfcc_data`` is not two-dimensional.
    """
    # as_tensor avoids the copy (and the warning) torch.tensor incurs when the
    # input is already a tensor.
    features = torch.as_tensor(mfcc_data, dtype=torch.float32)
    if features.dim() != 2:
        raise ValueError(
            f"mfcc_data must be a 2D array, got {features.dim()} dimension(s)"
        )

    # Add the batch and channel dimensions the model's Conv2d layers expect.
    mfcc_tensor = features.unsqueeze(0).unsqueeze(0)

    # Inference only: no gradients needed.
    with torch.no_grad():
        _, predictions = model(mfcc_tensor)

    # Output predicted emotion
    predicted_emotion = torch.argmax(predictions, dim=1).item()
    print(f"Predicted Emotion Class: {predicted_emotion}")
    return predicted_emotion

# Example usage with MFCC data
# Assuming `mfcc_data` is your precomputed MFCC feature (2D array for one audio sample)
# mfcc_data = ...
# test_model(mfcc_data)
