In [40]:
import os
import numpy as np
import librosa
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from glob import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,f1_score, confusion_matrix
from torch.optim.lr_scheduler import CosineAnnealingLR

In [41]:
def seed_all(seed=42):
    import random, numpy as np, torch
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_all(42)

In [42]:
SAMPLE_RATE = 16000
N_MELS = 64
HOP_LENGTH = 512
DURATION = 4
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
MAX_LEN = 200
NUM_CLASSES = 8

In [43]:
def extract_features(filepath):
    y, sr = librosa.load(filepath, sr=SAMPLE_RATE)
    y = librosa.util.fix_length(y, size=SAMPLES_PER_TRACK)
    mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=N_MELS, hop_length=HOP_LENGTH)
    log_mel = librosa.power_to_db(mel)
    delta = librosa.feature.delta(log_mel)
    delta2 = librosa.feature.delta(log_mel, order=2)
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    pitch = np.mean(pitches, axis=0, keepdims=True)
    pitch = np.tile(pitch, (N_MELS, 1))
    energy = librosa.feature.rms(y=y)
    energy = np.tile(energy, (N_MELS, 1))
    features = np.stack([log_mel, delta, delta2, pitch, energy], axis=0)
    features = features[:, :, :MAX_LEN]
    if features.shape[2] < MAX_LEN:
        pad_width = MAX_LEN - features.shape[2]
        features = np.pad(features, ((0, 0), (0, 0), (0, pad_width)))
        features = (features - features.mean(axis=(1, 2), keepdims=True)) / (features.std(axis=(1, 2), keepdims=True) + 1e-6)
    return features.astype(np.float32)


In [44]:
class EmotionDataset(Dataset):
    def __init__(self, filepaths, labels):
        self.filepaths = filepaths
        self.labels = labels

    def __len__(self):
        return len(self.filepaths)

    def __getitem__(self, idx):
        x = extract_features(self.filepaths[idx])
        y = self.labels[idx]
        return torch.tensor(x), torch.tensor(y)

def parse_label(filename):
    emotion_id = int(filename.split("-")[2])
    return emotion_id - 1

def load_dataset(root_dir):
    files = glob(os.path.join(root_dir, "*.wav"))
    labels = [parse_label(os.path.basename(f)) for f in files]
    return train_test_split(files, labels, test_size=0.2, stratify=labels, random_state=42)

In [45]:
class EmotionCNNBiLSTMWithAttention(nn.Module):
    def __init__(self, num_classes=NUM_CLASSES, hidden_size=128):
        super().__init__()
        self.cnn = nn.Sequential(
            nn.Conv2d(5, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d((2, 2))
        )
        self.lstm = nn.LSTM(input_size=32 * 32, hidden_size=hidden_size, batch_first=True, bidirectional=True)
        self.attention = nn.Linear(hidden_size * 2, 1)
        self.fc = nn.Linear(hidden_size * 2, num_classes)

    def forward(self, x):
        x = self.cnn(x)
        x = x.permute(0, 3, 1, 2)
        x = x.contiguous().view(x.size(0), x.size(1), -1)
        lstm_out, _ = self.lstm(x)
        attn_weights = torch.softmax(self.attention(lstm_out), dim=1)
        x = torch.sum(attn_weights * lstm_out, dim=1)
        return self.fc(x)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
root_dir = r"C:\Users\Raihan\OneDrive\Desktop\OPEN_PROJECT_AUDIO\data\train_dataset" 
train_files, val_files, train_labels, val_labels = load_dataset(root_dir)
train_loader = DataLoader(EmotionDataset(train_files, train_labels), batch_size=32, shuffle=True)
test_loader = DataLoader(EmotionDataset(val_files, val_labels), batch_size=32)

In [47]:

model = EmotionCNNBiLSTMWithAttention().to(device)

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizers = optim.AdamW(model.parameters(), lr=1e-3,weight_decay=1e-4)
schedulers = CosineAnnealingLR(optimizers, T_max=50)

In [48]:
from tqdm import tqdm
for epoch in (range(50)):
        for x, y in tqdm(train_loader):
            x, y = x.to(device), y.to(device)
            optimizers.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizers.step()
            total_loss += loss.item()
            schedulers.step()

        model.eval()
        val_preds, val_labels = [], []
        with torch.no_grad():
            for x_val, y_val in test_loader:
                x_val = x_val.to(device)
                out_val = model(x_val)
                val_preds.extend(torch.argmax(out_val, dim=1).cpu().numpy())
                val_labels.extend(y_val.numpy())
            acc = np.mean(np.array(val_preds) == np.array(val_labels))
            print(f" Epoch {epoch+1} | Train Loss: {total_loss / len(train_loader):.4f} | Val Acc: {acc:.4f}")


100%|██████████| 62/62 [02:12<00:00,  2.13s/it]


 Epoch 1 | Train Loss: 2.0427 | Val Acc: 0.2261


100%|██████████| 62/62 [02:05<00:00,  2.02s/it]


 Epoch 2 | Train Loss: 3.9375 | Val Acc: 0.2933


100%|██████████| 62/62 [01:55<00:00,  1.87s/it]


 Epoch 3 | Train Loss: 5.6730 | Val Acc: 0.3483


100%|██████████| 62/62 [01:43<00:00,  1.67s/it]


 Epoch 4 | Train Loss: 7.3300 | Val Acc: 0.4257


100%|██████████| 62/62 [01:15<00:00,  1.22s/it]


 Epoch 5 | Train Loss: 8.9128 | Val Acc: 0.4399


100%|██████████| 62/62 [01:14<00:00,  1.19s/it]


 Epoch 6 | Train Loss: 10.3875 | Val Acc: 0.4969


100%|██████████| 62/62 [01:57<00:00,  1.89s/it]


 Epoch 7 | Train Loss: 11.7945 | Val Acc: 0.5418


100%|██████████| 62/62 [01:59<00:00,  1.92s/it]


 Epoch 8 | Train Loss: 13.0738 | Val Acc: 0.5397


100%|██████████| 62/62 [01:18<00:00,  1.27s/it]


 Epoch 9 | Train Loss: 14.3052 | Val Acc: 0.6578


100%|██████████| 62/62 [01:15<00:00,  1.22s/it]


 Epoch 10 | Train Loss: 15.4412 | Val Acc: 0.6945


100%|██████████| 62/62 [01:26<00:00,  1.40s/it]


 Epoch 11 | Train Loss: 16.4334 | Val Acc: 0.7128


100%|██████████| 62/62 [01:21<00:00,  1.32s/it]


 Epoch 12 | Train Loss: 17.4586 | Val Acc: 0.7576


100%|██████████| 62/62 [01:15<00:00,  1.21s/it]


 Epoch 13 | Train Loss: 18.3382 | Val Acc: 0.7026


100%|██████████| 62/62 [01:14<00:00,  1.20s/it]


 Epoch 14 | Train Loss: 19.1757 | Val Acc: 0.7719


100%|██████████| 62/62 [01:37<00:00,  1.57s/it]


 Epoch 15 | Train Loss: 19.9884 | Val Acc: 0.7984


100%|██████████| 62/62 [01:19<00:00,  1.28s/it]


 Epoch 16 | Train Loss: 20.7185 | Val Acc: 0.7189


100%|██████████| 62/62 [01:14<00:00,  1.20s/it]


 Epoch 17 | Train Loss: 21.5159 | Val Acc: 0.7841


100%|██████████| 62/62 [01:14<00:00,  1.21s/it]


 Epoch 18 | Train Loss: 22.2315 | Val Acc: 0.7189


100%|██████████| 62/62 [01:14<00:00,  1.21s/it]


 Epoch 19 | Train Loss: 22.9076 | Val Acc: 0.8208


100%|██████████| 62/62 [01:07<00:00,  1.10s/it]


 Epoch 20 | Train Loss: 23.5668 | Val Acc: 0.7963


100%|██████████| 62/62 [01:10<00:00,  1.14s/it]


 Epoch 21 | Train Loss: 24.1752 | Val Acc: 0.8208


100%|██████████| 62/62 [02:05<00:00,  2.02s/it]


 Epoch 22 | Train Loss: 24.7781 | Val Acc: 0.8167


100%|██████████| 62/62 [02:11<00:00,  2.12s/it]


 Epoch 23 | Train Loss: 25.3629 | Val Acc: 0.8208


100%|██████████| 62/62 [01:52<00:00,  1.82s/it]


 Epoch 24 | Train Loss: 25.9222 | Val Acc: 0.8371


100%|██████████| 62/62 [11:17:21<00:00, 655.51s/it]      


 Epoch 25 | Train Loss: 26.4753 | Val Acc: 0.8513


100%|██████████| 62/62 [01:07<00:00,  1.09s/it]


 Epoch 26 | Train Loss: 27.0109 | Val Acc: 0.8493


100%|██████████| 62/62 [01:34<00:00,  1.52s/it]


 Epoch 27 | Train Loss: 27.5391 | Val Acc: 0.8534


100%|██████████| 62/62 [01:51<00:00,  1.80s/it]


 Epoch 28 | Train Loss: 28.0615 | Val Acc: 0.8473


100%|██████████| 62/62 [01:20<00:00,  1.30s/it]


 Epoch 29 | Train Loss: 28.5733 | Val Acc: 0.8534


100%|██████████| 62/62 [01:31<00:00,  1.47s/it]


 Epoch 30 | Train Loss: 29.0844 | Val Acc: 0.8615


100%|██████████| 62/62 [01:09<00:00,  1.11s/it]


 Epoch 31 | Train Loss: 29.5907 | Val Acc: 0.8513


100%|██████████| 62/62 [01:06<00:00,  1.07s/it]


 Epoch 32 | Train Loss: 30.0909 | Val Acc: 0.8717


100%|██████████| 62/62 [02:08<00:00,  2.07s/it]


 Epoch 33 | Train Loss: 30.5917 | Val Acc: 0.8615


100%|██████████| 62/62 [01:07<00:00,  1.10s/it]


 Epoch 34 | Train Loss: 31.0877 | Val Acc: 0.8697


100%|██████████| 62/62 [01:09<00:00,  1.12s/it]


 Epoch 35 | Train Loss: 31.5819 | Val Acc: 0.8656


100%|██████████| 62/62 [01:11<00:00,  1.16s/it]


 Epoch 36 | Train Loss: 32.0748 | Val Acc: 0.8513


100%|██████████| 62/62 [01:10<00:00,  1.13s/it]


 Epoch 37 | Train Loss: 32.5631 | Val Acc: 0.8452


100%|██████████| 62/62 [01:27<00:00,  1.42s/it]


 Epoch 38 | Train Loss: 33.0510 | Val Acc: 0.8697


100%|██████████| 62/62 [01:29<00:00,  1.45s/it]


 Epoch 39 | Train Loss: 33.5381 | Val Acc: 0.8554


100%|██████████| 62/62 [01:15<00:00,  1.21s/it]


 Epoch 40 | Train Loss: 34.0245 | Val Acc: 0.8635


100%|██████████| 62/62 [01:43<00:00,  1.67s/it]


 Epoch 41 | Train Loss: 34.5080 | Val Acc: 0.8656


100%|██████████| 62/62 [01:08<00:00,  1.10s/it]


 Epoch 42 | Train Loss: 34.9883 | Val Acc: 0.8635


100%|██████████| 62/62 [01:11<00:00,  1.15s/it]


 Epoch 43 | Train Loss: 35.4681 | Val Acc: 0.8758


100%|██████████| 62/62 [01:09<00:00,  1.13s/it]


 Epoch 44 | Train Loss: 35.9472 | Val Acc: 0.8737


100%|██████████| 62/62 [01:08<00:00,  1.10s/it]


 Epoch 45 | Train Loss: 36.4244 | Val Acc: 0.8717


100%|██████████| 62/62 [01:08<00:00,  1.11s/it]


 Epoch 46 | Train Loss: 36.9022 | Val Acc: 0.8635


100%|██████████| 62/62 [01:09<00:00,  1.13s/it]


 Epoch 47 | Train Loss: 37.3784 | Val Acc: 0.8676


100%|██████████| 62/62 [01:09<00:00,  1.11s/it]


 Epoch 48 | Train Loss: 37.8541 | Val Acc: 0.8676


100%|██████████| 62/62 [01:08<00:00,  1.11s/it]


 Epoch 49 | Train Loss: 38.3294 | Val Acc: 0.8676


100%|██████████| 62/62 [01:08<00:00,  1.11s/it]


 Epoch 50 | Train Loss: 38.8034 | Val Acc: 0.8656


In [57]:
acc=accuracy_score(val_preds,val_labels)
f1=f1_score(val_preds,val_labels,average='weighted')
cm=confusion_matrix(val_preds,val_labels)

In [58]:
print(acc)
print(f1)
print(cm)

0.8655804480651731
0.8668825036608722
[[33  0  1  0  1  0  0  0]
 [ 1 75  3  4  0  0  0  0]
 [ 1  0 62  4  0  3  0  2]
 [ 3  0  1 56  1  6  1  0]
 [ 0  0  4  4 68  4  0  1]
 [ 0  0  3  5  2 62  1  3]
 [ 0  0  0  2  2  0 37  1]
 [ 0  0  1  0  1  0  0 32]]


In [59]:
per_class_acc = cm.diagonal() / cm.sum(axis=1)
for i, acc in enumerate(per_class_acc):
    print(f"Class {i} Accuracy: {acc:.4f}")

Class 0 Accuracy: 0.9429
Class 1 Accuracy: 0.9036
Class 2 Accuracy: 0.8611
Class 3 Accuracy: 0.8235
Class 4 Accuracy: 0.8395
Class 5 Accuracy: 0.8158
Class 6 Accuracy: 0.8810
Class 7 Accuracy: 0.9412


In [60]:
torch.save(model.state_dict(), 'cnn_bilstm_attention_model.pth')