# Simple MLP Model for Audio Classification (PyTorch)
This notebook trains a simple MLP in PyTorch to classify WAV files. The model's inputs are MFCC features extracted from the audio files in `data/dataset/train`, `data/dataset/val`, and `data/dataset/test`.

In [None]:
# Imports
import os

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from lvdpy.preprocessing import extract_cepstral_coefficients, create_metadata

In [None]:
# Helper to load MFCC features and labels from a folder
def load_features_labels(folder):
    """Return the feature matrix and raw labels for one dataset folder.

    The folder is passed to ``create_metadata``; the resulting metadata is fed
    to ``extract_cepstral_coefficients``, whose ``.values`` are cast to
    float32. Labels are read from the metadata's ``'category'`` column and are
    returned as-is (not yet integer-encoded).

    Args:
        folder: Path of the folder to load, forwarded to ``create_metadata``.

    Returns:
        A ``(features, labels)`` pair: a float32 NumPy array and the values of
        the ``'category'`` column.
    """
    file_index = create_metadata(folder)
    feature_table = extract_cepstral_coefficients(file_index)
    labels = file_index['category'].values
    features = feature_table.values.astype(np.float32)
    return features, labels

# Load train, val, test sets.
# The paths are assembled with os.path.join so the separator matches the host
# OS: on Windows this yields the same 'data\dataset\<split>' strings as before,
# while hard-coded backslashes would not resolve on Linux/macOS.
train_folder = os.path.join('data', 'dataset', 'train')
val_folder = os.path.join('data', 'dataset', 'val')
test_folder = os.path.join('data', 'dataset', 'test')

X_train, y_train = load_features_labels(train_folder)
X_val, y_val = load_features_labels(val_folder)
X_test, y_test = load_features_labels(test_folder)

In [None]:
# Encode labels
# Fit the encoder on the training labels only; the validation and test labels
# are then mapped through that same fitted encoder.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc, y_test_enc = (le.transform(split_labels) for split_labels in (y_val, y_test))

# Number of distinct classes seen in the training labels.
num_classes = len(le.classes_)

In [None]:
# PyTorch Dataset
class MFCCDataset(Dataset):
    """In-memory dataset pairing feature rows with integer class labels.

    Args:
        X: Array-like of features; converted to a float32 tensor.
        y: Array-like of integer labels; converted to an int64 (long) tensor.

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of items.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
        # Fail fast on misaligned inputs: otherwise extra labels would be
        # silently ignored, or indexing would raise IndexError mid-epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same length, got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap each split's features and encoded labels in an MFCCDataset.
train_ds, val_ds, test_ds = (
    MFCCDataset(split_features, split_labels)
    for split_features, split_labels in (
        (X_train, y_train_enc),
        (X_val, y_val_enc),
        (X_test, y_test_enc),
    )
)

# Only the training loader shuffles; validation and test keep dataset order.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=32)
val_loader, test_loader = (
    DataLoader(split_ds, shuffle=False, batch_size=32)
    for split_ds in (val_ds, test_ds)
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [None]:
# Define simple MLP model
class SimpleMLP(nn.Module):
    """Three-layer fully connected classifier.

    Layer sizes are ``input_dim -> 64 -> 32 -> num_classes``, with ReLU and
    dropout (0.3, then 0.2) after each hidden layer. The final layer has no
    activation, so ``forward`` returns its raw outputs.

    Args:
        input_dim: Size of the last dimension of the input.
        num_classes: Number of output units.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        hidden_stack = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(p=0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(p=0.2),
        ]
        output_layer = nn.Linear(32, num_classes)
        # Unpacked into a single Sequential so sub-modules keep indices 0..6.
        self.net = nn.Sequential(*hidden_stack, output_layer)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        logits = self.net(x)
        return logits

# Use CUDA when PyTorch reports it as available; otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# The input width is the number of feature columns in the training matrix.
model = SimpleMLP(input_dim=X_train.shape[1], num_classes=num_classes)
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

Epoch 1/30
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.1765 - loss: 25.6440 - val_accuracy: 0.2099 - val_loss: 3.8841
Epoch 2/30
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.1765 - loss: 25.6440 - val_accuracy: 0.2099 - val_loss: 3.8841
Epoch 2/30
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1749 - loss: 9.7786 - val_accuracy: 0.1358 - val_loss: 1.8013
Epoch 3/30
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1749 - loss: 9.7786 - val_accuracy: 0.1358 - val_loss: 1.8013
Epoch 3/30
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1903 - loss: 4.2160 - val_accuracy: 0.1235 - val_loss: 1.8040
Epoch 4/30
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1903 - loss: 4.2160 - val_accuracy: 0.1235 - val_loss: 1.8040
Epoch 4/30
[1m52/52[0m [32m━━━━━━━━

In [None]:
# Training loop
def train_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on a module-level ``device`` variable.

    Args:
        model: The ``nn.Module`` to train; switched to training mode.
        loader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        criterion: Loss callable taking ``(outputs, targets)``. It is expected
            to return the batch mean (the default reduction of
            ``nn.CrossEntropyLoss``), since each value is re-weighted by the
            batch size.
        optimizer: Optimizer updating ``model``'s parameters.

    Returns:
        float: Loss averaged over the samples actually processed.
    """
    model.train()
    first_param = next(model.parameters(), None)
    batch_device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    samples_seen = 0
    for X_batch, y_batch in loader:
        X_batch, y_batch = X_batch.to(batch_device), y_batch.to(batch_device)
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        batch_size = X_batch.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
    # Divide by the samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or samples a subset.
    return running_loss / samples_seen

def eval_epoch(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without updating its weights.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on a module-level ``device`` variable.

    Args:
        model: The ``nn.Module`` to evaluate; switched to eval mode.
        loader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.
        criterion: Loss callable taking ``(outputs, targets)``. It is expected
            to return the batch mean (the default reduction of
            ``nn.CrossEntropyLoss``), since each value is re-weighted by the
            batch size.

    Returns:
        tuple: ``(avg_loss, acc, all_preds, all_targets)`` where ``avg_loss``
        is the loss averaged over the samples processed, ``acc`` is the
        ``accuracy_score`` of the predictions, and the two lists hold the
        argmax predictions and the targets in loader order.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    batch_device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    samples_seen = 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(batch_device), y_batch.to(batch_device)
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            batch_size = X_batch.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size
            # Predicted class = index of the largest output along dim 1.
            preds = torch.argmax(outputs, dim=1).cpu().numpy()
            all_preds.extend(preds)
            all_targets.extend(y_batch.cpu().numpy())
    # Divide by the samples seen rather than len(loader.dataset): the two
    # differ when the loader drops the last batch or samples a subset.
    avg_loss = running_loss / samples_seen
    acc = accuracy_score(all_targets, all_preds)
    return avg_loss, acc, all_preds, all_targets

num_epochs = 30
for epoch in range(num_epochs):
    # One pass over the training split, then a check on the validation split.
    train_loss = train_epoch(model, train_loader, criterion, optimizer)
    # Only the loss and accuracy are needed here; predictions are discarded.
    val_loss, val_acc = eval_epoch(model, val_loader, criterion)[:2]
    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f} - Val Loss: {val_loss:.4f} - Val Acc: {val_acc:.4f}")

[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Test Accuracy: 0.13861386138613863
              precision    recall  f1-score   support

  belly pain       0.00      0.00      0.00        25
     burping       1.00      0.29      0.45        24
    cold_hot       0.11      1.00      0.19        21
  discomfort       0.00      0.00      0.00        28
      hungry       0.00      0.00      0.00        77
       tired       0.00      0.00      0.00        27

    accuracy                           0.14       202
   macro avg       0.18      0.22      0.11       202
weighted avg       0.13      0.14      0.07       202

Test Accuracy: 0.13861386138613863
              precision    recall  f1-score   support

  belly pain       0.00      0.00      0.00        25
     burping       1.00      0.29      0.45        24
    cold_hot       0.11      1.00      0.19        21
  discomfort       0.00   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [None]:
# Evaluate on test set
test_loss, test_acc, test_preds, test_targets = eval_epoch(model, test_loader, criterion)
print('Test Accuracy:', test_acc)
# LabelEncoder assigns ids 0..len(classes_)-1, so pass that full id list as
# `labels`: the report rows then stay aligned with `le.classes_` even if a
# class never appears in the test targets or predictions. `zero_division=0`
# keeps the 0.00 entries for never-predicted classes without the
# undefined-metric warnings.
print(classification_report(
    test_targets,
    test_preds,
    labels=list(range(len(le.classes_))),
    target_names=le.classes_,
    zero_division=0,
))