In [34]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from torchvision import transforms

from utils import train_model, evaluate_model
from Datasets import MelSpectrogramDataset
# Models
from Models import CRNN

In [4]:
# Root folder of the pre-computed mel-spectrogram features. Set the MEL_DATA_ROOT
# environment variable to run on another machine; the fallback is the original
# hard-coded location, so existing runs resolve to exactly the same paths.
DATA_ROOT = os.environ.get(
    "MEL_DATA_ROOT",
    "C:/Users/jimmy/Desktop/Practical_Work/processed_data/mel_spectrogram",
).rstrip("/\\")

# Per-sample standardisation: subtract the sample's own mean and divide by its own
# standard deviation. The 1e-6 keeps the division finite for a constant input.
transform = transforms.Compose([
    transforms.Lambda(lambda x: (x - np.mean(x)) / (np.std(x) + 1e-6))  # Normalize Mel spectrogram
])

train_dir = f"{DATA_ROOT}/train"
test_dir = f"{DATA_ROOT}/test"

In [5]:
def _load_split(split_dir):
    """Build the dataset for one split directory.

    The labels file is read from ``labels.npy`` inside ``split_dir`` and the
    notebook-level ``transform`` is applied to every sample.
    """
    return MelSpectrogramDataset(
        features_dir=split_dir,
        labels_path=os.path.join(split_dir, "labels.npy"),
        transform=transform,
    )


train_dataset = _load_split(train_dir)
test_dataset = _load_split(test_dir)

# Mini-batch loaders: only the training split is reshuffled.
batch_size = 32
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Prefer CUDA when PyTorch reports it as available, otherwise stay on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print(device)

cpu


## Models


In [6]:
# Constructor arguments for the CRNN, gathered in one place so they are easy to tweak.
crnn_kwargs = {
    "input_channels": 1,
    "img_height": 128,
    "img_width": 216,
    "num_classes": 50,
}
model = CRNN(**crnn_kwargs)

In [5]:
# Number of passes over the training loader for the CRNN run.
NUM_EPOCHS = 100

# Fit the CRNN; progress is printed by train_model itself.
train_model(model, train_loader, test_loader, device, num_epochs=NUM_EPOCHS)

# Score the trained network on the held-out split and report it as a percentage.
test_accuracy = evaluate_model(model, test_loader, device)
print(f"Test Accuracy: {test_accuracy:.2f}%")


Epoch [1/100], Loss: 3.8373, Accuracy: 3.75%
Epoch [2/100], Loss: 3.5848, Accuracy: 5.94%
Epoch [3/100], Loss: 3.4412, Accuracy: 6.69%
Epoch [4/100], Loss: 3.2612, Accuracy: 10.62%
Epoch [5/100], Loss: 3.2328, Accuracy: 9.81%
Epoch [6/100], Loss: 3.1658, Accuracy: 12.75%
Epoch [7/100], Loss: 3.0839, Accuracy: 13.38%
Epoch [8/100], Loss: 2.9564, Accuracy: 16.06%
Epoch [9/100], Loss: 2.8304, Accuracy: 19.62%
Epoch [10/100], Loss: 2.7900, Accuracy: 20.44%
Epoch [11/100], Loss: 2.7223, Accuracy: 22.06%
Epoch [12/100], Loss: 2.6458, Accuracy: 23.06%
Epoch [13/100], Loss: 2.5247, Accuracy: 25.50%
Epoch [14/100], Loss: 2.4736, Accuracy: 27.50%
Epoch [15/100], Loss: 2.4225, Accuracy: 28.88%
Epoch [16/100], Loss: 2.3308, Accuracy: 30.94%
Epoch [17/100], Loss: 2.3196, Accuracy: 30.88%
Epoch [18/100], Loss: 2.1609, Accuracy: 33.25%
Epoch [19/100], Loss: 2.0903, Accuracy: 35.62%
Epoch [20/100], Loss: 2.0499, Accuracy: 38.69%
Epoch [21/100], Loss: 1.9979, Accuracy: 39.88%
Epoch [22/100], Loss: 1.87

In [2]:
from models.mn.model import get_model as get_mn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from torchvision import transforms
from utils import train_model, evaluate_model
from Datasets import MelSpectrogramDataset
# Models
from models.dymn.model import get_model as get_dymn


In [None]:
# Root folder of the pre-computed mel-spectrogram features. Set the MEL_DATA_ROOT
# environment variable to run on another machine; the fallback is the original
# hard-coded location, so existing runs resolve to exactly the same paths.
DATA_ROOT = os.environ.get(
    "MEL_DATA_ROOT",
    "C:/Users/jimmy/Desktop/Practical_Work/processed_data/mel_spectrogram",
).rstrip("/\\")

# Per-sample standardisation: subtract the sample's own mean and divide by its own
# standard deviation. The 1e-6 keeps the division finite for a constant input.
transform = transforms.Compose([
    transforms.Lambda(lambda x: (x - np.mean(x)) / (np.std(x) + 1e-6))  # Normalize Mel spectrogram
])

train_dir = f"{DATA_ROOT}/train"
test_dir = f"{DATA_ROOT}/test"

# Each split directory holds the feature files plus a labels.npy file.
train_dataset = MelSpectrogramDataset(
    features_dir=train_dir,
    labels_path=os.path.join(train_dir, "labels.npy"),
    transform=transform
)

test_dataset = MelSpectrogramDataset(
    features_dir=test_dir,
    labels_path=os.path.join(test_dir, "labels.npy"),
    transform=transform
)

# Mini-batch loaders: only the training split is reshuffled.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [None]:
# Name of the pretrained checkpoint requested from the MN model factory.
# NOTE(review): the classifier head size is left at the factory default here, while the
# CRNN experiment uses 50 classes — confirm the head matches the dataset's label count.
pretrained_name = "mn10_as"
pretrained_model = get_mn(pretrained_name=pretrained_name)

MN(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
       



In [None]:
# Pull a single batch to smoke-test the pretrained network before fine-tuning.
sample_input, sample_label = next(iter(train_loader))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Call the module itself rather than .forward(): __call__ runs any registered hooks,
# whereas a direct .forward() call silently skips them.
output = pretrained_model(sample_input)

In [8]:
# Cross-entropy of the pretrained network's first output on the sample batch.
# The criterion is invoked via __call__ (not .forward()) so module hooks are honoured.
loss = torch.nn.CrossEntropyLoss()
loss(input=output[0], target=sample_label)

tensor(8.3919, grad_fn=<NllLossBackward0>)

In [None]:
def train_model(model, train_loader, test_loader, device, num_epochs=25):
    """Train ``model`` with Adam and cross-entropy, printing loss and accuracy per epoch.

    This definition replaces the ``train_model`` imported from ``utils`` for the rest
    of the notebook. It accepts models whose forward pass returns either the logits
    tensor directly or a tuple/list whose first element is the logits.

    Args:
        model: ``torch.nn.Module`` to optimise; it is moved to ``device`` first.
        train_loader: Sized iterable yielding ``(features, labels)`` batches.
        test_loader: Not used by this function; accepted so existing calls that pass
            it keep working.
        device: ``torch.device`` (or device string) the model and batches are moved to.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. The model is updated in place and progress is printed to stdout.
    """
    model = model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        correct_predictions = 0
        total_predictions = 0

        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)
            # Only unwrap when the model really returned a container. Indexing a plain
            # logits tensor with [0] would select the first sample's row instead.
            if isinstance(outputs, (tuple, list)):
                outputs = outputs[0]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            # Calculate accuracy: the predicted class is the index of the largest logit.
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)

        epoch_accuracy = (correct_predictions / total_predictions) * 100  # in percentage
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.2f}%")

    print("Training complete!")


# Fine-tune the pretrained network for a handful of epochs on the training split.
train_model(pretrained_model, train_loader, test_loader, device, num_epochs=4)


Epoch [1/4], Loss: 2.9062, Accuracy: 38.31%
Epoch [2/4], Loss: 0.4413, Accuracy: 88.62%
Epoch [3/4], Loss: 0.1560, Accuracy: 95.62%
Epoch [4/4], Loss: 0.0802, Accuracy: 98.00%
Training complete!


In [None]:
# Score the fine-tuned network on the held-out split; the value is printed as a percentage.
test_accuracy = evaluate_model(
    pretrained_model,
    test_loader,
    device,
)
print(f"Final Test Accuracy: {test_accuracy:.2f}%")


Final Test Accuracy: 83.75%
