In [19]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from torchvision import transforms

from utils import train_model, evaluate_model
from Datasets import MelSpectrogramDataset
# Models
from Models import CRNN

In [20]:
def _standardize_spectrogram(x):
    """Shift ``x`` to zero mean and scale it by its standard deviation.

    A small constant (1e-6) is added to the standard deviation so a constant
    input does not cause a division by zero.
    NOTE(review): ``x`` is presumably the Mel spectrogram array handed over by
    MelSpectrogramDataset -- confirm its type against the dataset class.
    """
    centered = x - np.mean(x)
    scale = np.std(x) + 1e-6
    return centered / scale


# Per-sample normalization applied to every Mel spectrogram.
transform = transforms.Compose([transforms.Lambda(_standardize_spectrogram)])


In [21]:
# Root of the project data. It defaults to the original author's location but
# can be redirected with the PRACTICAL_WORK_DIR environment variable so the
# notebook also runs on other machines. Trailing separators are stripped so
# the joined paths never contain a doubled slash.
project_root = os.environ.get(
    "PRACTICAL_WORK_DIR", "C:/Users/jimmy/Desktop/Practical_Work"
).rstrip("/\\")

# Folders holding the pre-computed Mel spectrograms for each split.
train_dir = f"{project_root}/processed_data/mel_spectrogram/train"
test_dir = f"{project_root}/processed_data/mel_spectrogram/test"


In [22]:
# Both splits are built the same way: features are read from the split folder
# and the labels from the "labels.npy" file stored inside that folder.
train_dataset, test_dataset = (
    MelSpectrogramDataset(
        features_dir=split_dir,
        labels_path=os.path.join(split_dir, "labels.npy"),
        transform=transform,
    )
    for split_dir in (train_dir, test_dir)
)

# Batch the datasets; only the training split is shuffled.
batch_size = 32
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


## Models


In [23]:
# Instantiate the project's CRNN.
# NOTE(review): presumably single-channel 128x216 spectrogram inputs and the
# 50 target classes -- confirm against the CRNN definition in Models.
model = CRNN(input_channels=1, img_height=128, img_width=216, num_classes=50)

In [24]:
# Train the model
# train_model here is the helper imported from utils; it is given both loaders,
# the target device and an epoch budget of 100.
train_model(model, train_loader, test_loader, device, num_epochs=100)

# Evaluate the model on the test set
# The value returned by evaluate_model is printed as a percentage below.
test_accuracy = evaluate_model(model, test_loader, device)
print(f"Test Accuracy: {test_accuracy:.2f}%")


Epoch [1/100], Loss: 3.8429, Accuracy: 3.75%
Epoch [2/100], Loss: 3.6058, Accuracy: 5.44%
Epoch [3/100], Loss: 3.5301, Accuracy: 5.69%
Epoch [4/100], Loss: 3.4215, Accuracy: 8.00%
Epoch [5/100], Loss: 3.3284, Accuracy: 9.19%


KeyboardInterrupt: 

In [25]:
# The pretrained model is from https://github.com/fschmid56/EfficientAT
from models.mn.model import get_model as get_mn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from torchvision import transforms
from utils import train_model, evaluate_model
from Datasets import MelSpectrogramDataset
# Models
from models.dymn.model import get_model as get_dymn


In [26]:
# Build the "mn" network through the EfficientAT get_model helper, loading the
# weights registered under the name "mn10_as".
# NOTE(review): no class count is passed here, so the classifier head
# presumably keeps the helper's default size rather than the 50 classes used
# elsewhere in this notebook -- confirm against the EfficientAT get_model API.
pretrained_model = get_mn(pretrained_name="mn10_as")

MN(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
       



In [27]:
# Pull a single batch from the training loader to sanity-check the network.
sample_input, sample_label = next(iter(train_loader))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Call the module itself instead of .forward() so that any registered
# forward hooks run as part of the pass.
# NOTE(review): neither the batch nor the model is moved to `device` in this
# cell, so the pass runs wherever both currently live -- confirm that is intended.
output = pretrained_model(sample_input)

In [28]:
# Cross-entropy of the sample batch. The first element of the model's return
# value is used as the logits. The criterion is invoked by calling the module
# (not .forward()) so that module hooks are honoured.
loss = torch.nn.CrossEntropyLoss()
loss(input=output[0], target=sample_label)

tensor(7.6640, grad_fn=<NllLossBackward0>)

In [29]:
def train_model(model, train_loader, test_loader, device, num_epochs=25):
    """Train ``model`` with Adam and cross-entropy, printing per-epoch metrics.

    Args:
        model: ``torch.nn.Module`` to optimize. Its forward pass may return
            the logits tensor directly, or a tuple/list whose first element
            holds the logits.
        train_loader: Iterable yielding ``(features, labels)`` batches.
        test_loader: Accepted for interface compatibility; not used by this
            function.
        device: Device the model and every batch are moved to.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The model is updated in place; the mean batch loss and the
        training accuracy of each epoch are printed.
    """
    model = model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        num_batches = 0
        correct_predictions = 0
        total_predictions = 0

        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)
            # Some models return (logits, extra...); keep only the logits.
            # A plain tensor is used as-is instead of being indexed.
            if isinstance(outputs, (tuple, list)):
                outputs = outputs[0]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            num_batches += 1

            # Calculate accuracy
            predicted = outputs.argmax(dim=1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)

        # An empty loader yields no batches; report NaN instead of dividing by zero.
        mean_loss = epoch_loss / num_batches if num_batches else float("nan")
        if total_predictions:
            epoch_accuracy = (correct_predictions / total_predictions) * 100
        else:
            epoch_accuracy = float("nan")
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

    print("Training complete!")

# Fine-tune the pretrained network for four epochs using the train_model
# defined in this notebook (arguments follow its declared parameter order).
train_model(pretrained_model, train_loader, test_loader, device, num_epochs=4)


Epoch [1/4], Loss: 3.1195, Accuracy: 32.94%
Epoch [2/4], Loss: 0.4519, Accuracy: 88.69%
Epoch [3/4], Loss: 0.1418, Accuracy: 96.19%
Epoch [4/4], Loss: 0.0784, Accuracy: 97.75%
Training complete!


In [30]:
# Score the fine-tuned network on the test loader with the evaluate_model
# helper from utils; its result is printed as a percentage.
test_accuracy = evaluate_model(pretrained_model, test_loader, device)
print(f"Final Test Accuracy: {test_accuracy:.2f}%")


Final Test Accuracy: 83.75%


In [37]:
import os
from Datasets import ESC50Dataset
import torch 
import numpy as np
from mel_spectrogram_extraction import save_mel_spectrogram_dataset

In [41]:


if __name__ == "__main__":
    # Output root for the extracted features; created if it does not exist.
    processed_data_dir = "custom_processed_dataset"
    os.makedirs(processed_data_dir, exist_ok=True)

    # Locations of the metadata table and the audio clips, plus the class count.
    dataset_config = dict(
        meta_csv=os.path.join("dataset", "esc50.csv"),
        audio_path=os.path.join("dataset", "audio"),
        num_of_classes=50,
    )

    # Both splits share the metadata, audio folder and fold; only the
    # `train` flag differs.
    train_dataset, test_dataset = (
        ESC50Dataset(
            metadata=dataset_config['meta_csv'],
            data_dir=dataset_config['audio_path'],
            fold=1,
            train=is_train,
        )
        for is_train in (True, False)
    )

    # One item per batch, in dataset order.
    # NOTE: these names replace any earlier train/test datasets and loaders
    # defined in the notebook.
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, shuffle=False, batch_size=1)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, shuffle=False, batch_size=1)

    # Settings forwarded to the Mel spectrogram extractor.
    # NOTE(review): win_length equals hop_length here -- confirm this is the
    # intended windowing for the downstream model.
    mel_config = dict(
        audio_length=160000,
        n_fft=1024,
        hop_length=320,
        win_length=320,
        n_mels=128,
        fmax=None,
    )

    # Extract and store each split under processed_data_dir.
    save_mel_spectrogram_dataset(train_loader, "train", processed_data_dir, mel_config)
    save_mel_spectrogram_dataset(test_loader, "test", processed_data_dir, mel_config)
    

100%|██████████| 1600/1600 [00:23<00:00, 68.30it/s]


train dataset saved in custom_processed_dataset\train


100%|██████████| 400/400 [00:05<00:00, 76.61it/s]

test dataset saved in custom_processed_dataset\test





In [47]:
# Root of the project data. It defaults to the original author's location but
# can be redirected with the PRACTICAL_WORK_DIR environment variable so the
# notebook also runs on other machines. Trailing separators are stripped so
# the joined paths never contain a doubled slash.
project_root = os.environ.get(
    "PRACTICAL_WORK_DIR", "C:/Users/jimmy/Desktop/Practical_Work"
).rstrip("/\\")

# Folders holding the custom-extracted Mel spectrograms for each split.
# NOTE(review): the extraction cell writes to the relative folder
# "custom_processed_dataset"; these paths match it only when the notebook's
# working directory is the project root -- confirm.
custom_train_dir = f"{project_root}/custom_processed_dataset/train"
custom_test_dir = f"{project_root}/custom_processed_dataset/test"


In [48]:
# Both custom splits are built the same way: features are read from the split
# folder and the labels from the "labels.npy" file stored inside that folder.
custom_train_dataset, custom_test_dataset = (
    MelSpectrogramDataset(
        features_dir=split_dir,
        labels_path=os.path.join(split_dir, "labels.npy"),
        transform=transform,
    )
    for split_dir in (custom_train_dir, custom_test_dir)
)

# Batch the datasets; only the training split is shuffled.
batch_size = 32
custom_train_loader = DataLoader(dataset=custom_train_dataset, shuffle=True, batch_size=batch_size)
custom_test_loader = DataLoader(dataset=custom_test_dataset, shuffle=False, batch_size=batch_size)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [50]:
# Select the GPU when available and place the network on that device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrained_model = pretrained_model.to(device)

# Take one batch from the custom training loader and move both of its tensors
# (inputs, then labels) to the same device as the model.
custom_sample_input, custom_sample_label = (
    batch_part.to(device) for batch_part in next(iter(custom_train_loader))
)

# Run the sample batch through the network.
output = pretrained_model(custom_sample_input)


In [52]:
# Cross-entropy of the custom sample batch. The first element of the model's
# return value is used as the logits. The criterion is invoked by calling the
# module (not .forward()) so that module hooks are honoured.
loss = torch.nn.CrossEntropyLoss()
loss(input=output[0], target=custom_sample_label)

tensor(1.9948, device='cuda:0', grad_fn=<NllLossBackward0>)

In [55]:
def train_model(model, train_loader, test_loader, device, num_epochs=25):
    """Train ``model`` with Adam and cross-entropy, printing per-epoch metrics.

    Args:
        model: ``torch.nn.Module`` to optimize. Its forward pass may return
            the logits tensor directly, or a tuple/list whose first element
            holds the logits.
        train_loader: Iterable yielding ``(features, labels)`` batches.
        test_loader: Accepted for interface compatibility; not used by this
            function.
        device: Device the model and every batch are moved to.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        None. The model is updated in place; the mean batch loss and the
        training accuracy of each epoch are printed.
    """
    model = model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0.0
        num_batches = 0
        correct_predictions = 0
        total_predictions = 0

        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)
            # Some models return (logits, extra...); keep only the logits.
            # A plain tensor is used as-is instead of being indexed.
            if isinstance(outputs, (tuple, list)):
                outputs = outputs[0]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            num_batches += 1

            # Calculate accuracy
            predicted = outputs.argmax(dim=1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)

        # An empty loader yields no batches; report NaN instead of dividing by zero.
        mean_loss = epoch_loss / num_batches if num_batches else float("nan")
        if total_predictions:
            epoch_accuracy = (correct_predictions / total_predictions) * 100
        else:
            epoch_accuracy = float("nan")
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}, Accuracy: {epoch_accuracy:.2f}%")

    print("Training complete!")

# Continue fine-tuning the network for twenty epochs on the custom-extracted
# split, using the train_model defined in this notebook (arguments follow its
# declared parameter order).
train_model(pretrained_model, custom_train_loader, custom_test_loader, device, num_epochs=20)

Epoch [1/20], Loss: 0.0713, Accuracy: 98.00%
Epoch [2/20], Loss: 0.0918, Accuracy: 97.75%
Epoch [3/20], Loss: 0.0961, Accuracy: 97.50%
Epoch [4/20], Loss: 0.0358, Accuracy: 99.06%
Epoch [5/20], Loss: 0.0500, Accuracy: 98.69%
Epoch [6/20], Loss: 0.0361, Accuracy: 99.06%
Epoch [7/20], Loss: 0.0476, Accuracy: 98.94%
Epoch [8/20], Loss: 0.0773, Accuracy: 98.00%
Epoch [9/20], Loss: 0.0428, Accuracy: 98.62%
Epoch [10/20], Loss: 0.0517, Accuracy: 98.75%
Epoch [11/20], Loss: 0.0777, Accuracy: 98.19%
Epoch [12/20], Loss: 0.0316, Accuracy: 99.31%
Epoch [13/20], Loss: 0.0325, Accuracy: 98.88%
Epoch [14/20], Loss: 0.0398, Accuracy: 99.12%
Epoch [15/20], Loss: 0.0155, Accuracy: 99.69%
Epoch [16/20], Loss: 0.0223, Accuracy: 99.38%
Epoch [17/20], Loss: 0.0392, Accuracy: 99.12%
Epoch [18/20], Loss: 0.0270, Accuracy: 99.38%
Epoch [19/20], Loss: 0.0117, Accuracy: 99.62%
Epoch [20/20], Loss: 0.0018, Accuracy: 100.00%
Training complete!


In [56]:
# Score the network on the custom test loader with the evaluate_model helper
# from utils; its result is printed as a percentage.
test_accuracy = evaluate_model(pretrained_model, custom_test_loader, device)
print(f"Final Test Accuracy: {test_accuracy:.2f}%")

Final Test Accuracy: 89.25%
