In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from torchvision import transforms

from utils import train_model, evaluate_model
from Datasets import MelSpectrogramDataset
# Models
from Models import CRNN

In [2]:
transform = transforms.Compose([
    transforms.Lambda(lambda x: (x - np.mean(x)) / (np.std(x) + 1e-6))  # Normalize Mel spectrogram
])


In [3]:
train_dir = "C:/Users/jimmy/Desktop/Practical_Work/processed_data/mel_spectrogram/train"
test_dir = "C:/Users/jimmy/Desktop/Practical_Work/processed_data/mel_spectrogram/test"


In [4]:
train_dataset = MelSpectrogramDataset(
    features_dir=train_dir,
    labels_path=os.path.join(train_dir, "labels.npy"),
    transform=transform
)

test_dataset = MelSpectrogramDataset(
    features_dir=test_dir,
    labels_path=os.path.join(test_dir, "labels.npy"),
    transform=transform
)

# DataLoaders for train and test datasets
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Device configuration (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


## Models


In [5]:
model = CRNN(
    input_channels=1,
    img_height=128,
    img_width=216,
    num_classes=50
)

In [6]:
# Train the model
train_model(model, train_loader, test_loader, device, num_epochs=100)

# Evaluate the model on the test set
test_accuracy = evaluate_model(model, test_loader, device)
print(f"Test Accuracy: {test_accuracy:.2f}%")


Epoch [1/100], Loss: 3.7968, Accuracy: 4.19%
Epoch [2/100], Loss: 3.5035, Accuracy: 8.25%
Epoch [3/100], Loss: 3.3261, Accuracy: 9.12%
Epoch [4/100], Loss: 3.2855, Accuracy: 8.44%
Epoch [5/100], Loss: 3.1644, Accuracy: 12.31%
Epoch [6/100], Loss: 3.0673, Accuracy: 14.06%
Epoch [7/100], Loss: 2.9027, Accuracy: 18.06%
Epoch [8/100], Loss: 2.8251, Accuracy: 20.75%
Epoch [9/100], Loss: 2.6938, Accuracy: 20.94%
Epoch [10/100], Loss: 2.5241, Accuracy: 25.25%
Epoch [11/100], Loss: 2.4288, Accuracy: 28.12%
Epoch [12/100], Loss: 2.3685, Accuracy: 30.44%
Epoch [13/100], Loss: 2.2788, Accuracy: 32.69%
Epoch [14/100], Loss: 2.1525, Accuracy: 35.69%
Epoch [15/100], Loss: 2.1471, Accuracy: 35.88%
Epoch [16/100], Loss: 1.9929, Accuracy: 39.25%
Epoch [17/100], Loss: 1.9555, Accuracy: 39.31%
Epoch [18/100], Loss: 1.8934, Accuracy: 40.19%
Epoch [19/100], Loss: 1.8291, Accuracy: 44.19%
Epoch [20/100], Loss: 1.8085, Accuracy: 44.19%
Epoch [21/100], Loss: 1.6790, Accuracy: 47.31%
Epoch [22/100], Loss: 1.56

In [7]:
# The pretrained model is from https://github.com/fschmid56/EfficientAT
from models.mn.model import get_model as get_mn
import torch
from torch.utils.data import Dataset, DataLoader
import numpy as np
import os
from torchvision import transforms
from utils import train_model, evaluate_model
from Datasets import MelSpectrogramDataset
# Models
from models.dymn.model import get_model as get_dymn


In [8]:
pretrained_model = get_mn(pretrained_name="mn10_as")

MN(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
       



In [9]:
sample_input, sample_label = next(iter(train_loader))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

output = pretrained_model.forward(sample_input)

In [10]:
loss = torch.nn.CrossEntropyLoss()
loss.forward(input = output[0], target = sample_label)

tensor(8.2229, grad_fn=<NllLossBackward0>)

In [11]:
def train_model(model, train_loader, test_loader, device, num_epochs=25):
    model = model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        correct_predictions = 0
        total_predictions = 0
        
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)
            outputs = outputs[0]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)

        epoch_accuracy = (correct_predictions / total_predictions) * 100
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.2f}%")

    print("Training complete!")

train_model(model=pretrained_model,
            train_loader=train_loader,
            test_loader=test_loader,
            device=device,
            num_epochs=4)


Epoch [1/4], Loss: 3.0553, Accuracy: 35.88%
Epoch [2/4], Loss: 0.5438, Accuracy: 86.12%
Epoch [3/4], Loss: 0.1536, Accuracy: 95.62%
Epoch [4/4], Loss: 0.0736, Accuracy: 98.44%
Training complete!


In [12]:
test_accuracy = evaluate_model(pretrained_model, test_loader, device)
print(f"Final Test Accuracy: {test_accuracy:.2f}%")


Final Test Accuracy: 86.25%


In [13]:
import os
from Datasets import ESC50Dataset
import torch 
import numpy as np
from mel_spectrogram_extraction import save_mel_spectrogram_dataset

In [14]:


if __name__ == "__main__":
    processed_data_dir = "custom_processed_dataset"
    os.makedirs(processed_data_dir, exist_ok=True)

    dataset_config = {
        'meta_csv': os.path.join("dataset", "esc50.csv"),
        'audio_path': os.path.join("dataset", "audio"),
        'num_of_classes': 50,
    }

    train_dataset = ESC50Dataset(
        metadata=dataset_config['meta_csv'],
        data_dir=dataset_config['audio_path'],
        fold=1,
        train=True
    )

    test_dataset = ESC50Dataset(
        metadata=dataset_config['meta_csv'],
        data_dir=dataset_config['audio_path'],
        fold=1,
        train=False
    )

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False)

    mel_config = {
        'audio_length': 160000,
        'n_fft': 1024,
        'hop_length': 320,
        'win_length': 320,
        'n_mels': 128,
        'fmax': None,
    }

    save_mel_spectrogram_dataset(train_loader, "train", processed_data_dir, mel_config)
    save_mel_spectrogram_dataset(test_loader, "test", processed_data_dir, mel_config)
    

100%|██████████| 1600/1600 [00:21<00:00, 72.82it/s]


train dataset saved in custom_processed_dataset\train


100%|██████████| 400/400 [00:05<00:00, 75.83it/s]

test dataset saved in custom_processed_dataset\test





In [15]:
custom_train_dir = "C:/Users/jimmy/Desktop/Practical_Work/custom_processed_dataset/train"
custom_test_dir = "C:/Users/jimmy/Desktop/Practical_Work/custom_processed_dataset/test"


In [16]:
custom_train_dataset = MelSpectrogramDataset(
    features_dir=custom_train_dir,
    labels_path=os.path.join(custom_train_dir, "labels.npy"),
    transform=transform
)

custom_test_dataset = MelSpectrogramDataset(
    features_dir=custom_test_dir,
    labels_path=os.path.join(custom_test_dir, "labels.npy"),
    transform=transform
)

# DataLoaders for train and test datasets
batch_size = 32
custom_train_loader = DataLoader(custom_train_dataset, batch_size=batch_size, shuffle=True)
custom_test_loader = DataLoader(custom_test_dataset, batch_size=batch_size, shuffle=False)

# Device configuration (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [17]:
# Ensure the model is on the right device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pretrained_model = pretrained_model.to(device)

# Get a sample from the DataLoader
custom_sample_input, custom_sample_label = next(iter(custom_train_loader))

# Move the sample input and label to the device
custom_sample_input = custom_sample_input.to(device)
custom_sample_label = custom_sample_label.to(device)

# Perform the forward pass
output = pretrained_model(custom_sample_input)


In [18]:
loss = torch.nn.CrossEntropyLoss()
loss.forward(input = output[0], target = custom_sample_label)

tensor(1.9449, device='cuda:0', grad_fn=<NllLossBackward0>)

In [21]:
def train_model(model, train_loader, test_loader, device, num_epochs=25):
    model = model.to(device)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(num_epochs):
        model.train()
        epoch_loss = 0
        correct_predictions = 0
        total_predictions = 0
        
        for features, labels in train_loader:
            features, labels = features.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(features)
            outputs = outputs[0]
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

            # Calculate accuracy
            _, predicted = torch.max(outputs, 1)
            correct_predictions += (predicted == labels).sum().item()
            total_predictions += labels.size(0)

        epoch_accuracy = (correct_predictions / total_predictions) * 100
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss / len(train_loader):.4f}, Accuracy: {epoch_accuracy:.2f}%")

    print("Training complete!")

train_model(model=pretrained_model,
            train_loader=custom_train_loader,
            test_loader=custom_test_loader,
            device=device,
            num_epochs=20)

Epoch [1/20], Loss: 0.0177, Accuracy: 99.56%
Epoch [2/20], Loss: 0.0385, Accuracy: 98.81%
Epoch [3/20], Loss: 0.0923, Accuracy: 97.81%
Epoch [4/20], Loss: 0.0424, Accuracy: 98.88%
Epoch [5/20], Loss: 0.0249, Accuracy: 99.31%
Epoch [6/20], Loss: 0.0138, Accuracy: 99.69%
Epoch [7/20], Loss: 0.0523, Accuracy: 99.19%
Epoch [8/20], Loss: 0.0715, Accuracy: 98.00%
Epoch [9/20], Loss: 0.0679, Accuracy: 98.56%
Epoch [10/20], Loss: 0.0620, Accuracy: 98.56%
Epoch [11/20], Loss: 0.0423, Accuracy: 98.88%
Epoch [12/20], Loss: 0.0249, Accuracy: 99.25%
Epoch [13/20], Loss: 0.0151, Accuracy: 99.62%
Epoch [14/20], Loss: 0.0266, Accuracy: 99.31%
Epoch [15/20], Loss: 0.0160, Accuracy: 99.56%
Epoch [16/20], Loss: 0.0041, Accuracy: 99.88%
Epoch [17/20], Loss: 0.0034, Accuracy: 99.88%
Epoch [18/20], Loss: 0.0032, Accuracy: 99.88%
Epoch [19/20], Loss: 0.0008, Accuracy: 100.00%
Epoch [20/20], Loss: 0.0010, Accuracy: 100.00%
Training complete!


In [22]:
test_accuracy = evaluate_model(pretrained_model, custom_test_loader, device)
print(f"Final Test Accuracy: {test_accuracy:.2f}%")

Final Test Accuracy: 91.25%
