In [1]:
import pandas as pd
from pathlib import Path
import random
import numpy as np
import torch
import torchaudio
import matplotlib.pyplot as plt
from IPython.display import Audio
import yaml
from easydict import EasyDict



In [2]:
#CSV and my dataset with .npy files
metfile= pd.read_csv('/Users/nellygarcia/Documents/InformationRetrivalPhd/Dataset/Processed/metadata.csv')
dataset= Path('/Users/nellygarcia/Documents/InformationRetrivalPhd/Dataset')
# Make a list of classes, converting labels into numbers
labels={}
train_files = metfile[metfile['split'] == 'train']['filename'].tolist()
val_files = metfile[metfile['split'] == 'val']['filename'].tolist()
test_files = metfile[metfile['split'] == 'test']['filename'].tolist()

# Assuming labels are already one-hot encoded in the CSV as lists
train_labels = [eval(label) for label in metfile[metfile['split'] == 'train']['label'].tolist()]
val_labels = [eval(label) for label in metfile[metfile['split'] == 'val']['label'].tolist()]
test_labels = [eval(label) for label in metfile[metfile['split'] == 'test']['label'].tolist()]

with open('/Users/nellygarcia/Documents/InformationRetrivalPhd/config.yaml') as conf:
    cfg = EasyDict(yaml.safe_load(conf))

print(f'Number of train/val/test files are = {len(train_files)}/{len(val_files)}/{len(test_files)}')

Number of train/val/test files are = 1109/181/326


Checking the filename+labels

In [3]:
print("Train Files and Labels:")
for filename, label in zip(train_files, train_labels):
    print(f"Filename: {filename}, Label: {label}")

print("\nValidation Files and Labels:")
for filename, label in zip(val_files, val_labels):
    print(f"Filename: {filename}, Label: {label}")

print("\nTest Files and Labels:")
for filename, label in zip(test_files, test_labels):
    print(f"Filename: {filename}, Label: {label}")

Train Files and Labels:
Filename: F_n_FireEmbersNoRev18-6.npy, Label: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Filename: f_n_DebrisGlass26-17.npy, Label: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Filename: F_n_PouringHotSynthNoRev4-7.npy, Label: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Filename: f_n_ApplauseSynth44-40.npy, Label: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]
Filename: f_n_Applause22-16.npy, Label: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.

In [4]:
class MyDataset(torch.utils.data.Dataset):
    def __init__(self, cfg, filenames, labels, base_path, transforms=None):
        assert len(filenames) == len(labels), f'Inconsistent length of filenames and labels.'

        self.filenames = filenames
        self.labels = labels
        self.transforms = transforms
        self.cfg = cfg
        self.base_path = base_path

        # Calculate length of clip this dataset will make
        self.sample_length = int((cfg.clip_length * cfg.sample_rate + cfg.hop_length - 1) // cfg.hop_length)

        # Test with first file
        assert self[0][0].shape[-1] == self.sample_length, f'Check your files, failed to load {filenames[0]}'

        # Show basic info.
        print(f'Dataset will yield log-mel spectrogram {len(self)} data samples in shape [1, {cfg.n_mels}, {self.sample_length}]')

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        assert 0 <= index < len(self)

        # Load the log-mel spectrogram
        log_mel_spec = np.load(str(self.base_path / self.filenames[index]))

        # Padding if sample is shorter than expected - both head & tail are filled with 0s
        pad_size = self.sample_length - log_mel_spec.shape[-1]
        if pad_size > 0:
            offset = pad_size // 2
            log_mel_spec = np.pad(log_mel_spec, ((0, 0), (0, 0), (offset, pad_size - offset)), 'constant')

        # Random crop
        crop_size = log_mel_spec.shape[-1] - self.sample_length
        if crop_size > 0:
            start = np.random.randint(0, crop_size)
            log_mel_spec = log_mel_spec[..., start:start + self.sample_length]

        # Apply augmentations
        if self.transforms is not None:
            log_mel_spec = self.transforms(log_mel_spec)

        return torch.Tensor(log_mel_spec), torch.Tensor(self.labels[index])


In [5]:
# Define the processed directory
processed_dir = Path('/Users/nellygarcia/Documents/InformationRetrivalPhd/Dataset/Processed')

# Create dataset instances
train_dataset = MyDataset(cfg, train_files, train_labels, processed_dir)
valid_dataset = MyDataset(cfg, val_files, val_labels, processed_dir)
test_dataset = MyDataset(cfg, test_files, test_labels, processed_dir)


Dataset will yield log-mel spectrogram 1109 data samples in shape [1, 64, 431]
Dataset will yield log-mel spectrogram 181 data samples in shape [1, 64, 431]
Dataset will yield log-mel spectrogram 326 data samples in shape [1, 64, 431]


In [6]:
BS = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BS, shuffle=True, pin_memory=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=BS, pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BS, pin_memory=True)


Model

In [7]:
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    def __init__(self, n_classes):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.conv3 = nn.Conv2d(64, 128, 3, 1)
        self.conv4 = nn.Conv2d(128, 256, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.pooling = nn.AdaptiveAvgPool2d((8, 8)) # extended
        self.fc1 = nn.Linear(16384, 128) #fine tune the full convolutional layer. 
        self.fc2 = nn.Linear(128, n_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)

        x = self.conv3(x)
        x = F.relu(x)
        x = self.conv4(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = self.pooling(x)

        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        return x

In [8]:
import pytorch_lightning as pl
from torchmetrics.functional import accuracy


class MyLearner(pl.LightningModule):

    def __init__(self, model, learning_rate=3e-4):

        super().__init__()
        self.learning_rate = learning_rate
        self.model = model

    def forward(self, x):
        x = self.model(x)
        x = F.log_softmax(x, dim=1)
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        return loss

    def validation_step(self, batch, batch_idx, split='val'):
        x, y = batch
        logits = self(x)
        loss = F.nll_loss(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = accuracy(preds, y)

        self.log(f'{split}_loss', loss, prog_bar=True)
        self.log(f'{split}_acc', acc, prog_bar=True)
        return loss

    def test_step(self, batch, batch_idx):
        return self.validation_step(batch, batch_idx, split='test')

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        return optimizer

    def train_dataloader(self):
        return train_loader

    def val_dataloader(self):
        return valid_loader

    def test_dataloader(self):
        return test_loader

In [9]:
device = torch.device('cpu')

def eval_acc(model, device, dataloader, debug_name=None):
    model = model.to(device).eval()
    count = correct = 0
    for X, gt in dataloader:
        logits = model(X.to(device))
        preds = torch.argmax(logits, dim=1)
        correct += sum(preds.cpu() == gt)
        count += len(gt)
    acc = correct/count
    if debug_name:
        print(f'{debug_name} acc = {acc:.4f}')
    return acc

In [10]:
learner = MyLearner(Net(len(train_dataset)))
checkpoint = pl.callbacks.ModelCheckpoint(monitor='val_acc')
trainer = pl.Trainer('cpu', max_epochs=100, callbacks=[checkpoint])
trainer.fit(learner);

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/nellygarcia/Documents/InformationRetrivalPhd/python_env/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:75: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default

  | Name  | Type | Params
-------------------------------
0 | model | Net  | 2.6 M 
-------------------------------
2.6 M     Trainable params
0         Non-trainable params
2.6 M     Total params
10.513    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/nellygarcia/Documents/InformationRetrivalPhd/python_env/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

RuntimeError: 0D or 1D target tensor expected, multi-target not supported

In [None]:
learner.load_state_dict(torch.load(checkpoint.best_model_path)['state_dict'])

eval_acc(learner.model, device, learner.val_dataloader(), 'val')
eval_acc(learner.model, device, learner.test_dataloader(), 'test');

FileNotFoundError: [Errno 2] No such file or directory: ''