In [35]:
from pathlib import Path

import torch
import torch.nn as nn
import torch.optim as optim
import torchaudio
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import DatasetFolder

from sklearn.model_selection import train_test_split


In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [None]:
# metadata = torchaudio.info(SAMPLE_WAV_PATH)


In [None]:
class AudioDataset(Dataset):
    def __init__(self, root, transform=None):
        self.transform = transform
        self.files = sorted(str(p) for p in Path(root).glob('*/*.wav'))

    def __getitem__(self, index):
        audiopath = self.files[index]
        waveform, sample_rate = torchaudio.load(audiopath)
        label = Path(audiopath).parent.name

        # if self.unaligned:
        #     item_B = Image.open(self.files_B[random.randint(0, len(self.files_B) - 1)])

        # if self.transform:
        #     item_A = self.transform(item_A)
        #     item_B = self.transform(item_B)

        return (waveform, label)

    def __len__(self):
        return len(self.files)


In [None]:
dataset = AudioDataset(root='/home/jose/Documents/Challenge/data/train')

In [33]:
dataset = DatasetFolder(root='./dataset', loader=torchaudio.load, extensions='wav')
dataset = get_audio_dataset(data_directory, max_length_in_seconds=1, pad_and_truncate=True)

for idx, (data, label) in enumerate(dataset):
    
    print(dataset.classes[label])


['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6', '7']
['1', '2', '3', '4', '5', '6

In [None]:
dataset = DatasetFolder(root='./dataset', loader=torchaudio.load, extensions='wav')
train_dataset, valid_dataset = train_test_split(dataset, test_size=0.2)


In [None]:
# ? dataloader
train_dataloader = DataLoader(
    train_dataset,
    batch_size=40,
    shuffle=False,
    num_workers=2,
)

valid_dataloader = DataLoader(
    valid_dataset,
    batch_size=10,
    shuffle=True,
    num_workers=2,
)


In [39]:
class AudioCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()

        self.main = nn.Sequential(
            nn.Conv1d(1, 64, 80, 4, 2),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(64, 128, 80, 4, 2),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(128, 256, 80, 4, 2),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(256, 512, 80, 4, 2),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv1d(512, 512, 40, 4, 2),
            nn.BatchNorm1d(512),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.classifier = nn.Sequential(
            nn.Linear(512, num_classes),
            nn.Softmax(),
        )

    def forward(self, tensor):
        batch_size = tensor.size(0)
        hidden = self.main(tensor)

        return self.classifier(hidden.view(batch_size, -1))

audio_cnn = AudioCNN(len(dataset.classes)).to(device)
print(audio_cnn)


AudioCNN(
  (main): Sequential(
    (0): Conv1d(1, 64, kernel_size=(80,), stride=(4,), padding=(2,))
    (1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): LeakyReLU(negative_slope=0.2, inplace=True)
    (3): Conv1d(64, 128, kernel_size=(80,), stride=(4,), padding=(2,))
    (4): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): LeakyReLU(negative_slope=0.2, inplace=True)
    (6): Conv1d(128, 256, kernel_size=(80,), stride=(4,), padding=(2,))
    (7): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): LeakyReLU(negative_slope=0.2, inplace=True)
    (9): Conv1d(256, 512, kernel_size=(80,), stride=(4,), padding=(2,))
    (10): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): LeakyReLU(negative_slope=0.2, inplace=True)
    (12): Conv1d(512, 512, kernel_size=(40,), stride=(4,), padding=(2,))
    (13): BatchNorm1d(512, eps=1e-05, 

In [38]:
cross_entropy = nn.CrossEntropyLoss()
optimizer = optim.Adam(audio_cnn.parameters())


In [41]:
def train(model, train_loader, optimizer, criterion, epoch):
    model.train()
    losses = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)

        loss.backward()
        optimizer.step()

        losses.append(loss.item())

        if batch_idx % 100 == 0:
            print(
                f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}'
            )

    return losses.mean()


In [None]:
def validate(model, val_loader, criterion):
    model.eval()
    val_loss = 0
    correct, total = 0, len(val_loader.dataset)

    with torch.no_grad():
        for batch_idx, (data, target) in enumerate(val_loader):
            data, target = data.to(device), target.to(device)

            output = model(data)
            loss = criterion(output, target)
            _, predicted = torch.max(output.data, 1)

            val_loss += loss.item()
            correct += (predicted == target).sum().item()

    accuracy = 100. * correct / total
    # ! is mean needed?
    val_loss = val_loss.mean() / len(val_loader)

    return val_loss, accuracy


In [42]:
num_epochs = 10

for epoch in range(num_epochs):
    train_loss = train(audio_cnn, train_dataloader, optimizer, cross_entropy, epoch)
    val_loss, val_accuracy = validate(audio_cnn, test_dataloader, cross_entropy)

    print(f"Evaluation accuracy: {val_accuracy}")


NameError: name 'train_dataloader' is not defined