In [77]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split, Dataset
import numpy as np
from occurrence_data import load_filtered_occurrences
from os import path
from data_download import get_preprocessed_song_ids
from torchvision.transforms import Compose, ToTensor

In [3]:
class CNNClassifier(nn.Module):
    def __init__(self, num_classes: int):
        super(CNNClassifier, self).__init__()

        self.conv_block1 = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.conv_block2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.conv_block3 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        x = self.conv_block1(x)  # [batch, 32, H/2, W/2]
        x = self.conv_block2(x)  # [batch, 64, H/4, W/4]
        x = self.conv_block3(x)  # [batch, 128, H/8, W/8]
        x = self.global_pool(x)  # [batch, 128, 1, 1]
        logits = self.classifier(x)  # [batch, num_classes]
        return logits

In [93]:
class SpectrogramSegmentDataset(Dataset):

    def __init__(self, occurrences, root_dir, transform=None, device='cpu'):
        """
        Arguments:
            csv_file (string): Path to the csv file with annotations.
            root_dir (string): Directory with all the images.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.occurrences = occurrences
        self.root_dir = root_dir
        self.transform = transform
        self.device = device

    def __len__(self):
        return len(self.occurrences)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        id = self.occurrences.iloc[idx, 0]
        spectrogram_path = path.join(self.root_dir, str(id) + '.npy')
        spectrogram = torch.tensor(np.load(spectrogram_path), dtype=torch.float32, device=self.device)

        species = torch.tensor(self.occurrences.iloc[idx, 1], dtype=torch.long, device=self.device)
        sample = {'spectrogram': spectrogram, 'species': species}
        # sample = (s, species)

        if self.transform:
            sample = self.transform(sample)

        return sample

In [None]:
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")

else:
    mps_device = torch.device("mps")

    # Create a Tensor directly on the mps device
    x = torch.ones(5, device=mps_device)
    # Or
    x = torch.ones(5, device="mps")

    # Any operation happens on the GPU
    y = x * 2

    # Move your model to mps just like any other device
    model = YourFavoriteNet()
    model.to(mps_device)

    # Now every call runs on the GPU
    pred = model(x)

2


In [91]:
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [92]:
mps_device = torch.device("mps")

In [94]:
class Pad():
    def __init__(self, padded_length):
        assert isinstance(padded_length, int)
        self.padded_length = padded_length

    def __call__(self, sample):
        spectrogram, species = sample['spectrogram'], sample['species']
        if spectrogram.shape[1] < self.padded_length:
            pad_t = self.padded_length - spectrogram.shape[1]
            p = (0, pad_t)
            spectrogram = F.pad(spectrogram, p, "constant", 0)
        spectrogram = spectrogram[None, :, :]
        return {'spectrogram': spectrogram, 'species': species}

In [95]:
occ = load_filtered_occurrences()
fs = get_preprocessed_song_ids()
occ = occ[occ['gbifID'].isin(fs)]
occ['species'] = occ['species'].cat.codes
# X_tensor = torch.tensor(occ['gbifID'].values)
# y_tensor = torch.tensor(occ['species'].cat.codes.values, dtype=torch.long)
# dataset = TensorDataset(X_tensor, y_tensor)
dataset = SpectrogramSegmentDataset(occurrences=occ, root_dir='/Volumes/COCO-DATA/songs_npy/', transform=Pad(280), device=mps_device)

train, val, test = random_split(dataset, [0.7, 0.15, 0.15])

In [13]:
print(dataset.transform)

<class '__main__.Pad'>


In [96]:
dataloader = DataLoader(train, batch_size=4, shuffle=True)
# print(next(iter(dataloader))[1])
print(next(iter(dataloader))['species'])
# for b in next(iter(dataloader)):
#     print(b)

tensor([172, 222, 134, 102], device='mps:0')


In [None]:
m = CNNClassifier(num_classes=231)
m.to(mps_device)
print(m.)

CNNClassifier(
  (conv_block1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv_block3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (global_pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (classifier): Sequential(
    (0): F

In [90]:
num_epochs = 50
batch_size = 128
lr = 0.05

dataloader = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=2)
model = CNNClassifier(num_classes=231)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr)

printing_interval = num_epochs // 10
losses_ = []

model.train()
for epoch in range(0, num_epochs):
    running_loss = 0.0

    for b in dataloader:
        X = b['spectrogram']
        y = b['species'].long()
        y_hat = model(X)
        train_loss = criterion(y_hat, y)

        train_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        running_loss += train_loss.item()

    avg_loss = running_loss / len(dataloader)
    losses_.append(avg_loss)

    # if(epoch % printing_interval == 1):
    #     print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {avg_loss:.6f}")
    print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {avg_loss:.6f}")

Traceback (most recent call last):
  File "<string>", line 1, in <module>
  File "/Users/okkokuisma/miniconda3/envs/audio/lib/python3.11/multiprocessing/spawn.py", line 122, in spawn_main
    exitcode = _main(fd, parent_sentinel)
               ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/okkokuisma/miniconda3/envs/audio/lib/python3.11/multiprocessing/spawn.py", line 132, in _main
    self = reduction.pickle.load(from_parent)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: Can't get attribute 'SpectrogramSegmentDataset' on <module '__main__' (built-in)>


KeyboardInterrupt: 