In [1]:
import os
import random
from collections import defaultdict, Counter

import librosa
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.core.transforms_interface import DualTransform, BasicTransform

In [2]:
if torch.cuda.is_available():
    dev = "cuda:0"
    num_workers = 1
    pin_memory = True
else:
    dev = "cpu"
    num_workers = 0
    pin_memory = False
num_workers = 0
pin_memory = False

In [3]:
root_dir = '../slices'

In [4]:
slice_df = pd.read_csv("../slice_filenames.csv")
slice_df.head()

Unnamed: 0.1,Unnamed: 0,fold,filename,classID
0,0,1,102106-3-0-0.wav_0.npy,3
1,1,1,102106-3-0-0.wav_1.npy,3
2,2,1,102305-6-0-0.wav_0.npy,6
3,3,1,102305-6-0-0.wav_1.npy,6
4,4,1,102305-6-0-0.wav_2.npy,6


In [5]:
class SignalDataset(Dataset):
    def __init__(self, slice_df):
        self.slice_df = slice_df

    def __len__(self):
        return len(self.slice_df)

    def __getitem__(self, idx):
        row = self.slice_df.iloc[idx,:]
        filename = row['filename']
        fold = row['fold']
        x = np.load(os.path.join(root_dir, f"fold{fold}", filename))
        x = torch.tensor(x, device=dev).float().unsqueeze(0)
        y = torch.tensor(row['classID'], device=dev)
        return x, y

In [6]:
class AudioTransform(BasicTransform):
    """Transform for Audio task"""

    @property
    def targets(self):
        return {"data": self.apply}

    def update_params(self, params, **kwargs):
        if hasattr(self, "interpolation"):
            params["interpolation"] = self.interpolation
        if hasattr(self, "fill_value"):
            params["fill_value"] = self.fill_value
        return params

class NoiseInjection(AudioTransform):
    """It simply add some random value into data by using numpy"""
    def __init__(self, always_apply=False, p=0.5):
        super(NoiseInjection, self).__init__(always_apply, p)

    def apply(self, data, noise_levels=(0, 0.5), **params):
        sound, sr = data
        noise_level = np.random.uniform(*noise_levels)
        noise = np.random.randn(len(sound))
        augmented_sound = sound + noise_level * noise
        # Cast back to same data type
        augmented_sound = augmented_sound.astype(type(sound[0]))

        return augmented_sound, sr

class PitchShift(AudioTransform):
    """Shifting time axis"""
    def __init__(self, always_apply=False, p=0.5):
        super(PitchShift, self).__init__(always_apply, p)

    def apply(self, data, **params):
        sound, sr = data

        n_steps = np.random.randint(-10, 10)
        augmented_sound = librosa.effects.pitch_shift(sound, sr, n_steps)

        return augmented_sound, sr

In [7]:
class SignalDataset(Dataset):
    def __init__(self, slice_df, do_augment=True):
        self.slice_df = slice_df
        self.do_augment = do_augment
        self.augment = A.Compose([
            NoiseInjection(p=0.5),
            PitchShift(p=0.5),
        ])

    def __len__(self):
        return len(self.slice_df)

    def __getitem__(self, idx):
        row = self.slice_df.loc[idx,:]
        filename = row['filename']
        fold = row['fold']
        x = np.load(os.path.join(root_dir, f"fold{fold}", filename))
        if self.do_augment:
            x = self.augment(data=(x, 44100))
            x = x['data'][0]
        x = torch.tensor(x, device=dev).float().unsqueeze(0)
        y = torch.tensor(row['classID'], device=dev)
        return x, y

In [8]:
train_df = slice_df[(slice_df['fold'] != 8) & (slice_df['fold'] != 9)].reset_index(drop=True)
test_df = slice_df[(slice_df['fold'] == 8) | (slice_df['fold'] == 9)].reset_index(drop=True)

In [9]:
train_ds = SignalDataset(train_df)
test_ds = SignalDataset(test_df)
train_dl = DataLoader(train_ds, batch_size=512)
test_dl = DataLoader(test_ds, batch_size=512)

In [10]:
class SignalModel(nn.Module):
    def __init__(self, n_channels=32):
        super(SignalModel, self).__init__()
        self.relu = nn.ReLU()
        self.log_softmax = nn.LogSoftmax(dim=2)
        self.conv1 = nn.Conv1d(1, n_channels, kernel_size=240, stride=16)
        self.bn1 = nn.BatchNorm1d(n_channels)
        self.pool1 = nn.MaxPool1d(4)
        self.conv2 = nn.Conv1d(n_channels, n_channels, kernel_size=3)
        self.bn2 = nn.BatchNorm1d(n_channels)
        self.pool2 = nn.MaxPool1d(4)
        self.conv3 = nn.Conv1d(n_channels, 2 * n_channels, kernel_size=3)
        self.bn3 = nn.BatchNorm1d(2 * n_channels)
        self.pool3 = nn.MaxPool1d(4)
        self.conv4 = nn.Conv1d(2 * n_channels, 2 * n_channels, kernel_size=3)
        self.bn4 = nn.BatchNorm1d(2 * n_channels)
        self.pool4 = nn.MaxPool1d(4)
        self.fc1 = nn.Linear(2 * n_channels, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu(self.bn1(x))
        x = self.pool1(x)
        x = self.conv2(x)
        x = self.relu(self.bn2(x))
        x = self.pool2(x)
        x = self.conv3(x)
        x = self.relu(self.bn3(x))
        x = self.pool3(x)
        x = self.conv4(x)
        x = self.relu(self.bn4(x))
        x = self.pool4(x)
        x = F.avg_pool1d(x, x.shape[-1])
        x = x.permute(0, 2, 1)
        x = self.fc1(x)
        return torch.squeeze(x)

In [12]:
def train(dataloader, model, loss_fn, optimizer, do_augment=True, update_params=True, print_loss=False):
    for (image, label) in dataloader:
        # Compute prediction and loss
        model.train()
        pred = model(image)
        loss = loss_fn(pred, label)

        if(update_params):
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if(print_loss):
            print(loss.item())

def avg_acc(model, dataloader):
    total_incorrect = 0
    num_samples = 0.0
    softmax = nn.LogSoftmax(dim=1)
    with torch.no_grad():
        for image, label in dataloader:
            pred = torch.argmax(softmax(model(image)), dim=1)
            total_incorrect += torch.count_nonzero(label - pred).item()
            num_samples += len(label)
    return 1 - (total_incorrect / num_samples)

def train_loop(train_dataloader, val_dataloader, model, loss_fn, optimizer, do_augment=True, n_epochs=10):
    for _ in range(n_epochs):
        train(train_dataloader, model, loss_fn, optimizer)
        print(avg_acc(model, val_dataloader))

In [160]:
model = SignalModel().to(device=dev)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.001)
train_loop(train_dl, test_dl, model, loss_fn, optimizer, n_epochs=1)

KeyboardInterrupt: 

In [13]:
def full_training_schedule(train_dl, test_dl, model, augment=True):
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=0.001)
    train_loop(train_dl, test_dl, model, loss_fn, optimizer)
    for _ in range(3):
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
        train_loop(train_dl, test_dl, model, loss_fn, optimizer)
    for _ in range(6):
        optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.001)
        train_loop(train_dl, test_dl, model, loss_fn, optimizer)

In [15]:
model = SignalModel().to(device=dev)
full_training_schedule(train_dl, test_dl, model)

0.22746441750131785
0.2836056931997891
0.29098576700052714
0.3210332103321033
0.3297311544544017
0.32577754348972066
0.34396415392725355
0.3410648392198208
0.32630469161834474
0.3326304691618345
0.33526620980495514
0.34290985767000526
0.34264628360569316
0.3494992092778071
0.3460727464417501
0.3481813389562467
0.34159198734844487
0.35292567211386394
0.35371639430680024
0.36136004217185025
0.37216657880864523
0.36926726410121247


MemoryError: Unable to allocate 1.02 MiB for an array with shape (1025, 131) and data type complex64

In [175]:
def get_filename_dict(df):
    df['short_filename'] = slice_df['filename'].map(lambda s: str(s).split("_")[0])
    filename_dict = defaultdict(list)
    for row in df.iterrows():
        filename_dict[row[1]['short_filename']] += [row[1]['filename']]
    return filename_dict

In [174]:
train_filename_dict = get_filename_dict(train_df)
test_filename_dict = get_filename_dict(test_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['short_filename'] = slice_df['filename'].map(lambda s: str(s).split("_")[0])


In [176]:
class SignalVoteDataset(Dataset):
    def __init__(self, slice_df, filename_dict):
        self.slice_df = slice_df
        self.filename_dict = filename_dict
        self.filenames = filename_dict.keys()

    def __len__(self):
        return len(self.filename_dict)

    def __getitem__(self, idx):
        filename = self.filenames[idx]
        slice_filenames = self.filename_dict[filename]
        x_list, y_list = [], []
        for filename in slice_filenames:
            row = self.slice_df[self.slice_df['filename'] == filename]
            fold = row['fold']
            x = np.load(os.path.join(root_dir, f"fold{fold}", filename))
            x = torch.tensor(x, device=dev).float().unsqueeze(0)
            y = torch.tensor(row['classID'], device=dev)
            x_list += [x]
            y_list += [y]
        return torch.stack(x), torch.stack(y)

In [230]:
def avg_acc_vote(model, dataloader, df):
    total_correct = 0
    num_samples = 0.0
    softmax = nn.LogSoftmax(dim=1)
    filename_dict = get_filename_dict(df)
    with torch.no_grad():
        last_idx = 0
        df['pred'] = np.zeros(len(df))
        for image, label in dataloader:
            pred = torch.argmax(softmax(model(image)), dim=1)
            batch_size = image.shape[0]
            print(last_idx, batch_size, len(df['pred'][last_idx:last_idx+batch_size]), len(pred.cpu().numpy()))
            df['pred'][last_idx:last_idx+batch_size] = pred.cpu().numpy()
            last_idx += batch_size
    for filename in filename_dict:
        df_rows = df[df['short_filename'] == filename]
        votes = df_rows['pred']
        vote_counts = Counter(votes)
        vote = sorted(vote_counts.items(), key=lambda item: item[1], reverse=True)[0][0]
        total_correct += 1 if (vote == df_rows['classID'].iloc[0]) else 0
        num_samples += 1
    return total_correct / num_samples

In [231]:
avg_acc_vote(model, test_dl, test_df)


0 512 512 512


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['pred'][last_idx:last_idx+batch_size] = pred.cpu().numpy()


512 512 512 512
1024 512 512 512
1536 512 512 512
2048 512 512 512
2560 512 512 512
3072 512 512 512
3584 210 210 210


0.218970736629667