# Load libraries

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import random
import pickle
import numpy as np
import pandas as pd

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

import torchvision.models
from torchvision import transforms

In [4]:
# Reproducibility setup, following the PyTorch randomness notes:
# https://pytorch.org/docs/stable/notes/randomness.html

seed = 20200701

# Ask cuDNN for deterministic kernels and switch off its autotuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed each random number generator the notebook draws from.
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f23cf5c1ad0>

In [5]:
from torch.utils.tensorboard import SummaryWriter

In [6]:
this_hparams = {
    'slno': 8,
    'weights': 'imagenet',
    'dataaug': 'mixup',
    'comments': 'metadata-all'
}

In [7]:
import shutil
# Run identifier assembled from the hyper-parameters; it names both the
# TensorBoard log directory below and the model checkpoint file saved during training.
this_filename = f"{this_hparams['slno']}-{this_hparams['weights']}-{this_hparams['dataaug']}-{this_hparams['comments']}"
this_log_dir = f"./tfboard_out/{this_filename}"
# Delete any log directory left over from a previous run with the same identifier;
# ignore_errors=True makes this a no-op when the directory does not exist.
shutil.rmtree(this_log_dir, ignore_errors=True)
tbwriter = SummaryWriter(this_log_dir)

In [8]:
this_filename

'8-imagenet-mixup-metadata-all'

# Prepare the data

In [9]:
def prepare_data(df, unknown_to_known, spec_dir='./data/logmelspec',
                 num_frames=635, num_annotators=3):
    """Build spectrogram inputs, per-annotator labels and loss masks.

    Parameters
    ----------
    df : pandas.DataFrame
        Annotation frame indexed by ``audio_filename`` with one row per
        annotation. Columns are label columns; those named as keys of
        ``unknown_to_known`` are treated as "unknown" indicator columns.
    unknown_to_known : dict
        Maps each "unknown" column name to the list of known label columns
        that must be masked out whenever that unknown column exceeds 0.5.
    spec_dir : str, optional
        Directory holding one ``<audio_filename>.npy`` spectrogram per clip.
    num_frames : int, optional
        Number of leading time frames kept from every (transposed) spectrogram.
    num_annotators : int, optional
        Number of annotations kept per clip (the first ``num_annotators`` rows
        of each clip, in input order).

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_clips, 1, num_frames, n_bins)``.
    y : numpy.ndarray
        Shape ``(n_clips, n_known_labels, num_annotators)``.
    y_mask : numpy.ndarray
        Same shape as ``y``; 0 where the label is masked, 1 elsewhere.

    Raises
    ------
    ValueError
        If any clip has fewer than ``num_annotators`` annotation rows.
    """
    df = df.reset_index()

    # Fail early and clearly: the stacking below needs the same number of
    # annotation slots for every clip.
    annotations_per_clip = df.groupby('audio_filename').size()
    if (annotations_per_clip < num_annotators).any():
        short = annotations_per_clip[annotations_per_clip < num_annotators].index.tolist()
        raise ValueError(
            'prepare_data needs at least {} annotations per clip; too few for: {}'
            .format(num_annotators, short[:5]))

    # Number the annotations of each clip 1, 2, 3, ... in input order.
    df['slno'] = df.groupby('audio_filename').cumcount() + 1
    df = df.set_index(['audio_filename', 'slno'])

    unknown_cols = list(unknown_to_known.keys())
    df_unknown = df.loc[:, unknown_cols].copy()
    df = df.drop(columns=unknown_cols)

    # Mask starts at 1 everywhere; known labels are zeroed wherever the
    # corresponding "unknown" indicator is set for that annotation.
    y_mask = df.copy()
    y_mask.loc[:, :] = 1
    for unknown, known in unknown_to_known.items():
        y_mask.loc[df_unknown[unknown] > 0.5, known] = 0

    # Put the annotation number first so each slot can be selected as a block;
    # sorting keeps the clips in the same (filename) order within every slot.
    df = df.swaplevel(i=1, j=0, axis=0).sort_index()
    y_mask = y_mask.swaplevel(i=1, j=0, axis=0).sort_index()

    slots = range(1, num_annotators + 1)
    y = np.stack([df.loc[[slot], :].values for slot in slots], axis=2)
    y_mask = np.stack([y_mask.loc[[slot], :].values for slot in slots], axis=2)

    # Load spectrograms in the same clip order as the label arrays.
    filenames = df.loc[[1], :].reset_index(1).audio_filename.tolist()
    X = np.stack([
        np.load('{}/{}.npy'.format(spec_dir, name)).T[:num_frames, :]
        for name in filenames])
    X = np.expand_dims(X, axis=1)  # add the channel axis

    return X, y, y_mask


In [10]:
# Load the pre-built metadata bundle. Later cells read the keys 'taxonomy_df',
# 'coarse_train', 'fine_train', 'coarse_valid', 'fine_valid', 'train_metadata',
# 'valid_metadata', 'coarse_train_gt' and 'coarse_valid_gt' from it.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load metadata.pkl from a trusted source.
with open('./data/metadata.pkl', 'rb') as f:
    metadata = pickle.load(f)

In [11]:
# Split the taxonomy into the "unknown" fine rows (fine_id == 'X') and the
# known ones, keeping only the columns needed for the join.
taxonomy = metadata['taxonomy_df']
unknown_rows = taxonomy.loc[taxonomy.fine_id == 'X', ['fine', 'coarse']]
known_rows = taxonomy.loc[taxonomy.fine_id != 'X', ['fine', 'coarse']]

# For every unknown fine label, collect the known fine labels that share its
# coarse class (the merge suffixes the two 'fine' columns with _x / _y).
unknown_to_known = (
    unknown_rows
    .merge(known_rows, on='coarse', how='inner')
    .drop(columns='coarse')
    .groupby('fine_x')['fine_y']
    .apply(list)
    .to_dict())

# Names of all known fine labels, in taxonomy order.
known_labels = known_rows['fine'].tolist()

In [12]:
train_df = pd.concat([metadata['coarse_train'], metadata['fine_train']], axis=1, sort=True)
valid_df = pd.concat([metadata['coarse_valid'], metadata['fine_valid']], axis=1, sort=True)

In [13]:
train_X, train_y, train_y_mask = prepare_data(train_df, unknown_to_known)
valid_X, valid_y, valid_y_mask = prepare_data(valid_df, unknown_to_known)

---

In [14]:
cols_to_select = ['borough', 'latitude', 'longitude', 'week', 'day', 'hour', 'date_slno']
train_metadata = metadata['train_metadata'].loc[:, cols_to_select]
valid_metadata = metadata['valid_metadata'].loc[:, cols_to_select]

In [15]:
for x in ['borough', 'day', 'hour']:
    train_metadata[x] = train_metadata[x].astype('str')
    valid_metadata[x] = valid_metadata[x].astype('str')

In [16]:
train_metadata = pd.get_dummies(train_metadata)

In [17]:
valid_metadata = pd.get_dummies(valid_metadata)

In [18]:
valid_metadata['borough_4'] = 0

In [19]:
# Give the validation frame exactly the training columns, in training order.
# reindex (unlike .loc) does not raise when a one-hot column seen in training
# is absent from validation: any such column is created and filled with 0.
valid_metadata = valid_metadata.reindex(columns=train_metadata.columns, fill_value=0)

In [20]:
# Min-max scale latitude using the training extrema; the validation split
# reuses the training statistics.
lat_min, lat_max = train_metadata['latitude'].min(), train_metadata['latitude'].max()
train_metadata['latitude'] = train_metadata['latitude'].sub(lat_min).div(lat_max - lat_min)
valid_metadata['latitude'] = valid_metadata['latitude'].sub(lat_min).div(lat_max - lat_min)

In [21]:
# Min-max scale longitude using the training extrema; the validation split
# reuses the training statistics.
lon_min, lon_max = train_metadata['longitude'].min(), train_metadata['longitude'].max()
train_metadata['longitude'] = train_metadata['longitude'].sub(lon_min).div(lon_max - lon_min)
valid_metadata['longitude'] = valid_metadata['longitude'].sub(lon_min).div(lon_max - lon_min)

In [22]:
# Min-max scale the week number using the training extrema; the validation
# split reuses the training statistics.
wk_min, wk_max = train_metadata['week'].min(), train_metadata['week'].max()
train_metadata['week'] = train_metadata['week'].sub(wk_min).div(wk_max - wk_min)
valid_metadata['week'] = valid_metadata['week'].sub(wk_min).div(wk_max - wk_min)

In [23]:
# Min-max scale the date serial number using the training extrema; the
# validation split reuses the training statistics.
dt_min, dt_max = train_metadata['date_slno'].min(), train_metadata['date_slno'].max()
train_metadata['date_slno'] = train_metadata['date_slno'].sub(dt_min).div(dt_max - dt_min)
valid_metadata['date_slno'] = valid_metadata['date_slno'].sub(dt_min).div(dt_max - dt_min)

In [24]:
valid_metadata.head(2)

Unnamed: 0_level_0,latitude,longitude,week,date_slno,borough_1,borough_3,borough_4,day_0,day_1,day_2,...,hour_21,hour_22,hour_23,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9
audio_filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00_000066.wav,0.41245,0.072543,0.529412,0.392655,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
00_000071.wav,0.41245,0.072543,0.960784,0.19774,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [25]:
train_metadata.head(2)

Unnamed: 0_level_0,latitude,longitude,week,date_slno,borough_1,borough_3,borough_4,day_0,day_1,day_2,...,hour_21,hour_22,hour_23,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9
audio_filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01_000006.wav,0.42201,0.041071,0.313725,0.318267,1,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
01_000038.wav,0.42201,0.041071,0.392157,0.346516,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [26]:
# Convert the metadata frames to plain arrays. np.asarray returns an ndarray
# unchanged, so (unlike `.values`) re-running this cell does not fail.
train_metadata = np.asarray(train_metadata)
valid_metadata = np.asarray(valid_metadata)

---

In [27]:
train_X.shape
train_y.shape
train_y_mask.shape
train_metadata.shape
valid_X.shape
valid_y.shape
valid_y_mask.shape
valid_metadata.shape

(13538, 1, 635, 128)

(13538, 31, 3)

(13538, 31, 3)

(13538, 38)

(4308, 1, 635, 128)

(4308, 31, 3)

(4308, 31, 3)

(4308, 38)

In [28]:
# Standardise every one of the 128 frequency bins with statistics computed on
# the training set only; shape (1, 1, 1, 128) broadcasts over (N, 1, T, 128).
channel_means, channel_stds = (
    stat(train_X.reshape(-1, 128), axis=0).reshape(1, 1, 1, -1)
    for stat in (np.mean, np.std))
train_X = np.divide(np.subtract(train_X, channel_means), channel_stds)
valid_X = np.divide(np.subtract(valid_X, channel_means), channel_stds)
# Optional: persist the statistics so they can be reused outside this notebook.
#np.save('data/channel_means.npy', channel_means)
#np.save('data/channel_stds.npy', channel_stds)

In [29]:
import gc
gc.collect()

22

# Prepare the PyTorch datasets

In [30]:
class AudioDataset(Dataset):
    """Dataset yielding ``(sample, labels, weights, metadata)`` tuples.

    Parameters
    ----------
    X : array-like
        Inputs; the first axis indexes samples.
    y : array-like
        Labels, aligned with ``X`` along the first axis.
    weights : array-like
        Per-label loss weights / masks, aligned with ``X``.
    metadata : array-like
        Per-sample metadata features, aligned with ``X``.
    transform : callable, optional
        Applied to each sample taken from ``X`` before it is returned.
        ``None`` (the default) returns the sample unchanged.

    Raises
    ------
    ValueError
        If ``y``, ``weights`` or ``metadata`` do not have as many entries as ``X``.
    """

    def __init__(self, X, y, weights, metadata, transform=None):
        n_samples = X.shape[0]
        for name, values in (('y', y), ('weights', weights), ('metadata', metadata)):
            if len(values) != n_samples:
                raise ValueError(
                    '{} has {} entries but X has {}'.format(name, len(values), n_samples))
        self.X = X
        self.y = y
        self.weights = weights
        self.metadata = metadata
        # Previously accepted but discarded; now stored and applied in __getitem__.
        self.transform = transform

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        sample = self.X[idx, ...]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, self.y[idx, ...], self.weights[idx, ...], self.metadata[idx, ...]

In [31]:
# Wrap the NumPy arrays as float tensors (torch.Tensor copies and casts) and
# bundle inputs, labels, masks and metadata per split; no transform is used.
train_dataset = AudioDataset(
    *(torch.Tensor(array)
      for array in (train_X, train_y, train_y_mask, train_metadata)),
    None)
valid_dataset = AudioDataset(
    *(torch.Tensor(array)
      for array in (valid_X, valid_y, valid_y_mask, valid_metadata)),
    None)

In [32]:
# Validation is read in a fixed order and keeps its final partial batch.
val_loader = DataLoader(dataset=valid_dataset, batch_size=128, shuffle=False,
                        num_workers=2, pin_memory=True, drop_last=False)
# Two independently shuffled views of the training set; the training loops
# zip them together to obtain the sample pairs that are blended by mixup.
train_loader_1 = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True,
                            num_workers=2, pin_memory=True, drop_last=True)
train_loader_2 = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True,
                            num_workers=2, pin_memory=True, drop_last=True)

# Define the model

In [33]:
# Use the first GPU when one is available and fall back to the CPU otherwise,
# so the notebook also runs on machines without CUDA.
cuda = torch.cuda.is_available()
device = torch.device('cuda:0' if cuda else 'cpu')
print('Device: ', device)

Device:  cuda:0


In [34]:
class Model(nn.Module):
    """MobileNetV2 spectrogram classifier with a metadata side branch.

    A single-channel spectrogram is mapped to three channels by 1x1
    convolutions, passed through the MobileNetV2 feature extractor, max-pooled
    over both spatial axes and projected to 48 features. Metadata is projected
    to 16 features by a small MLP. The two are concatenated (64 features) and
    mapped to ``num_classes`` logits.

    Parameters
    ----------
    num_classes : int
        Number of output logits.
    num_meta_features : int, optional
        Width of the metadata vector fed to the side branch (default 38).
    """

    def __init__(self, num_classes, num_meta_features=38):

        super().__init__()

        # Learnable 1 -> 3 channel mapping so the RGB backbone can be reused.
        self.bw2col = nn.Sequential(
            nn.BatchNorm2d(1),
            nn.Conv2d(1, 10, 1, padding=0), nn.ReLU(),
            nn.Conv2d(10, 3, 1, padding=0), nn.ReLU())

        # Only `features` is used in forward(); the classifier head is dropped.
        self.mv2 = torchvision.models.mobilenet_v2(pretrained=True)
        self.mv2.classifier = None

        self.after_mv2 = nn.Sequential(
            nn.BatchNorm1d(1280),
            nn.Dropout(0.7),
            nn.Linear(1280, 48),
            nn.ReLU(),
            nn.BatchNorm1d(48),
        )

        self.meta_seq = nn.Sequential(
            nn.BatchNorm1d(num_meta_features),
            nn.Linear(num_meta_features, 128),
            nn.ReLU(),
            nn.BatchNorm1d(128),
            nn.Linear(128, 16),
            nn.BatchNorm1d(16),
        )

        # 48 audio features + 16 metadata features.
        self.final_dense = nn.Linear(64, num_classes)

    def forward(self, x, with_metadata=False, metadata=None):
        """Return the logits for a batch.

        Parameters
        ----------
        x : torch.Tensor
            Spectrogram batch with a single channel axis at position 1.
        with_metadata : bool
            Must be True; the model has no metadata-free path.
        metadata : torch.Tensor
            Metadata batch with ``num_meta_features`` columns.

        Raises
        ------
        ValueError
            If ``with_metadata`` is false or ``metadata`` is None.
        """
        # Fail fast, before the expensive backbone pass. (The previous
        # `raise('...')` raised a str, which is itself a TypeError.)
        if not with_metadata or metadata is None:
            raise ValueError(
                'Model.forward requires with_metadata=True and a metadata tensor')

        x = self.bw2col(x)
        x = self.mv2.features(x)
        # Global max-pool over the two spatial axes.
        x = x.max(dim=-1)[0].max(dim=-1)[0]
        x = self.after_mv2(x)

        meta_out = self.meta_seq(metadata)

        x = torch.cat((x, meta_out), dim=-1)
        x = self.final_dense(x)
        return x

In [35]:
model = Model(31)

In [36]:
model

Model(
  (bw2col): Sequential(
    (0): BatchNorm2d(1, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Conv2d(1, 10, kernel_size=(1, 1), stride=(1, 1))
    (2): ReLU()
    (3): Conv2d(10, 3, kernel_size=(1, 1), stride=(1, 1))
    (4): ReLU()
  )
  (mv2): MobileNetV2(
    (features): Sequential(
      (0): ConvBNReLU(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): InvertedResidual(
        (conv): Sequential(
          (0): ConvBNReLU(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6(inplace=True)
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): Batc

In [37]:
model = model.to(device)

# Bias training

In [38]:
# Warm-up phase: only the bias of the final dense layer is handed to the
# optimizer, so every other parameter stays at its initial value.
optimizer = optim.Adam([model.final_dense.bias], lr=1, amsgrad=True)
# Lower the learning rate when the monitored value (the validation loss passed
# to scheduler.step in the loop below) has not improved for 5 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, verbose=True)
# reduction='none' keeps the element-wise losses so they can be multiplied by
# the label masks before averaging.
criterion = nn.BCEWithLogitsLoss(reduction='none')

In [39]:
num_bias_training = 2
early_stop_patience = 25  # epochs without a new best validation loss before stopping
alpha = 1                 # mixup weights are drawn from Beta(alpha, alpha)
train_loss_hist = []
valid_loss_hist = []
lowest_val_loss = np.inf
epochs_without_new_lowest = 0
train_batch_slno = 0

for i in range(num_bias_training):
    print('Epoch: ', i)

    # ---- training pass ----------------------------------------------------
    model.train()
    this_epoch_train_loss = 0
    for i1, i2 in zip(train_loader_1, train_loader_2):

        # mixup: blend each sample of batch i1 with the sample at the same
        # position of the independently shuffled batch i2, one weight per sample
        mixup_vals = np.random.beta(alpha, alpha, i1[0].shape[0])
        lam = torch.as_tensor(mixup_vals, dtype=torch.float32)
        lam_x = lam.view(-1, 1, 1, 1)   # broadcasts over the inputs
        lam_y = lam.view(-1, 1, 1)      # broadcasts over labels and masks
        lam_m = lam.view(-1, 1)         # broadcasts over the metadata

        inputs = (lam_x * i1[0]) + ((1 - lam_x) * i2[0])
        labels = (lam_y * i1[1]) + ((1 - lam_y) * i2[1])
        masks = (lam_y * i1[2]) + ((1 - lam_y) * i2[2])
        meta = (lam_m * i1[3]) + ((1 - lam_m) * i2[3])

        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        masks = masks.to(device, non_blocking=True)
        meta = meta.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(inputs, with_metadata=True, metadata=meta)
        # masked loss for each of the three annotation sets, normalised by the
        # total mask weight of the batch
        loss_0 = criterion(outputs, labels[:, :, 0]) * masks[:, :, 0]
        loss_1 = criterion(outputs, labels[:, :, 1]) * masks[:, :, 1]
        loss_2 = criterion(outputs, labels[:, :, 2]) * masks[:, :, 2]
        loss = (loss_0.sum() + loss_1.sum() + loss_2.sum()) / masks.sum()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()  # single device-to-host transfer
        this_epoch_train_loss += batch_loss
        tbwriter.add_scalar('Batch/Loss/Train', batch_loss, train_batch_slno)
        train_batch_slno += 1

    # ---- validation pass (no mixup, no gradients) ---------------------------
    model.eval()
    this_epoch_valid_loss = 0
    with torch.no_grad():
        for inputs, labels, masks, meta in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            masks = masks.to(device)
            meta = meta.to(device)
            outputs = model(inputs, with_metadata=True, metadata=meta)
            loss_0 = criterion(outputs, labels[:, :, 0]) * masks[:, :, 0]
            loss_1 = criterion(outputs, labels[:, :, 1]) * masks[:, :, 1]
            loss_2 = criterion(outputs, labels[:, :, 2]) * masks[:, :, 2]
            loss = (loss_0.sum() + loss_1.sum() + loss_2.sum()) / masks.sum()
            this_epoch_valid_loss += loss.item()

    # Epoch losses are the plain mean of the per-batch losses.
    this_epoch_train_loss /= len(train_loader_1)
    this_epoch_valid_loss /= len(val_loader)

    train_loss_hist.append(this_epoch_train_loss)
    valid_loss_hist.append(this_epoch_valid_loss)

    # Keep the checkpoint with the lowest validation loss seen so far.
    if this_epoch_valid_loss < lowest_val_loss:
        lowest_val_loss = this_epoch_valid_loss
        torch.save(model.state_dict(), f"./models/{this_filename}")
        epochs_without_new_lowest = 0
    else:
        epochs_without_new_lowest += 1

    # Log before the early-stop check so the final epoch is recorded as well.
    print(this_epoch_train_loss, this_epoch_valid_loss)
    tbwriter.add_scalar('Epoch/Loss/Train', this_epoch_train_loss, i)
    tbwriter.add_scalar('Epoch/Loss/Valid', this_epoch_valid_loss, i)
    tbwriter.add_scalar('Epoch/lr', optimizer.param_groups[0]['lr'], i)
    tbwriter.flush()

    scheduler.step(this_epoch_valid_loss)

    if epochs_without_new_lowest >= early_stop_patience:
        break

Epoch:  0
0.2169676885718391 0.19866639375686646
Epoch:  1
0.20116266239257086 0.20099910830750184


# Training

In [40]:
# Main phase: every model parameter is now optimised, replacing the
# bias-only optimizer of the warm-up phase.
optimizer = optim.Adam(model.parameters(), lr=0.0005, amsgrad=True)
# Lower the learning rate when the monitored value (the validation loss passed
# to scheduler.step in the loop below) has not improved for 5 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, verbose=True)
# reduction='none' keeps the element-wise losses so they can be multiplied by
# the label masks before averaging.
criterion = nn.BCEWithLogitsLoss(reduction='none')

In [41]:
epochs = 100
early_stop_patience = 25  # epochs without a new best validation loss before stopping
alpha = 1                 # mixup weights are drawn from Beta(alpha, alpha)

# Epoch numbering continues after the bias-only warm-up epochs; the loss
# histories, best validation loss and batch counter carry over from it.
for i in range(num_bias_training, epochs):
    print('Epoch: ', i)

    # ---- training pass ----------------------------------------------------
    model.train()
    this_epoch_train_loss = 0
    for i1, i2 in zip(train_loader_1, train_loader_2):

        # mixup: blend each sample of batch i1 with the sample at the same
        # position of the independently shuffled batch i2, one weight per sample
        mixup_vals = np.random.beta(alpha, alpha, i1[0].shape[0])
        lam = torch.as_tensor(mixup_vals, dtype=torch.float32)
        lam_x = lam.view(-1, 1, 1, 1)   # broadcasts over the inputs
        lam_y = lam.view(-1, 1, 1)      # broadcasts over labels and masks
        lam_m = lam.view(-1, 1)         # broadcasts over the metadata

        inputs = (lam_x * i1[0]) + ((1 - lam_x) * i2[0])
        labels = (lam_y * i1[1]) + ((1 - lam_y) * i2[1])
        masks = (lam_y * i1[2]) + ((1 - lam_y) * i2[2])
        meta = (lam_m * i1[3]) + ((1 - lam_m) * i2[3])

        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)
        masks = masks.to(device, non_blocking=True)
        meta = meta.to(device, non_blocking=True)

        optimizer.zero_grad()
        outputs = model(inputs, with_metadata=True, metadata=meta)
        # masked loss for each of the three annotation sets, normalised by the
        # total mask weight of the batch
        loss_0 = criterion(outputs, labels[:, :, 0]) * masks[:, :, 0]
        loss_1 = criterion(outputs, labels[:, :, 1]) * masks[:, :, 1]
        loss_2 = criterion(outputs, labels[:, :, 2]) * masks[:, :, 2]
        loss = (loss_0.sum() + loss_1.sum() + loss_2.sum()) / masks.sum()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()  # single device-to-host transfer
        this_epoch_train_loss += batch_loss
        tbwriter.add_scalar('Batch/Loss/Train', batch_loss, train_batch_slno)
        train_batch_slno += 1

    # ---- validation pass (no mixup, no gradients) ---------------------------
    model.eval()
    this_epoch_valid_loss = 0
    with torch.no_grad():
        for inputs, labels, masks, meta in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            masks = masks.to(device)
            meta = meta.to(device)
            outputs = model(inputs, with_metadata=True, metadata=meta)
            loss_0 = criterion(outputs, labels[:, :, 0]) * masks[:, :, 0]
            loss_1 = criterion(outputs, labels[:, :, 1]) * masks[:, :, 1]
            loss_2 = criterion(outputs, labels[:, :, 2]) * masks[:, :, 2]
            loss = (loss_0.sum() + loss_1.sum() + loss_2.sum()) / masks.sum()
            this_epoch_valid_loss += loss.item()

    # Epoch losses are the plain mean of the per-batch losses.
    this_epoch_train_loss /= len(train_loader_1)
    this_epoch_valid_loss /= len(val_loader)

    train_loss_hist.append(this_epoch_train_loss)
    valid_loss_hist.append(this_epoch_valid_loss)

    # Keep the checkpoint with the lowest validation loss seen so far.
    if this_epoch_valid_loss < lowest_val_loss:
        lowest_val_loss = this_epoch_valid_loss
        torch.save(model.state_dict(), f"./models/{this_filename}")
        epochs_without_new_lowest = 0
    else:
        epochs_without_new_lowest += 1

    # Log before the early-stop check so the final epoch is recorded as well.
    print(this_epoch_train_loss, this_epoch_valid_loss)
    tbwriter.add_scalar('Epoch/Loss/Train', this_epoch_train_loss, i)
    tbwriter.add_scalar('Epoch/Loss/Valid', this_epoch_valid_loss, i)
    tbwriter.add_scalar('Epoch/lr', optimizer.param_groups[0]['lr'], i)
    tbwriter.flush()

    scheduler.step(this_epoch_valid_loss)

    if epochs_without_new_lowest >= early_stop_patience:
        break

Epoch:  2
0.17045468489329021 0.14706880182904356
Epoch:  3
0.15727403944446927 0.1436290125198224
Epoch:  4
0.15275365057445708 0.13608157897696777
Epoch:  5
0.150939203727813 0.13224683592424674
Epoch:  6
0.14906418266750518 0.13081745869096587
Epoch:  7
0.14783645896684555 0.13024026747135556
Epoch:  8
0.14625373425937835 0.12934942543506622
Epoch:  9
0.145275702221053 0.1282193509533125
Epoch:  10
0.14414949928011214 0.12729057297110558
Epoch:  11
0.1441092753694171 0.12715048299116247
Epoch:  12
0.1434506112620944 0.12656551577589092
Epoch:  13
0.14278551538785297 0.12712694836013458
Epoch:  14
0.14160769837243217 0.12728235673378496
Epoch:  15
0.14122980563413529 0.12749606195618124
Epoch:  16
0.14067870605559576 0.1288188351866077
Epoch:  17
0.14050089504037586 0.1283610414932756
Epoch:  18
0.1403930698122297 0.12838667696889708
Epoch    17: reducing learning rate of group 0 to 5.0000e-05.
Epoch:  19
0.13809238303275334 0.1258260408306823
Epoch:  20
0.13707891546544573 0.1256669

# Evaluation metrics

In [42]:
# Restore the best checkpoint written during training. map_location remaps
# the stored tensors onto the device in use, so a checkpoint saved on a GPU
# can also be loaded on a different device.
model.load_state_dict(torch.load(
    f"./models/{this_filename}",
    map_location=device
))

<All keys matched successfully>

In [43]:
this_metrics = {}

---

In [44]:
# Validation loss and raw logits of the restored checkpoint. Pure inference:
# the optimizer is not involved and gradients are disabled.
this_epoch_valid_loss = 0
val_preds = []
model.eval()
with torch.no_grad():
    for inputs, labels, masks, meta in val_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        masks = masks.to(device)
        meta = meta.to(device)
        outputs = model(inputs, with_metadata=True, metadata=meta)
        # masked loss per annotation set, normalised by the batch mask weight
        loss_0 = criterion(outputs, labels[:, :, 0]) * masks[:, :, 0]
        loss_1 = criterion(outputs, labels[:, :, 1]) * masks[:, :, 1]
        loss_2 = criterion(outputs, labels[:, :, 2]) * masks[:, :, 2]
        loss = (loss_0.sum() + loss_1.sum() + loss_2.sum()) / masks.sum()
        this_epoch_valid_loss += loss.item()
        val_preds.append(outputs.detach().cpu().numpy())
# Mean of the per-batch losses, as in the training loops.
this_epoch_valid_loss /= len(val_loader)
val_preds = np.concatenate(val_preds, axis=0)

In [45]:
this_epoch_valid_loss, val_preds.shape

(0.1256669082624071, (4308, 31))

In [46]:
this_metrics['valid_loss'] = this_epoch_valid_loss

In [47]:
# train_dataset carries no augmentation (mixup is applied inside the training
# loops), so it is reused here instead of building a second full tensor copy
# of the training data.
train_nodataaug_dataset = train_dataset
train_nodataaug_loader = DataLoader(train_nodataaug_dataset, 64, shuffle=False)

# Training-set loss and raw logits without mixup. Pure inference: the
# optimizer is not involved and gradients are disabled.
this_epoch_train_nodataaug_loss = 0
train_preds = []
model.eval()
with torch.no_grad():
    for inputs, labels, masks, meta in train_nodataaug_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        masks = masks.to(device)
        meta = meta.to(device)
        outputs = model(inputs, with_metadata=True, metadata=meta)
        # masked loss per annotation set, normalised by the batch mask weight
        loss_0 = criterion(outputs, labels[:, :, 0]) * masks[:, :, 0]
        loss_1 = criterion(outputs, labels[:, :, 1]) * masks[:, :, 1]
        loss_2 = criterion(outputs, labels[:, :, 2]) * masks[:, :, 2]
        loss = (loss_0.sum() + loss_1.sum() + loss_2.sum()) / masks.sum()
        this_epoch_train_nodataaug_loss += loss.item()
        train_preds.append(outputs.detach().cpu().numpy())
# Mean of the per-batch losses, as in the training loops.
this_epoch_train_nodataaug_loss /= len(train_nodataaug_loader)
train_preds = np.concatenate(train_preds, axis=0)

In [48]:
this_epoch_train_nodataaug_loss, train_preds.shape

(0.10229387387352169, (13538, 31))

In [49]:
this_metrics['train_loss'] = this_epoch_train_nodataaug_loss

---

In [50]:
# Names assigned to the 31 model output columns when the prediction frames
# are built: the 8 coarse classes first, then the 23 fine classes.
# NOTE(review): assumes this order matches the column order of the label
# frames the model was trained on — confirm against metadata.pkl.
our_cols = ['1_engine', '2_machinery-impact', '3_non-machinery-impact',
            '4_powered-saw', '5_alert-signal', '6_music', '7_human-voice', '8_dog',
            '1-1_small-sounding-engine', '1-2_medium-sounding-engine',
            '1-3_large-sounding-engine', '2-1_rock-drill', '2-2_jackhammer',
            '2-3_hoe-ram', '2-4_pile-driver', '3-1_non-machinery-impact',
            '4-1_chainsaw', '4-2_small-medium-rotating-saw',
            '4-3_large-rotating-saw', '5-1_car-horn', '5-2_car-alarm', '5-3_siren',
            '5-4_reverse-beeper', '6-1_stationary-music', '6-2_mobile-music',
            '6-3_ice-cream-truck', '7-1_person-or-small-group-talking',
            '7-2_person-or-small-group-shouting', '7-3_large-crowd',
            '7-4_amplified-speech', '8-1_dog-barking-whining']
# Column order of the CSV files written for scoring: the file name, then the
# fine classes, then the coarse classes.
cols_for_out = [
    "audio_filename", "1-1_small-sounding-engine",
    "1-2_medium-sounding-engine", "1-3_large-sounding-engine",
    "2-1_rock-drill",
    "2-2_jackhammer", "2-3_hoe-ram", "2-4_pile-driver",
    "3-1_non-machinery-impact",
    "4-1_chainsaw", "4-2_small-medium-rotating-saw",
    "4-3_large-rotating-saw",
    "5-1_car-horn", "5-2_car-alarm", "5-3_siren", "5-4_reverse-beeper",
    "6-1_stationary-music",
    "6-2_mobile-music", "6-3_ice-cream-truck",
    "7-1_person-or-small-group-talking",
    "7-2_person-or-small-group-shouting", "7-3_large-crowd",
    "7-4_amplified-speech",
    "8-1_dog-barking-whining", "1_engine", "2_machinery-impact",
    "3_non-machinery-impact", "4_powered-saw", "5_alert-signal",
    "6_music", "7_human-voice", "8_dog"]

---

In [51]:
# Turn the validation logits into probabilities (sigmoid), attach the sorted
# unique validation file names positionally, reorder the columns for the
# submission file and keep only the clips listed in the validation ground truth.
val_df = (
    pd.DataFrame(1 / (1 + np.exp(-val_preds)), columns=our_cols)
    .assign(audio_filename=sorted(set(metadata['coarse_valid'].index.tolist())))
    .loc[:, cols_for_out]
    .loc[lambda x: x.audio_filename.isin(
        metadata['coarse_valid_gt'].index.tolist())]
)

In [52]:
val_df.to_csv('/tmp/val_subm.csv', index=False)

---

In [53]:
import sys
sys.path.append('./baseline_code')

In [54]:
from metrics import evaluate, macro_averaged_auprc, micro_averaged_auprc

---

In [55]:
# Score the validation submission CSV at the 'coarse' level using the
# baseline metrics module (arguments: predictions, annotations, taxonomy, mode).
# NOTE(review): the structure of the returned df_dict is defined in
# baseline_code/metrics.py and is not visible here.
df_dict = evaluate(
    '/tmp/val_subm.csv',
    './data/annotations.csv',
    './data/dcase-ust-taxonomy.yaml',
    'coarse'
)
micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
macro_auprc, class_auprc = macro_averaged_auprc(df_dict, return_classwise=True)
# Position of the first eval_df row whose threshold is >= 0.5; raises
# IndexError if no row reaches that threshold.
thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]

  exec(code_obj, self.user_global_ns, self.user_ns)


In [56]:
micro_auprc, macro_auprc, eval_df['F'][thresh_0pt5_idx]
class_auprc

(0.8753853200842361, 0.7449134657633407, 0.7057877813504823)

{1: 0.8831959227255064,
 2: 0.7143397046005772,
 3: 0.5876655847608367,
 4: 0.7070967739352572,
 5: 0.9526328558115206,
 6: 0.569143693796885,
 7: 0.9756253876126462,
 8: 0.5696078028634965}

In [57]:
# Record the coarse-level validation scores, then one AUPRC entry per class.
this_metrics.update({
    'valid_coarse_micro_auprc': micro_auprc,
    'valid_coarse_macro_auprc': macro_auprc,
    'valid_coarse_f1': eval_df['F'][thresh_0pt5_idx],
})
this_metrics.update({
    f"valid_coarse_auprc_class_{k}": v
    for k, v in class_auprc.items()
})

In [58]:
# Score the validation submission CSV at the 'fine' level using the
# baseline metrics module (arguments: predictions, annotations, taxonomy, mode).
# NOTE(review): the structure of the returned df_dict is defined in
# baseline_code/metrics.py and is not visible here.
df_dict = evaluate(
    '/tmp/val_subm.csv',
    './data/annotations.csv',
    './data/dcase-ust-taxonomy.yaml',
    'fine'
)
micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
macro_auprc, class_auprc = macro_averaged_auprc(df_dict, return_classwise=True)
# Position of the first eval_df row whose threshold is >= 0.5; raises
# IndexError if no row reaches that threshold.
thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]



In [59]:
micro_auprc, macro_auprc, eval_df['F'][thresh_0pt5_idx]
class_auprc

(0.7885323826887137, 0.6376440709172527, 0.5592654424040067)

{1: 0.7128869026438804,
 2: 0.5596352977611223,
 3: 0.6007781323061736,
 4: 0.42170684129179037,
 5: 0.9168613701464452,
 6: 0.3889315037322234,
 7: 0.9256520577613885,
 8: 0.5747004616949982}

In [60]:
# Record the fine-level validation scores, then one AUPRC entry per class.
this_metrics.update({
    'valid_fine_micro_auprc': micro_auprc,
    'valid_fine_macro_auprc': macro_auprc,
    'valid_fine_f1': eval_df['F'][thresh_0pt5_idx],
})
this_metrics.update({
    f"valid_fine_auprc_class_{k}": v
    for k, v in class_auprc.items()
})

---

In [61]:
# Turn the training logits into probabilities (sigmoid), attach the sorted
# unique training file names positionally, reorder the columns for the
# submission file and keep only the clips listed in the training ground truth.
train_nodataaug_df = (
    pd.DataFrame(1 / (1 + np.exp(-train_preds)), columns=our_cols)
    .assign(audio_filename=sorted(set(metadata['coarse_train'].index.tolist())))
    .loc[:, cols_for_out]
    .loc[lambda x: x.audio_filename.isin(
        metadata['coarse_train_gt'].index.tolist())]
)

In [62]:
train_nodataaug_df.to_csv('/tmp/train_nodataaug.csv', index=False)

In [63]:
# Score the un-augmented training predictions at the 'coarse' level using the
# baseline metrics module (arguments: predictions, annotations, taxonomy, mode).
# NOTE(review): the meaning of the fifth positional argument (True) is defined
# in baseline_code/metrics.py and is not visible here — presumably it selects
# the training split; confirm.
df_dict = evaluate(
    '/tmp/train_nodataaug.csv',
    './data/annotations.csv',
    './data/dcase-ust-taxonomy.yaml',
    'coarse',
    True
)
micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
macro_auprc, class_auprc = macro_averaged_auprc(df_dict, return_classwise=True)
# Position of the first eval_df row whose threshold is >= 0.5; raises
# IndexError if no row reaches that threshold.
thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]

In [64]:
micro_auprc, macro_auprc, eval_df['F'][thresh_0pt5_idx]
class_auprc

(0.930557883214178, 0.7905393637259761, 0.759493670886076)

{1: 0.9416277816816561,
 2: 0.6894696037576742,
 3: 0.556871196652954,
 4: 0.6154809651133181,
 5: 0.9662188845698323,
 6: 0.6361742424242425,
 7: 0.9747492918851879,
 8: 0.9437229437229437}

In [65]:
# Record the coarse-level training scores, then one AUPRC entry per class.
this_metrics.update({
    'train_coarse_micro_auprc': micro_auprc,
    'train_coarse_macro_auprc': macro_auprc,
    'train_coarse_f1': eval_df['F'][thresh_0pt5_idx],
})
this_metrics.update({
    f"train_coarse_auprc_class_{k}": v
    for k, v in class_auprc.items()
})

In [66]:
# Score the un-augmented training predictions at the 'fine' level using the
# baseline metrics module (arguments: predictions, annotations, taxonomy, mode).
# NOTE(review): the meaning of the fifth positional argument (True) is defined
# in baseline_code/metrics.py and is not visible here — presumably it selects
# the training split; confirm.
df_dict = evaluate(
    '/tmp/train_nodataaug.csv',
    './data/annotations.csv',
    './data/dcase-ust-taxonomy.yaml',
    'fine',
    True
)
micro_auprc, eval_df = micro_averaged_auprc(df_dict, return_df=True)
macro_auprc, class_auprc = macro_averaged_auprc(df_dict, return_classwise=True)
# Position of the first eval_df row whose threshold is >= 0.5; raises
# IndexError if no row reaches that threshold.
thresh_0pt5_idx = (eval_df['threshold'] >= 0.5).to_numpy().nonzero()[0][0]

In [67]:
micro_auprc, macro_auprc, eval_df['F'][thresh_0pt5_idx]
class_auprc

(0.823725953039439, 0.734781246709947, 0.5978835978835979)

{1: 0.6924094223526359,
 2: 0.7419054713853576,
 3: 0.5447679076835333,
 4: 0.4726752316315679,
 5: 0.9407072447606555,
 6: 0.6373106060606061,
 7: 0.8841883755195059,
 8: 0.9642857142857142}

In [68]:
# Record the fine-level training scores, then one AUPRC entry per class.
this_metrics.update({
    'train_fine_micro_auprc': micro_auprc,
    'train_fine_macro_auprc': macro_auprc,
    'train_fine_f1': eval_df['F'][thresh_0pt5_idx],
})
this_metrics.update({
    f"train_fine_auprc_class_{k}": v
    for k, v in class_auprc.items()
})

---

In [69]:
# Prefix every metric name with 'hparams/' for the TensorBoard hparams view.
# Keys that already carry the prefix are left alone, so re-running this cell
# does not produce 'hparams/hparams/...' names.
this_metrics = {
    (k if k.startswith('hparams/') else f'hparams/{k}'): v
    for k, v in this_metrics.items()
}

In [70]:
tbwriter.add_hparams(hparam_dict=this_hparams, metric_dict=this_metrics)

In [71]:
tbwriter.flush()