In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import time

import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.utils.tensorboard import SummaryWriter

# dataset manager
from dcase2020.datasetManager import DESEDManager
from dcase2020.datasets import DESEDDataset

# utility function & metrics & augmentation
import dcase2020.augmentation_utils.signal_augmentations as signal_augmentations
import dcase2020.augmentation_utils.spec_augmentations as spec_augmentations
import dcase2020.augmentation_utils.signal_augmentations as signal_augmentations
from dcase2020.pytorch_metrics.metrics import FScore, BinaryAccuracy
from dcase2020.util.utils import get_datetime, reset_seed

# models
from dcase2020.models import WeakBaseline

In [3]:
# ==== set the log ====
import logging
import logging.config
from dcase2020.util.log import DEFAULT_LOGGING
logging.config.dictConfig(DEFAULT_LOGGING)
log = logging.getLogger(__name__)

In [4]:
# ==== reset the seed for reproductability ====
reset_seed(1234)

# Loading the dataset

In [5]:
# ==== load the dataset ====
dese_metadata_root = "../dataset/DESED/dataset/metadata"
desed_audio_root = "../dataset/DESED/dataset/audio"

manager = DESEDManager(
    dese_metadata_root, desed_audio_root,
    sampling_rate = 22050,
    validation_ratio=0.2,
    verbose = 2
)

[1;34mDEBUG --- datasetManager.__init__ >>> ../dataset/dcase2020_dataset_22050.hdf5[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/weak.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/unlabel_in_domain.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/synthetic20.tsv[0m


In [6]:
manager.add_subset("weak")

[1;37mINFO --- datasetManager.add_subset >>> Loading dataset: train, subset: weak[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: ../dataset/DESED/dataset/audio/train/weak[0m
[1;34mDEBUG --- datasetManager.get_subset >>> output size: 1340[0m


In [7]:
manager.split_train_validation()

[1;37mINFO --- datasetManager.split_train_validation >>> Creating new train / validation split[0m
[1;37mINFO --- datasetManager.split_train_validation >>> validation ratio : 0.2[0m


In [8]:
augments = [
    # signal_augmentation.Noise(0.5, target_snr=15),
    # signal_augmentation.RandomTimeDropout(0.5, dropout=0.2)
]

train_dataset = DESEDDataset(manager, train=True, val=False, augments=augments, cached=True)
val_dataset = DESEDDataset(manager, train=False, val=True, augments=[], cached=True)

In [9]:
len(train_dataset.filenames), len(val_dataset.filenames)

(1097, 243)

# Prepare training

## model

In [10]:
model = WeakBaseline()
model.cuda()

WeakBaseline(
  (features): Sequential(
    (0): ConvPoolReLU(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout2d(p=0.0, inplace=False)
      (4): ReLU6(inplace=True)
    )
    (1): ConvPoolReLU(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout2d(p=0.3, inplace=False)
      (4): ReLU6(inplace=True)
    )
    (2): ConvPoolReLU(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=

In [11]:
from torchsummaryX import summary
input_tensor = torch.zeros((1, 64, 431), dtype=torch.float)
input_tensor = input_tensor.cuda()

s = summary(model, input_tensor)


                               Kernel Shape      Output Shape  Params  \
Layer                                                                   
0_features.0.Conv2d_0         [1, 32, 3, 3]  [1, 32, 64, 431]   320.0   
1_features.0.MaxPool2d_1                  -  [1, 32, 16, 215]       -   
2_features.0.BatchNorm2d_2             [32]  [1, 32, 16, 215]    64.0   
3_features.0.Dropout2d_3                  -  [1, 32, 16, 215]       -   
4_features.0.ReLU6_4                      -  [1, 32, 16, 215]       -   
5_features.1.Conv2d_0        [32, 32, 3, 3]  [1, 32, 16, 215]  9.248k   
6_features.1.MaxPool2d_1                  -   [1, 32, 4, 107]       -   
7_features.1.BatchNorm2d_2             [32]   [1, 32, 4, 107]    64.0   
8_features.1.Dropout2d_3                  -   [1, 32, 4, 107]       -   
9_features.1.ReLU6_4                      -   [1, 32, 4, 107]       -   
10_features.2.Conv2d_0       [32, 32, 3, 3]   [1, 32, 4, 107]  9.248k   
11_features.2.MaxPool2d_1                 -    [1, 

## Training parameters (crit & callbacks & loaders & metrics)

In [12]:
# training parameters
nb_epochs = 100
batch_size = 32
nb_batch = len(train_dataset) // batch_size

# criterion & optimizers
criterion = nn.BCEWithLogitsLoss(reduction="mean")

optimizers = torch.optim.Adam(model.parameters(), lr=0.003)

# callbacks
callbacks = []

# tensorboard
title = "WeakBaseline_%s" % (get_datetime())
tensorboard = SummaryWriter(log_dir="../tensorboard/%s" % title, comment="weak baseline")

# loaders
training_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Metrics
binacc_func = BinaryAccuracy()

## Training functions

In [13]:
def train(epoch: int):
    start_time = time.time()
    binacc_func.reset()
    model.train()
    print("") # <-- Force new line
    
    for i, (X, y) in enumerate(training_loader):
        X, y = X.cuda().float(), y.cuda().float()
        
        logits = model(X)
        
        loss = criterion(logits, y)
        
        # calc metrics
        pred = F.sigmoid(logits)
        binacc = binacc_func(pred, y)
        
        # back propagation
        optimizers.zero_grad()
        loss.backward()
        optimizers.step()
        
        # logs
        print("Epoch {}, {:d}% \t loss: {:.4e} - acc: {:.4e} - took {:.2f}s".format(
            epoch + 1,
            int(100 * (i + 1) / nb_batch),
            loss.item(),
            binacc,
            time.time() - start_time
        ), end="\r")
        
    # tensorboard logs
    tensorboard.add_scalar("train/loss", loss.item(), epoch)
    tensorboard.add_scalar("train/acc", binacc, epoch)
    

In [14]:
def val(epoch):
    binacc_func.reset()
    model.train()
    print("") # <-- Force new line
    
    for i, (X, y) in enumerate(val_loader):
        X, y = X.cuda().float(), y.cuda().float()
        
        logits = model(X)
        
        loss = criterion(logits, y)
        
        # calc metrics
        pred = F.sigmoid(logits)
        binacc = binacc_func(pred, y)
        
        # back propagation
        optimizers.zero_grad()
        loss.backward()
        optimizers.step()
        
        # logs
        print("validation \t val_loss: {:.4e} - val_acc: {:.4e}".format(
            loss.item(),
            binacc,
        ), end="\r")
        
    # tensorboard logs
    tensorboard.add_scalar("val/loss", loss.item(), epoch)
    tensorboard.add_scalar("val/acc", binacc, epoch)

# Train

In [15]:
for e in range(nb_epochs):
    train(e)
    val(e)






Epoch 1, 102% 	 loss: 3.5969e-01 - acc: 8.4414e-01 - took 10.90s
validation 	 val_loss: 4.4592e-01 - val_acc: 8.4969e-01
Epoch 2, 102% 	 loss: 3.8914e-01 - acc: 8.5528e-01 - took 0.49s
validation 	 val_loss: 4.6168e-01 - val_acc: 8.4681e-01
Epoch 3, 102% 	 loss: 2.4749e-01 - acc: 8.5740e-01 - took 0.44s
validation 	 val_loss: 4.6852e-01 - val_acc: 8.4850e-01
Epoch 4, 102% 	 loss: 2.7380e-01 - acc: 8.6133e-01 - took 0.44s
validation 	 val_loss: 5.0707e-01 - val_acc: 8.5512e-01
Epoch 5, 102% 	 loss: 3.6495e-01 - acc: 8.7035e-01 - took 0.44s
validation 	 val_loss: 5.1737e-01 - val_acc: 8.6083e-01
Epoch 6, 102% 	 loss: 2.7983e-01 - acc: 8.7504e-01 - took 0.44s
validation 	 val_loss: 4.6521e-01 - val_acc: 8.7062e-01
Epoch 7, 102% 	 loss: 3.9089e-01 - acc: 8.7562e-01 - took 0.44s
validation 	 val_loss: 5.0123e-01 - val_acc: 8.7007e-01
Epoch 8, 102% 	 loss: 3.0307e-01 - acc: 8.8002e-01 - took 0.44s
validation 	 val_loss: 4.6439e-01 - val_acc: 8.7494e-01
Epoch 9, 102% 	 loss: 2.7678e-01 - acc:

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪