In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import time
import os
from pathlib import Path
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.utils.tensorboard import SummaryWriter

import sys
sys.path.append("..")

# dataset manager
from dcase2020.datasetManager import DESEDManager
from dcase2020.datasets import DESEDDataset

# utility function & metrics & augmentation
from metric_utils.metrics import FScore, BinaryAccuracy
from dcase2020_task4.util.utils import get_datetime, reset_seed

# models
from dcase2020_task4.baseline.models import WeakBaseline

In [3]:
# ==== set the log ====
import logging
import logging.config
from dcase2020.util.log import DEFAULT_LOGGING
logging.config.dictConfig(DEFAULT_LOGGING)
log = logging.getLogger(__name__)

In [4]:
# ==== reset the seed for reproductability ====
reset_seed(1234)

# Prepare the data

In [5]:
# ==== load the dataset ====
desed_metadata_root = "../dataset/DESED/dataset/metadata"
desed_audio_root = "../dataset/DESED/dataset/audio"
# desed_metadata_root = os.path.join("e:/", "Corpus", "dcase2020", "DESED", "dataset", "metadata")
# desed_audio_root = os.path.join("e:/", "Corpus", "dcase2020", "DESED", "dataset", "audio")

manager = DESEDManager(
    desed_metadata_root, desed_audio_root,
    sampling_rate = 22050,
    validation_ratio=0.2,
    from_disk=False,
    nb_vector_bin=53, # The model output localisation with a résolution of ~ 18ms --> 53 temporal bins
    verbose = 1
)

[1;34mDEBUG --- datasetManager.__init__ >>> ../dataset/DESED/dataset/audio/dcase2020_dataset_22050.hdf5[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/weak.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/unlabel_in_domain.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/synthetic20.tsv[0m


100%|██████████| 7582/7582 [00:16<00:00, 471.07it/s]


## Add weak ans synthetic20 subset

In [6]:
manager.add_subset("weak")


[1;37mINFO --- datasetManager.add_subset >>> Loading dataset: train, subset: weak[0m
Loading dataset: train, subset: weak
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/train/weak[0m


## Create the train / validation split

In [7]:
manager.split_train_validation()

[1;37mINFO --- datasetManager.split_train_validation >>> Creating new train / validation split[0m
[1;37mINFO --- datasetManager.split_train_validation >>> validation ratio : 0.2[0m


## Prep dataset

In [8]:
augments = [
    # signal_augmentation.Noise(0.5, target_snr=15),
    # signal_augmentation.RandomTimeDropout(0.5, dropout=0.2)
]

train_dataset = DESEDDataset(manager, train=True, val=False, augments=augments, cached=True)
val_dataset = DESEDDataset(manager, train=False, val=True, augments=[], cached=True)

In [9]:
len(train_dataset.filenames), len(val_dataset.filenames)

(1202, 263)

# Prepare training

## Prep dataset

- We want both the weak and strong ground truth --> the *weak* and *strong* parameters to True

In [10]:
augments = [
    # signal_augmentation.Noise(0.5, target_snr=15),
    # signal_augmentation.RandomTimeDropout(0.5, dropout=0.2)
]

train_dataset = DESEDDataset(manager, train=True, val=False, weak=True, strong=False, augments=augments, cached=True)
val_dataset = DESEDDataset(manager, train=False, val=True, weak=True, strong=False, augments=[], cached=True)

## model

This model is the same than the weak baseline but have an extra output. <br />
the loc_output is compose of a single convolution layer with nb_filters == nb_class. <br />
Since their is some pooling layer, the *loc_ouput* have a precision of 53 bins (~= 18 ms)

In [11]:
torch.cuda.empty_cache() 

model = WeakBaseline()
model.cuda()

WeakBaseline(
  (features): Sequential(
    (0): ConvPoolReLU(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout2d(p=0.0, inplace=False)
      (4): ReLU6(inplace=True)
    )
    (1): ConvPoolReLU(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout2d(p=0.3, inplace=False)
      (4): ReLU6(inplace=True)
    )
    (2): ConvPoolReLU(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=

In [12]:
from torchsummaryX import summary
input_tensor = torch.zeros((1, 64, 431), dtype=torch.float)
input_tensor = input_tensor.cuda()

s = summary(model, input_tensor)


                               Kernel Shape      Output Shape  Params  \
Layer                                                                   
0_features.0.Conv2d_0         [1, 32, 3, 3]  [1, 32, 64, 431]   320.0   
1_features.0.MaxPool2d_1                  -  [1, 32, 16, 215]       -   
2_features.0.BatchNorm2d_2             [32]  [1, 32, 16, 215]    64.0   
3_features.0.Dropout2d_3                  -  [1, 32, 16, 215]       -   
4_features.0.ReLU6_4                      -  [1, 32, 16, 215]       -   
5_features.1.Conv2d_0        [32, 32, 3, 3]  [1, 32, 16, 215]  9.248k   
6_features.1.MaxPool2d_1                  -   [1, 32, 4, 107]       -   
7_features.1.BatchNorm2d_2             [32]   [1, 32, 4, 107]    64.0   
8_features.1.Dropout2d_3                  -   [1, 32, 4, 107]       -   
9_features.1.ReLU6_4                      -   [1, 32, 4, 107]       -   
10_features.2.Conv2d_0       [32, 32, 3, 3]   [1, 32, 4, 107]  9.248k   
11_features.2.MaxPool2d_1                 -    [1, 

## Create a custom loss function

Since not all file have strong truth, it is necessary to remove those files. <br />
For that, the strong mask is computed. If the sum of the strong ground truth is equal to 0 then it is a fake one <br />
This file strong loss must not be taken into account.

## Training parameters (crit & callbacks & loaders & metrics)m

In [13]:
# training parameters
nb_epochs = 100
batch_size = 32
nb_batch = len(train_dataset) // batch_size

optimizers = torch.optim.Adam(model.parameters(), lr=0.003)
criterion = nn.BCEWithLogitsLoss(reduction="mean")

# callbacks
callbacks = []

# tensorboard
title = "WeakBaseline_%s" % (get_datetime())
tensorboard = SummaryWriter(log_dir=Path("../tensorboard/%s" % title), comment="weak baseline")

# loaders
training_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Metrics
weak_binacc_func = BinaryAccuracy()
weak_f_func = FScore()


In [14]:
def reset_all_metrics():
    metrics = [weak_binacc_func, weak_f_func]
    
    for m in metrics:
        m.reset()

## Training functions

In [15]:
UNDERLINE_SEQ = "\033[1;4m"
RESET_SEQ = "\033[0m"


header_form = "{:<8.8} {:<6.6} - {:<6.6} - {:<8.8} {:<6.6} - {:<9.9} {:<12.12}| {:<9.9}- {:<6.6}"
value_form  = "{:<8.8} {:<6} - {:<6} - {:<8.8} {:<6.4f} - {:<9.9} {:<10.4f}| {:<9.4f}- {:<6.4f}"
header = header_form.format(
    "", "Epoch", "%", "Losses:", "Weak ", "metrics: ", "Weak acc ", "Weak F1 ","Time"
)


train_form = value_form
val_form = UNDERLINE_SEQ + value_form + RESET_SEQ

print(header)

         Epoch  - %      - Losses:  Weak   - metrics:  Weak acc    | Weak F1  - Time  


In [16]:
def train(epoch: int):
    start_time = time.time()
    
    reset_all_metrics()
    
    model.train()
    print("") # <-- Force new line
    
    for i, (X, y) in enumerate(training_loader):
        # The DESEDDataset return a list of ground truth depending on the selecting option.
        # If weak and strong ground truth are selected, the list order is [WEAK, STRONG]
        # here there is only one [WEAK]
        X = X.cuda().float()
        y_weak = y[0].cuda().float()
        
        weak_logits = model(X)
        
        # calc the loss
        weak_loss = criterion(weak_logits, y_weak)
        
        # back propagation
        optimizers.zero_grad()
        weak_loss.backward()
        optimizers.step()
        
        with torch.set_grad_enabled(False):

            # calc metrics
            weak_pred = torch.sigmoid(weak_logits)

            # tagging
            weak_binacc = weak_binacc_func(weak_pred, y_weak)
            weak_fscore = weak_f_func(weak_pred, y_weak)

            # logs
            print(train_form.format(
                "Training: ",
                epoch + 1,
                int(100 * (i + 1) / nb_batch),
                "", weak_loss.item(),
                "", weak_binacc, weak_fscore,
                time.time() - start_time
            ), end="\r")

        # tensorboard logs
        tensorboard.add_scalar("train/weak_loss", weak_loss.item(), epoch)

        tensorboard.add_scalar("train/weak_acc", weak_binacc, epoch)
        tensorboard.add_scalar("train/weak_f1", weak_fscore, epoch)


In [17]:
def val(epoch):
    start_time = time.time()

    reset_all_metrics()
    
    model.eval()
    print("") # <-- Force new line
    
    with torch.set_grad_enabled(False):
        for i, (X, y) in enumerate(val_loader):
            X = X.cuda().float()
            y_weak = y[0].cuda().float()

            weak_logits = model(X)

            # calc the loss
            weak_loss = criterion(weak_logits, y_weak)

             # calc metrics
            weak_pred = torch.sigmoid(weak_logits)

            # tagging
            weak_binacc = weak_binacc_func(weak_pred, y_weak)
            weak_fscore = weak_f_func(weak_pred, y_weak)


            # logs
            print(val_form.format(
                "Validation: ",
                epoch + 1,
                int(100 * (i + 1) / nb_batch),
                "", weak_loss.item(),
                "", weak_binacc, weak_fscore,
                time.time() - start_time
            ), end="\r")

        # tensorboard logs
        tensorboard.add_scalar("val/weak_loss", weak_loss.item(), epoch)

        tensorboard.add_scalar("val/weak_acc", weak_binacc, epoch)
        tensorboard.add_scalar("val/weak_f1", weak_fscore, epoch)


# Train

In [18]:
print(header)
for e in range(nb_epochs):
    train(e)
    val(e)

         Epoch  - %      - Losses:  Weak   - metrics:  Weak acc    | Weak F1  - Time  

Training 1      - 102    -          0.3927 -           0.8454    | 0.0631   - 15.2248
[1;4mValidati 1      - 24     -          0.4046 -           0.8623    | 0.0274   - 3.2933[0m
Training 2      - 102    -          0.4035 -           0.8562    | 0.1249   - 0.5406
[1;4mValidati 2      - 24     -          0.4280 -           0.8644    | 0.1926   - 0.0408[0m
Training 3      - 102    -          0.3545 -           0.8602    | 0.1878   - 0.4840
[1;4mValidati 3      - 24     -          0.4555 -           0.8460    | 0.1802   - 0.0400[0m
Training 4      - 102    -          0.3142 -           0.8655    | 0.2628   - 0.4848
[1;4mValidati 4      - 24     -          0.4324 -           0.8734    | 0.2412   - 0.0401[0m
Training 5      - 102    -          0.3565 -           0.8731    | 0.3323   - 0.5095
[1;4mValidati 5      - 24     -          0.4045 -           0.8790    | 0.3341   - 0.0455[0m
Training 6 

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪