In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits made to the
# imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import time
import os
from pathlib import Path
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.utils.tensorboard import SummaryWriter

import sys
sys.path.append("..")

# dataset manager
from dcase2020.datasetManager import DESEDManager
from dcase2020.datasets import DESEDDataset

# utility function & metrics & augmentation
from metric_utils.metrics import FScore, BinaryAccuracy
from dcase2020_task4.util.utils import get_datetime, reset_seed

# models
from dcase2020_task4.dcase2019.models import dcase2019_model

In [3]:
# ==== set the log ====
# Apply the project's DEFAULT_LOGGING dictionary to the logging module and
# create the logger used by this notebook.
import logging
import logging.config
from dcase2020.util.log import DEFAULT_LOGGING
logging.config.dictConfig(DEFAULT_LOGGING)
log = logging.getLogger(__name__)

In [4]:
# ==== reset the seed for reproducibility ====
# Fixed seed so that successive runs can be compared.
# NOTE(review): which random generators are seeded is decided inside
# reset_seed (not visible here) -- confirm it covers torch / CUDA.
reset_seed(1234)

# Prepare the data

In [5]:
# ==== load the dataset ====
# Locations of the DESED metadata and audio, relative to this notebook.
desed_metadata_root = "../dataset/DESED/dataset/metadata"
desed_audio_root = "../dataset/DESED/dataset/audio"
# Alternative (machine-specific) locations, kept for reference:
# desed_metadata_root = os.path.join("e:/", "Corpus", "dcase2020", "DESED", "dataset", "metadata")
# desed_audio_root = os.path.join("e:/", "Corpus", "dcase2020", "DESED", "dataset", "audio")

# The manager reads the metadata and gives access to the audio subsets.
# validation_ratio is the ratio used later by manager.split_train_validation().
# NOTE(review): the exact meaning of sampling_rate, from_disk and verbose is
# defined by DESEDManager (not visible here) -- confirm against its docstring.
manager = DESEDManager(
    desed_metadata_root, desed_audio_root,
    sampling_rate = 22050,
    validation_ratio=0.2,
    from_disk=False,
    nb_vector_bin=431, # there is no temporal reduction in this model
    verbose = 1
)

[1;34mDEBUG --- datasetManager.__init__ >>> ../dataset/DESED/dataset/audio/dcase2020_dataset_22050.hdf5[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/weak.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/unlabel_in_domain.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/synthetic20.tsv[0m


100%|██████████| 7582/7582 [00:15<00:00, 485.93it/s]


## Add the weak and synthetic20 subsets

In [6]:
# Register, in this order, the two training subsets used by this notebook.
for subset_name in ("weak", "synthetic20"):
    manager.add_subset(subset_name)

[1;37mINFO --- datasetManager.add_subset >>> Loading dataset: train, subset: weak[0m
Loading dataset: train, subset: weak
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/train/weak[0m
[1;37mINFO --- datasetManager.add_subset >>> Loading dataset: train, subset: synthetic20[0m
Loading dataset: train, subset: synthetic20
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/train/synthetic20[0m


## Create the train / validation split

In [7]:
# Build the train / validation split; the ratio is the validation_ratio that
# was given to the manager when it was created.
manager.split_train_validation()

[1;37mINFO --- datasetManager.split_train_validation >>> Creating new train / validation split[0m
[1;37mINFO --- datasetManager.split_train_validation >>> validation ratio : 0.2[0m


## Prep dataset

In [8]:
# No augmentation is enabled; the entries below are disabled examples.
augments = [
    # signal_augmentation.Noise(0.5, target_snr=15),
    # signal_augmentation.RandomTimeDropout(0.5, dropout=0.2)
]

# NOTE(review): train_dataset and val_dataset are created again, with the
# weak and strong ground truth enabled, in the "Prepare training" section.
# These first instances are therefore only used by the size check that follows.
train_dataset = DESEDDataset(manager, train=True, val=False, augments=augments, cached=True)
val_dataset = DESEDDataset(manager, train=False, val=True, augments=[], cached=True)

In [9]:
# Number of files in the training and in the validation dataset.
len(train_dataset.filenames), len(val_dataset.filenames)

(3322, 727)

# Prepare training

## Prep dataset

- We want both the weak and strong ground truth --> set the *weak* and *strong* parameters to True

In [10]:
# Options shared by both datasets: request the weak AND the strong ground
# truth, and keep the same caching setting.
shared_dataset_options = dict(weak=True, strong=True, cached=True)

# No augmentation is enabled; the entries below are disabled examples.
augments = [
    # signal_augmentation.Noise(0.5, target_snr=15),
    # signal_augmentation.RandomTimeDropout(0.5, dropout=0.2)
]

train_dataset = DESEDDataset(
    manager,
    train=True, val=False,
    augments=augments,
    **shared_dataset_options
)
val_dataset = DESEDDataset(
    manager,
    train=False, val=True,
    augments=[],
    **shared_dataset_options
)

## model

This model is the same as the weak baseline but has an extra output. <br />
The loc_output is composed of a single convolution layer with nb_filters == nb_class. <br />
Since there are some pooling layers, the *loc_output* has a precision of 53 bins (~= 18 ms)

In [11]:
# Release the cached GPU memory held by PyTorch before building the model
# (useful when this cell is run again in the same kernel).
torch.cuda.empty_cache() 
model = dcase2019_model()
# Move the model to the GPU; as the last expression of the cell, this also
# displays the model architecture.
model.cuda()

dcase2019_model(
  (features): Sequential(
    (0): ConvBNReLUPool(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout2d(p=0.0, inplace=False)
      (3): ReLU6(inplace=True)
      (4): MaxPool2d(kernel_size=(4, 1), stride=(4, 1), padding=0, dilation=1, ceil_mode=False)
    )
    (1): ConvBNReLUPool(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout2d(p=0.0, inplace=False)
      (3): ReLU6(inplace=True)
      (4): MaxPool2d(kernel_size=(4, 1), stride=(4, 1), padding=0, dilation=1, ceil_mode=False)
    )
    (2): ConvBNReLUPool(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Dropout2d(p=0.0,

In [12]:
# Print a layer-by-layer summary of the model (kernel shape, output shape,
# parameters) by feeding it a zero-filled tensor of shape (2, 64, 431).
# 431 is the nb_vector_bin value given to the dataset manager.
from torchsummaryX import summary
input_tensor = torch.zeros((2, 64, 431), dtype=torch.float)
# The model lives on the GPU, so the input must be moved there as well.
input_tensor = input_tensor.cuda()

s = summary(model, input_tensor)


                                 Kernel Shape      Output Shape   Params  \
Layer                                                                      
0_features.0.Conv2d_0           [1, 64, 3, 3]  [2, 64, 64, 431]    640.0   
1_features.0.BatchNorm2d_1               [64]  [2, 64, 64, 431]    128.0   
2_features.0.Dropout2d_2                    -  [2, 64, 64, 431]        -   
3_features.0.ReLU6_3                        -  [2, 64, 64, 431]        -   
4_features.0.MaxPool2d_4                    -  [2, 64, 16, 431]        -   
5_features.1.Conv2d_0          [64, 64, 3, 3]  [2, 64, 16, 431]  36.928k   
6_features.1.BatchNorm2d_1               [64]  [2, 64, 16, 431]    128.0   
7_features.1.Dropout2d_2                    -  [2, 64, 16, 431]        -   
8_features.1.ReLU6_3                        -  [2, 64, 16, 431]        -   
9_features.1.MaxPool2d_4                    -   [2, 64, 4, 431]        -   
10_features.2.Conv2d_0         [64, 64, 3, 3]   [2, 64, 4, 431]  36.928k   
11_features.

## Create a custom loss function

Since not all files have a strong ground truth, those files must be excluded from the strong loss. <br />
For that, a strong mask is computed: if the sum of a file's strong ground truth is equal to 0, then it is a fake one. <br />
The strong loss of such a file must not be taken into account.

In [13]:
def weak_synth_loss(logits_weak, logits_strong, y_weak, y_strong, reduce: str = "mean"):
    """Compute the weak, the masked strong and the combined BCE losses.

    The binary cross entropy (on logits) is computed element-wise, then
    reduced per file: over dim 1 for the weak output and over dims (1, 2) for
    the strong output. The strong loss of a file is multiplied by a mask that
    is 0 when its strong target sums to 0, so files without a strong ground
    truth do not contribute to it.

    Args:
        logits_weak: raw weak outputs, reduced over dim 1.
        logits_strong: raw strong outputs, reduced over dims (1, 2).
        y_weak: weak targets, same shape as ``logits_weak``.
        y_strong: strong targets, same shape as ``logits_strong``.
        reduce: ``"mean"`` or ``"sum"``; used for the per-file reduction and
            for the final reduction over the files.

    Returns:
        A ``(weak_loss, strong_loss, total_loss)`` tuple of scalar tensors.
    """
    assert reduce in ("mean", "sum"), 'support only "mean" and "sum"'
    aggregate = torch.mean if reduce == "mean" else torch.sum

    # Element-wise criterion: every reduction is done explicitly below.
    elementwise_bce = nn.BCEWithLogitsLoss(reduction="none")

    # One loss value per file for each output
    per_file_weak = aggregate(elementwise_bce(logits_weak, y_weak), dim=1)
    per_file_strong = aggregate(elementwise_bce(logits_strong, y_strong), dim=(1, 2))

    # Sum of the strong target clamped to [0, 1]: 0 for an all-zero target,
    # 1 as soon as the target sums to 1 or more.
    has_strong_truth = torch.sum(y_strong, dim=(1, 2)).clamp(0, 1)
    masked_strong = has_strong_truth * per_file_strong

    # The individual losses are returned as well, for logging purposes.
    return (
        aggregate(per_file_weak),
        aggregate(masked_strong),
        aggregate(per_file_weak + masked_strong),
    )

## Training parameters (crit & callbacks & loaders & metrics)

In [14]:
# training parameters
nb_epochs = 100
batch_size = 32
# Number of full batches per epoch.
# NOTE(review): nb_batch is not referenced by the code visible in this notebook.
nb_batch = len(train_dataset) // batch_size

# Single Adam optimizer over all the model parameters (despite the plural name).
optimizers = torch.optim.Adam(model.parameters(), lr=0.003)

# callbacks
# NOTE(review): the list stays empty and is not referenced by the visible code.
callbacks = []

# tensorboard
# NOTE(review): `title` and `tensorboard` are assigned again in the "Train"
# cell, so the writer created here does not receive the training scalars.
title = "WeakBaseline_%s" % (get_datetime())
tensorboard = SummaryWriter(log_dir=Path("../tensorboard/%s" % title), comment="weak baseline")

# loaders
# Only the training data is shuffled.
training_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Metrics
# Separate instances for the weak and the strong outputs.
weak_binacc_func = BinaryAccuracy()
strong_binacc_func = BinaryAccuracy()
weak_f_func = FScore()
strong_f_func = FScore()
# Grouped so that all of them can be reset at once.
metrics = [weak_binacc_func, strong_binacc_func, weak_f_func, strong_f_func]

In [15]:
def reset_all_metrics(metrics):
    """Call ``reset()`` on each element of ``metrics``, in iteration order.

    Args:
        metrics: iterable of objects exposing a ``reset()`` method.
    """
    for metric in metrics:
        metric.reset()

## Training functions

In [16]:
# ANSI escape sequences: "bold + underline" on, and reset of all attributes.
UNDERLINE_SEQ = "\033[1;4m"
RESET_SEQ = "\033[0m"

# Layout of the title row: every column is left-aligned, padded and truncated
# to a fixed width.
header_form = "{:<8.8} {:<6.6} - {:<6.6} - {:<8.8} {:<6.6}| {:<8.8}| {:<6.6} - {:<9.9} {:<10.10}| {:<12.12}| {:<9.9}| {:<11.11}- {:<6.6}"

# Same column widths for the value rows; numeric columns show 4 decimals.
value_form  = "{:<8.8} {:<6} - {:<6} - {:<8.8} {:<6.4f}| {:<8.4f}| {:<6.4f} - {:<9.9} {:<10.4f}| {:<12.4f}| {:<9.4f}| {:<11.4f}- {:<6.4f}"

# Training rows are printed as is, validation rows are underlined.
train_form = value_form
val_form = "".join((UNDERLINE_SEQ, value_form, RESET_SEQ))

# Column titles, in the order expected by header_form.
header_columns = (
    "", "Epoch", "%",
    "Losses:", "Weak ", "Strong ", "Total ",
    "metrics: ", "Weak acc ", "Strong acc ", "Weak F1 ", "Strong F1",
    "Time",
)
header = header_form.format(*header_columns)

print(header)

         Epoch  - %      - Losses:  Weak  | Strong  | Total  - metrics:  Weak acc  | Strong acc  | Weak F1  | Strong F1  - Time  


In [17]:
def train(epoch: int):
    """Train the model for one epoch and log the results.

    Works on the notebook-level objects ``model``, ``optimizers``,
    ``training_loader``, ``metrics`` (and the four metric callables),
    ``train_form`` and ``tensorboard``.

    A progress line is printed after every batch. TensorBoard receives ONE
    point per epoch and per tag: the losses averaged over the batches of the
    epoch and the last value returned by each metric. Writing a point for
    every batch with the same global step would stack many values on a single
    step of the curve.

    Args:
        epoch: zero-based epoch index; displayed as ``epoch + 1`` and used as
            the TensorBoard global step.
    """
    start_time = time.time()

    reset_all_metrics(metrics)

    model.train()
    print("") # <-- Force new line

    # Accumulators for the per-epoch average of the losses
    nb_seen_batches = 0
    weak_loss_sum, strong_loss_sum, total_loss_sum = 0.0, 0.0, 0.0

    for i, (X, y) in enumerate(training_loader):
        # The DESEDDataset return a list of ground truth depending on the selecting option.
        # If weak and strong ground truth are selected, the list order is [WEAK, STRONG]
        X = X.cuda().float()
        y_weak = y[0].cuda().float()
        y_strong = y[1].cuda().float()

        weak_logits, strong_logits = model(X)

        # calc the loss
        weak_loss, strong_loss, total_loss = weak_synth_loss(
            weak_logits, strong_logits,
            y_weak, y_strong,
            reduce="mean"
        )

        # back propagation
        optimizers.zero_grad()
        total_loss.backward()
        optimizers.step()

        with torch.no_grad():
            # Read each loss once (every .item() is a device -> host transfer)
            weak_loss_value = weak_loss.item()
            strong_loss_value = strong_loss.item()
            total_loss_value = total_loss.item()

            nb_seen_batches += 1
            weak_loss_sum += weak_loss_value
            strong_loss_sum += strong_loss_value
            total_loss_sum += total_loss_value

            # calc metrics
            weak_pred = torch.sigmoid(weak_logits)
            strong_pred = torch.sigmoid(strong_logits)

            # NOTE(review): the metric objects are reset once per epoch, so the
            # values they return are presumably running values over the epoch
            # -- confirm in metric_utils.
            # tagging
            weak_binacc = weak_binacc_func(weak_pred, y_weak)
            weak_fscore = weak_f_func(weak_pred, y_weak)

            # loc
            strong_binacc = strong_binacc_func(strong_pred, y_strong)
            strong_fscore = strong_f_func(strong_pred, y_strong)

            # logs
            print(train_form.format(
                "Training: ",
                epoch + 1,
                int(100 * (i + 1) / len(training_loader)),
                "", weak_loss_value, strong_loss_value, total_loss_value,
                "", weak_binacc, strong_binacc, weak_fscore, strong_fscore,
                time.time() - start_time
            ), end="\r")

    # Nothing to report if the loader did not yield any batch
    if nb_seen_batches == 0:
        return

    # tensorboard logs (one point per epoch)
    tensorboard.add_scalar("train/weak_loss", weak_loss_sum / nb_seen_batches, epoch)
    tensorboard.add_scalar("train/strong_loss", strong_loss_sum / nb_seen_batches, epoch)
    tensorboard.add_scalar("train/total_loss", total_loss_sum / nb_seen_batches, epoch)

    tensorboard.add_scalar("train/weak_acc", weak_binacc, epoch)
    tensorboard.add_scalar("train/strong_acc", strong_binacc, epoch)
    tensorboard.add_scalar("train/weak_f1", weak_fscore, epoch)
    tensorboard.add_scalar("train/strong_f1", strong_fscore, epoch)

In [18]:
def val(epoch):
    """Evaluate the model on the validation loader and log the results.

    Works on the notebook-level objects ``model``, ``val_loader``,
    ``metrics`` (and the four metric callables), ``val_form`` and
    ``tensorboard``. The model parameters are not updated.

    A progress line is printed after every batch. TensorBoard receives one
    point per epoch and per tag: the losses averaged over ALL the validation
    batches (not only the last one) and the last value returned by each
    metric. Nothing is logged when the loader yields no batch.

    Args:
        epoch: zero-based epoch index; displayed as ``epoch + 1`` and used as
            the TensorBoard global step.
    """
    start_time = time.time()

    reset_all_metrics(metrics)

    model.eval()
    print("") # <-- Force new line

    # Accumulators for the per-epoch average of the losses
    nb_seen_batches = 0
    weak_loss_sum, strong_loss_sum, total_loss_sum = 0.0, 0.0, 0.0

    with torch.no_grad():
        for i, (X, y) in enumerate(val_loader):
            # Ground truth list order is [WEAK, STRONG]
            X = X.cuda().float()
            y_weak = y[0].cuda().float()
            y_strong = y[1].cuda().float()

            weak_logits, strong_logits = model(X)

            # calc the loss
            weak_loss, strong_loss, total_loss = weak_synth_loss(
                weak_logits, strong_logits,
                y_weak, y_strong,
                reduce="mean"
            )

            # Read each loss once (every .item() is a device -> host transfer)
            weak_loss_value = weak_loss.item()
            strong_loss_value = strong_loss.item()
            total_loss_value = total_loss.item()

            nb_seen_batches += 1
            weak_loss_sum += weak_loss_value
            strong_loss_sum += strong_loss_value
            total_loss_sum += total_loss_value

            # calc metrics
            weak_pred = torch.sigmoid(weak_logits)
            strong_pred = torch.sigmoid(strong_logits)

            # NOTE(review): the metric objects are reset once per call, so the
            # values they return are presumably running values over the
            # validation set -- confirm in metric_utils.
            # tagging
            weak_binacc = weak_binacc_func(weak_pred, y_weak)
            weak_fscore = weak_f_func(weak_pred, y_weak)

            # loc
            strong_binacc = strong_binacc_func(strong_pred, y_strong)
            strong_fscore = strong_f_func(strong_pred, y_strong)

            # logs
            print(val_form.format(
                "Validation: ",
                epoch + 1,
                int(100 * (i + 1) / len(val_loader)),
                "", weak_loss_value, strong_loss_value, total_loss_value,
                "", weak_binacc, strong_binacc, weak_fscore, strong_fscore,
                time.time() - start_time
            ), end="\r")

    # Nothing to report if the loader did not yield any batch
    if nb_seen_batches == 0:
        return

    # tensorboard logs (one point per epoch)
    tensorboard.add_scalar("val/weak_loss", weak_loss_sum / nb_seen_batches, epoch)
    tensorboard.add_scalar("val/strong_loss", strong_loss_sum / nb_seen_batches, epoch)
    tensorboard.add_scalar("val/total_loss", total_loss_sum / nb_seen_batches, epoch)

    tensorboard.add_scalar("val/weak_acc", weak_binacc, epoch)
    tensorboard.add_scalar("val/strong_acc", strong_binacc, epoch)
    tensorboard.add_scalar("val/weak_f1", weak_fscore, epoch)
    tensorboard.add_scalar("val/strong_f1", strong_fscore, epoch)

# Train

In [19]:
# tensorboard
# The writer created in the training-parameters cell is replaced here: close
# it first so that its event file is not left open.
tensorboard.close()
title = "dcase2019_system_%s" % (get_datetime())
tensorboard = SummaryWriter(log_dir=Path("../tensorboard/%s" % title), comment="weak baseline")

print(header)
try:
    # One training pass followed by one validation pass per epoch
    for e in range(nb_epochs):
        train(e)
        val(e)
finally:
    # Write the pending events to disk, even when the training is interrupted
    # (e.g. kernel interrupt) or fails.
    tensorboard.flush()

         Epoch  - %      - Losses:  Weak  | Strong  | Total  - metrics:  Weak acc  | Strong acc  | Weak F1  | Strong F1  - Time  

Training 1      - 100    -          0.3613| 0.0654  | 0.4266 -           0.8330    | 0.9593      | 0.2731   | 0.0015     - 48.2136
[1;4mValidati 1      - 100    -          0.4649| 0.2065  | 0.6715 -           0.8330    | 0.9668      | 0.0000   | 0.0000     - 9.6528[0m
Training 2      - 100    -          0.3280| 0.0970  | 0.4250 -           0.8481    | 0.9631      | 0.3446   | 0.1316     - 6.0403
[1;4mValidati 2      - 100    -          0.3941| 0.1871  | 0.5812 -           0.8557    | 0.9666      | 0.3784   | 0.0843     - 0.4965[0m
Training 3      - 100    -          0.2840| 0.0662  | 0.3502 -           0.8682    | 0.9625      | 0.4556   | 0.2201     - 5.9595
[1;4mValidati 3      - 100    -          0.3885| 0.1928  | 0.5813 -           0.8717    | 0.9595      | 0.5078   | 0.2096     - 0.4835[0m
Training 4      - 100    -          0.3122| 0.0519  | 0.36

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪