In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to the project packages are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import time
import os
from pathlib import Path
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.utils.tensorboard import SummaryWriter

import sys
sys.path.append("..")

# dataset manager
from dcase2020.datasetManager import DESEDManager
from dcase2020.datasets import DESEDDataset

# utility function & metrics & augmentation
from metric_utils.metrics import FScore, BinaryAccuracy
from dcase2020_task4.util.utils import get_datetime, reset_seed

# models
from dcase2020_task4.baseline.models import WeakBaseline, WeakStrongBaseline

In [3]:
# ==== set the log ====
import logging
import logging.config
from dcase2020.util.log import DEFAULT_LOGGING
# Install the project's logging configuration dictionary for the whole process.
logging.config.dictConfig(DEFAULT_LOGGING)
# Notebook-level logger, named after the current module.
log = logging.getLogger(__name__)

In [4]:
# ==== reset the seed for reproducibility ====
# Called before the train / validation split and the model creation.
# NOTE(review): presumably seeds every RNG they rely on — confirm in reset_seed.
reset_seed(1234)

# Prepare the data

In [5]:
# ==== load the dataset ====
# Both roots are relative paths, resolved against the notebook's working directory.
desed_metadata_root = "../dataset/DESED/dataset/metadata"
desed_audio_root = "../dataset/DESED/dataset/audio"
# Alternative locations on a Windows drive, kept for reference:
# desed_metadata_root = os.path.join("e:/", "Corpus", "dcase2020", "DESED", "dataset", "metadata")
# desed_audio_root = os.path.join("e:/", "Corpus", "dcase2020", "DESED", "dataset", "audio")

# Central object the subsets, the train / validation split and the datasets are
# built from in the cells that follow.
manager = DESEDManager(
    desed_metadata_root, desed_audio_root,
    sampling_rate = 22050,
    validation_ratio=0.2,  # ratio used later by split_train_validation()
    from_disk=False,  # NOTE(review): presumably read from the HDF5 archive rather than raw audio files — confirm
    nb_vector_bin=53,  # NOTE(review): presumably the number of temporal bins of the strong labels — confirm
    verbose = 1
)

[1;34mDEBUG --- datasetManager.__init__ >>> ../dataset/DESED/dataset/audio/dcase2020_dataset_22050.hdf5[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/weak.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/unlabel_in_domain.tsv[0m
[1;37mINFO --- datasetManager._load_metadata >>> Reading metadata: ../dataset/DESED/dataset/metadata/train/synthetic20.tsv[0m


100%|██████████| 7582/7582 [00:15<00:00, 477.94it/s]


## Add weak subset

In [6]:
# Only the "weak" training subset is loaded for this baseline.
manager.add_subset("weak")
# The synthetic subset is left out of this run:
# manager.add_subset("synthetic20")

[1;37mINFO --- datasetManager.add_subset >>> Loading dataset: train, subset: weak[0m
Loading dataset: train, subset: weak
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> hdf_file: <HDF5 file "dcase2020_dataset_22050.hdf5" (mode r)>[0m
[1;34mDEBUG --- datasetManager._hdf_to_dict >>> path: DESED/dataset/audio/train/weak[0m


## Create the train / validation split

In [7]:
# Build the train / validation split from the subsets added to the manager,
# using the validation_ratio given at construction (0.2).
manager.split_train_validation()

[1;37mINFO --- datasetManager.split_train_validation >>> Creating new train / validation split[0m
[1;37mINFO --- datasetManager.split_train_validation >>> validation ratio : 0.2[0m


## Create the train and validation dataset

In [8]:
# Signal augmentations for the training samples; none is enabled for this run,
# the candidates are kept below for reference.
augments = [
    # signal_augmentation.Noise(0.5, target_snr=15),
    # signal_augmentation.RandomTimeDropout(0.5, dropout=0.2)
]

# Training view of the split: receives the (currently empty) augmentation list.
train_dataset = DESEDDataset(
    manager,
    train=True,
    val=False,
    augments=augments,
    cached=True,
)

# Validation view of the split: never augmented.
val_dataset = DESEDDataset(
    manager,
    train=False,
    val=True,
    augments=[],
    cached=True,
)

In [9]:
# Number of files in each split, displayed as (train, validation).
tuple(len(dataset.filenames) for dataset in (train_dataset, val_dataset))

(1097, 243)

# Prepare training

## Model

In [10]:
# Instantiate the weak-label baseline and move it to the GPU in one step
# (nn.Module.cuda() returns the module itself).
model = WeakBaseline().cuda()
model  # last expression of the cell: display the architecture

WeakBaseline(
  (features): Sequential(
    (0): ConvPoolReLU(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout2d(p=0.0, inplace=False)
      (4): ReLU6(inplace=True)
    )
    (1): ConvPoolReLU(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Dropout2d(p=0.3, inplace=False)
      (4): ReLU6(inplace=True)
    )
    (2): ConvPoolReLU(
      (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=

In [11]:
from torchsummaryX import summary

# All-zero dummy input created on the CPU and moved to the GPU, where the model lives.
# NOTE(review): (1, 64, 431) is presumably (batch, mel bins, frames) — confirm.
input_tensor = torch.zeros((1, 64, 431), dtype=torch.float).cuda()

# Print the layer-by-layer summary and keep the returned table.
s = summary(model, input_tensor)


                               Kernel Shape      Output Shape  Params  \
Layer                                                                   
0_features.0.Conv2d_0         [1, 32, 3, 3]  [1, 32, 64, 431]   320.0   
1_features.0.MaxPool2d_1                  -  [1, 32, 16, 215]       -   
2_features.0.BatchNorm2d_2             [32]  [1, 32, 16, 215]    64.0   
3_features.0.Dropout2d_3                  -  [1, 32, 16, 215]       -   
4_features.0.ReLU6_4                      -  [1, 32, 16, 215]       -   
5_features.1.Conv2d_0        [32, 32, 3, 3]  [1, 32, 16, 215]  9.248k   
6_features.1.MaxPool2d_1                  -   [1, 32, 4, 107]       -   
7_features.1.BatchNorm2d_2             [32]   [1, 32, 4, 107]    64.0   
8_features.1.Dropout2d_3                  -   [1, 32, 4, 107]       -   
9_features.1.ReLU6_4                      -   [1, 32, 4, 107]       -   
10_features.2.Conv2d_0       [32, 32, 3, 3]   [1, 32, 4, 107]  9.248k   
11_features.2.MaxPool2d_1                 -    [1, 

## Training parameters (criterion, callbacks, loaders & metrics)

In [12]:
# ---- run length ----
nb_epochs = 100
batch_size = 32
nb_batch = len(train_dataset) // batch_size  # number of full batches per epoch

# ---- loss & optimizer ----
# The model returns raw logits (see the training function), so the loss applies
# the sigmoid itself.
criterion = nn.BCEWithLogitsLoss(reduction="mean")
optimizers = torch.optim.Adam(model.parameters(), lr=0.003)

# ---- callbacks ----
callbacks = []

# ---- tensorboard ----
# One log directory per run, suffixed with the launch date.
title = "WeakBaseline_%s" % (get_datetime())
tensorboard = SummaryWriter(
    log_dir=Path("../tensorboard/%s" % title),
    comment="weak baseline",
)

# ---- loaders ----
# Training batches are reshuffled every epoch; validation order is fixed.
training_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
)

# ---- metrics ----
binacc_func = BinaryAccuracy()
f_func = FScore()

## Training functions

In [13]:
# Column layouts of the progress table. Both templates share the same field
# widths: `header_form` renders the titles, `value_form` renders one line of
# values (string fields are truncated to the column width).
header_form = "{:<8.8} {:<6.6} - {:<6.6} - {:<8.8} {:<6.6} - {:<9.9} {:<10.10}| {:<9.9}- {:<6.6}"
value_form  = "{:<8.8} {:<6} - {:<6} - {:<8.8} {:<6.4f} - {:<9.9} {:<10.4f}| {:<9.4f}- {:<6.4f}"

# Column titles, in display order; the first column is left blank because it
# holds the "Training" / "Validation" tag on the value lines.
column_titles = ("", "Epoch", "%", "Losses:", "Weak ", "metrics: ", "Weak acc ", "Weak F1 ", "Time")
header = header_form.format(*column_titles)
print(header)

         Epoch  - %      - Losses:  Weak   - metrics:  Weak acc  | Weak F1  - Time  


In [14]:
def train(epoch: int):
    """Run one training epoch over ``training_loader`` and log it to tensorboard.

    For every mini-batch the model is evaluated, the loss is back-propagated,
    the optimizer is stepped and a progress line is rewritten in place. Once
    the epoch is over, tensorboard receives the loss averaged over every sample
    seen during the epoch, the last values returned by the two metric
    callables, and the precision / recall attributes of the F-score metric.

    Relies on the notebook globals ``model``, ``criterion``, ``optimizers``,
    ``training_loader``, ``binacc_func``, ``f_func``, ``tensorboard`` and
    ``value_form``.

    Args:
        epoch: zero-based epoch index; used as the tensorboard step and shown
            as ``epoch + 1`` in the progress line.
    """
    start_time = time.time()

    # Both metrics are reset at the start of every epoch.
    # NOTE(review): presumably they accumulate over the batches in between — confirm.
    binacc_func.reset()
    f_func.reset()

    model.train()
    print("") # <-- Force new line

    nb_batches = len(training_loader)
    loss_sum = 0.0   # sum of per-sample losses over the epoch
    nb_samples = 0   # number of samples seen over the epoch

    for i, (X_weak, y_weak) in enumerate(training_loader):
        # The DESEDDataset returns a list of ground truths whose content depends
        # on the selected options. When both weak and strong ground truths are
        # selected the order is [WEAK, STRONG]; here the list only holds [WEAK].
        X_weak, y_weak = X_weak.cuda().float(), y_weak[0].cuda().float()

        logits = model(X_weak)

        loss = criterion(logits, y_weak)

        # calc metrics
        pred = torch.sigmoid(logits)
        binacc = binacc_func(pred, y_weak)
        fscore = f_func(pred, y_weak)

        # back propagation
        optimizers.zero_grad()
        loss.backward()
        optimizers.step()

        # The criterion averages over the batch, so weight each batch loss by
        # its size: the last batch of an epoch is usually smaller than the others.
        batch_loss = loss.item()
        batch_len = X_weak.size(0)
        loss_sum += batch_loss * batch_len
        nb_samples += batch_len

        # logs
        print(value_form.format(
            "Training: ",
            epoch + 1,
            int(100 * (i + 1) / nb_batches),
            "", batch_loss,
            "", binacc, fscore,
            time.time() - start_time
        ), end="\r")

    # An empty loader produced no loss and no metric value: nothing to log.
    if nb_samples == 0:
        return

    # tensorboard logs
    # The logged loss is the mean over the whole epoch, not the last batch only.
    tensorboard.add_scalar("train/loss", loss_sum / nb_samples, epoch)
    tensorboard.add_scalar("train/acc", binacc, epoch)
    tensorboard.add_scalar("train/f1", fscore, epoch)
    tensorboard.add_scalar("train/precision", f_func.precision, epoch)
    tensorboard.add_scalar("train/recall", f_func.recall, epoch)

    

In [15]:
def val(epoch: int):
    """Evaluate the model over ``val_loader`` and log the result to tensorboard.

    The model is switched to evaluation mode and run without gradient
    tracking. A progress line is rewritten in place for every mini-batch. Once
    the pass is over, tensorboard receives the loss averaged over every
    validation sample, the last values returned by the two metric callables,
    and the precision / recall attributes of the F-score metric.

    Relies on the notebook globals ``model``, ``criterion``, ``val_loader``,
    ``binacc_func``, ``f_func``, ``tensorboard`` and ``value_form``.

    Args:
        epoch: zero-based epoch index; used as the tensorboard step and shown
            as ``epoch + 1`` in the progress line.
    """
    start_time = time.time()

    # Both metrics are reset at the start of every validation pass.
    # NOTE(review): presumably they accumulate over the batches in between — confirm.
    binacc_func.reset()
    f_func.reset()

    model.eval()
    print("") # <-- Force new line

    nb_batches = len(val_loader)
    loss_sum = 0.0   # sum of per-sample losses over the validation set
    nb_samples = 0   # number of validation samples seen

    with torch.no_grad():
        for i, (X_weak, y_weak) in enumerate(val_loader):
            # The dataset yields a list of ground truths; only the first
            # element is used, as the weak labels.
            X_weak, y_weak = X_weak.cuda().float(), y_weak[0].cuda().float()

            logits = model(X_weak)

            loss = criterion(logits, y_weak)

            # calc metrics
            pred = torch.sigmoid(logits)
            binacc = binacc_func(pred, y_weak)
            fscore = f_func(pred, y_weak)

            # The criterion averages over the batch, so weight each batch loss
            # by its size: the last batch is usually smaller than the others.
            batch_loss = loss.item()
            batch_len = X_weak.size(0)
            loss_sum += batch_loss * batch_len
            nb_samples += batch_len

            # logs
            print(value_form.format(
                "Validation: ",
                epoch + 1,
                int(100 * (i + 1) / nb_batches),
                "", batch_loss,
                "", binacc, fscore,
                time.time() - start_time
            ), end="\r")

    # An empty loader produced no loss and no metric value: nothing to log.
    if nb_samples == 0:
        return

    # tensorboard logs
    # The logged loss is the mean over the whole validation set, not the last batch only.
    tensorboard.add_scalar("val/loss", loss_sum / nb_samples, epoch)
    tensorboard.add_scalar("val/acc", binacc, epoch)
    tensorboard.add_scalar("val/f1", fscore, epoch)
    tensorboard.add_scalar("val/precision", f_func.precision, epoch)
    tensorboard.add_scalar("val/recall", f_func.recall, epoch)

# Train

In [16]:
# Print the column titles once, then alternate one training pass and one
# validation pass per epoch.
print(header)
for epoch_idx in range(nb_epochs):
    train(epoch_idx)
    val(epoch_idx)

         Epoch  - %      - Losses:  Weak   - metrics:  Weak acc  | Weak F1  - Time  

Training 1      - 100    -          0.3416 -           0.8454    | 0.0565   - 14.1074
--------------------------------------------------     0.8503    | 0.0573   - 3.0254

Training 2      - 100    -          0.3938 -           0.8554    | 0.0923   - 0.4879
--------------------------------------------------     0.8565    | 0.2980   - 0.0390

Training 3      - 100    -          0.2879 -           0.8590    | 0.1447   - 0.4591
--------------------------------------------------     0.8549    | 0.1423   - 0.0392

Training 4      - 100    -          0.2753 -           0.8668    | 0.2402   - 0.4597
--------------------------------------------------     0.8698    | 0.3746   - 0.0389

Training 5      - 100    -          0.3121 -           0.8705    | 0.3019   - 0.4508
--------------------------------------------------     0.8794    | 0.4096   - 0.0364

Training 6      - 100    -          0.2963 -           0.8

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪