# import

In [6]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import os
os.environ["MKL_NUM_THREADS"] = "2"
os.environ["NUMEXPR_NU M_THREADS"] = "2"
os.environ["OMP_NUM_THREADS"] = "2"
import time

import numpy as np
import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
from torch.utils.tensorboard import SummaryWriter

In [10]:
from ubs8k.datasetManager import DatasetManager
from ubs8k.datasets import Dataset


import sys
sys.path.append("../..")

from util.utils import reset_seed, get_datetime, get_model_from_name
from util.checkpoint import CheckPoint
from metric_utils.metrics import CategoricalAccuracy, FScore, ContinueAverage

from UrbanSound8k.models import ScalableCnn

# Arguments

In [4]:
import argparse
parser = argparse.ArgumentParser()
# dataset related parameters
parser.add_argument("-d", "--dataset_root", default="../../datasets/ubs8k", type=str)
parser.add_argument("--supervised_ratio", default=1.0, type=float)
parser.add_argument("-t", "--train_folds", nargs="+", default=[1, 2, 3, 4, 5, 6, 7, 8, 9], type=int)
parser.add_argument("-v", "--val_folds", nargs="+", default=[10], type=int)

# learning parameters
parser.add_argument("--model", default="cnn0", type=str)
parser.add_argument("--batch_size", default=32, type=int)
parser.add_argument("--nb_epoch", default=100, type=int)
parser.add_argument("--learning_rate", default=0.003, type=int)

# compound scaling parameters
parser.add_argument("-a", "--alpha", default=1.0, type=float)
parser.add_argument("-b", "--beta", default=1.0, type=float)
parser.add_argument("-g", "--gamma", default=1.0, type=float)
parser.add_argument("-p", "--phi", default=1.0, type=float)

# extra utility parameters
parser.add_argument("--checkpoint_path", default="../../model_save/ubs8k/full_supervised_test", type=str)
parser.add_argument("--resume", action="store_true", default=False)
parser.add_argument("--tensorboard_path", default="../../tensorboard/ubs8k/full_supervised_test", type=str)
parser.add_argument("--tensorboard_sufix", default="", type=str)

args = parser.parse_args("")

# initialisation

In [22]:
reset_seed(1234)


In [11]:
valid_scales = []
scales = np.linspace(1.0, 2.0, 15)
tolerance = 0.05

for a in scales:
    for b in scales:
        for c in scales:
            v = a * b**2 * c**2
            
            if v-tolerance < 2 < v+tolerance:
                valid_scales.append((a, b, c))

In [12]:
len(valid_scales), valid_scales

(20,
 [(1.0, 1.0, 1.4285714285714286),
  (1.0, 1.4285714285714286, 1.0),
  (1.0714285714285714, 1.0, 1.3571428571428572),
  (1.0714285714285714, 1.0714285714285714, 1.2857142857142856),
  (1.0714285714285714, 1.2857142857142856, 1.0714285714285714),
  (1.0714285714285714, 1.3571428571428572, 1.0),
  (1.2142857142857142, 1.0, 1.2857142857142856),
  (1.2142857142857142, 1.2857142857142856, 1.0),
  (1.3571428571428572, 1.0, 1.2142857142857142),
  (1.3571428571428572, 1.0714285714285714, 1.1428571428571428),
  (1.3571428571428572, 1.1428571428571428, 1.0714285714285714),
  (1.3571428571428572, 1.2142857142857142, 1.0),
  (1.5, 1.0, 1.1428571428571428),
  (1.5, 1.0714285714285714, 1.0714285714285714),
  (1.5, 1.1428571428571428, 1.0),
  (1.7142857142857142, 1.0, 1.0714285714285714),
  (1.7142857142857142, 1.0714285714285714, 1.0),
  (1.7857142857142856, 1.0, 1.0714285714285714),
  (1.7857142857142856, 1.0714285714285714, 1.0),
  (2.0, 1.0, 1.0)])

In [15]:
print("scales=(")
for (a, b, g) in valid_scales:
    print("\t\"-a %f -b %f -g %f\" \\" % (a, b, g))
print(")")    

scales=(
	"-a 1.000000 -b 1.000000 -g 1.428571" \
	"-a 1.000000 -b 1.428571 -g 1.000000" \
	"-a 1.071429 -b 1.000000 -g 1.357143" \
	"-a 1.071429 -b 1.071429 -g 1.285714" \
	"-a 1.071429 -b 1.285714 -g 1.071429" \
	"-a 1.071429 -b 1.357143 -g 1.000000" \
	"-a 1.214286 -b 1.000000 -g 1.285714" \
	"-a 1.214286 -b 1.285714 -g 1.000000" \
	"-a 1.357143 -b 1.000000 -g 1.214286" \
	"-a 1.357143 -b 1.071429 -g 1.142857" \
	"-a 1.357143 -b 1.142857 -g 1.071429" \
	"-a 1.357143 -b 1.214286 -g 1.000000" \
	"-a 1.500000 -b 1.000000 -g 1.142857" \
	"-a 1.500000 -b 1.071429 -g 1.071429" \
	"-a 1.500000 -b 1.142857 -g 1.000000" \
	"-a 1.714286 -b 1.000000 -g 1.071429" \
	"-a 1.714286 -b 1.071429 -g 1.000000" \
	"-a 1.785714 -b 1.000000 -g 1.071429" \
	"-a 1.785714 -b 1.071429 -g 1.000000" \
	"-a 2.000000 -b 1.000000 -g 1.000000" \
)


# Prepare the dataset

In [25]:
audio_root = os.path.join(args.dataset_root, "audio")
metadata_root = os.path.join(args.dataset_root, "metadata")
all_folds = args.train_folds + args.val_folds

manager = DatasetManager(
    metadata_root, audio_root,
    folds=all_folds,
    verbose=2
)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [26]:
# prepare the sampler with the specified number of supervised file
train_dataset = Dataset(manager, folds=args.train_folds, cached=True)
val_dataset = Dataset(manager, folds=args.val_folds, cached=True)

# Prep model

## test if model is valid

In [30]:
from torchsummaryX import summary

common_parameters = dict(
     dataset = manager,
     initial_conv_inputs=[1, 24, 48, 48],
     initial_conv_outputs=[24, 48, 48, 48],
     initial_linear_inputs=[720, ],
     initial_linear_outputs=[10, ],
     initial_resolution=[64, 173],
     round_up = False,
)

for (a, b, g) in valid_scales:
    model = ScalableCnn(compound_scales=(a, b, g), **common_parameters)
    
    new_res = model.scaled_resolution
    input_tensor = torch.zeros((1, new_res[0], new_res[1]), dtype=torch.float)
    
    s = summary(model, input_tensor)

new scaled resolution:  (91, 247)
new feature extraction function generation: hop_length = 357
new conv layers:
inputs:  [1, 24, 48, 48]
ouputs:  [24, 48, 48, 48]
new dense layers:
inputs:  [3600]
ouputs:  [10]
                               Kernel Shape      Output Shape   Params  \
Layer                                                                    
0_features.0.Conv2d_0         [1, 24, 3, 3]  [1, 24, 91, 247]    240.0   
1_features.0.BatchNorm2d_1             [24]  [1, 24, 91, 247]     48.0   
2_features.0.Dropout2d_2                  -  [1, 24, 91, 247]        -   
3_features.0.ReLU6_3                      -  [1, 24, 91, 247]        -   
4_features.0.MaxPool2d_4                  -  [1, 24, 45, 123]        -   
5_features.1.Conv2d_0        [24, 48, 3, 3]  [1, 48, 45, 123]  10.416k   
6_features.1.BatchNorm2d_1             [48]  [1, 48, 45, 123]     96.0   
7_features.1.Dropout2d_2                  -  [1, 48, 45, 123]        -   
8_features.1.ReLU6_3                      -  [1, 

## Prep training

In [22]:
# create model
torch.cuda.empty_cache()

model = model_func()
model.cuda()

cnn07(
  (features): Sequential(
    (0): ConvPoolReLU(
      (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU6(inplace=True)
    )
    (1): ConvPoolReLU(
      (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(4, 2), stride=(4, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU6(inplace=True)
    )
    (2): ConvPoolReLU(
      (0): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): MaxPool2d(kernel_size=(2, 2), stride=(2, 2), padding=0, dilation=1, ceil_mode=False)
      (2): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ReLU6(inplace=True)
  

In [23]:
s_idx, u_idx = train_dataset.split_s_u(args.supervised_ratio)
S_sampler = torch.utils.data.SubsetRandomSampler(s_idx)

training_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, sampler=S_sampler)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=True)

# training parameters

In [24]:
# tensorboard
tensorboard_title = "%s_%s_%.1fS_%s" % (get_datetime(), model_func.__name__, args.supervised_ratio, args.tensorboard_sufix)
checkpoint_title = "%s_%.1fS" % (model_func.__name__, args.supervised_ratio)
tensorboard = SummaryWriter(log_dir="%s/%s" % (args.tensorboard_path, tensorboard_title), comment=model_func.__name__)

# losses
loss_ce = nn.CrossEntropyLoss(reduction="mean")

# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)

# callbacks
lr_lambda = lambda epoch: (1.0 + numpy.cos((epoch-1)*numpy.pi/args.nb_epoch))
lr_scheduler = LambdaLR(optimizer, lr_lambda)

# Checkpoint
checkpoint = CheckPoint(model, optimizer, mode="max", name="%s/%s.torch" % (args.checkpoint_path, checkpoint_title))

# Metrics
fscore_fn = FScore()
acc_fn = CategoricalAccuracy()
avg = ContinueAverage()

reset_metrics = lambda : [m.reset() for m in [fscore_fn, acc_fn, avg]]

def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

## Can resume previous training

In [25]:
if args.resume:
    checkpoint.load_last()

## training function

In [26]:
UNDERLINE_SEQ = "\033[1;4m"
RESET_SEQ = "\033[0m"

header_form = "{:<8.8} {:<6.6} - {:<6.6} - {:<8.8} {:<6.6} - {:<9.9} {:<12.12}| {:<9.9}- {:<6.6}"
value_form  = "{:<8.8} {:<6} - {:<6} - {:<8.8} {:<6.4f} - {:<9.9} {:<10.4f}| {:<9.4f}- {:<6.4f}"

header = header_form.format(
    "", "Epoch", "%", "Losses:", "ce", "metrics: ", "acc", "F1 ","Time"
)

train_form = value_form
val_form = UNDERLINE_SEQ + value_form + RESET_SEQ

print(header)

         Epoch  - %      - Losses:  ce     - metrics:  acc         | F1       - Time  


In [27]:
def maximum():
    def func(key, value):
        if key not in func.max:
            func.max[key] = value
        else:
            if func.max[key] < value:
                func.max[key] = value
        return func.max[key]

    func.max = dict()
    return func
maximum_fn = maximum()

In [28]:
def train(epoch):
    start_time = time.time()
    print("")

    reset_metrics()
    model.train()

    for i, (X, y) in enumerate(training_loader):
        X = X.cuda()
        y = y.cuda()

        logits = model(X)
        loss = loss_ce(logits, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.set_grad_enabled(False):
            pred = torch.softmax(logits, dim=1)
            pred_arg = torch.argmax(logits, dim=1)
            y_one_hot = F.one_hot(y, num_classes=10)

            acc = acc_fn(pred_arg, y).mean
            fscore = fscore_fn(pred, y_one_hot).mean
            avg_ce = avg(loss.item()).mean

            # logs
            print(train_form.format(
                "Training: ",
                epoch + 1,
                int(100 * (i + 1) / len(training_loader)),
                "", avg_ce,
                "", acc, fscore,
                time.time() - start_time
            ), end="\r")

    tensorboard.add_scalar("train/Lce", avg_ce, epoch)
    tensorboard.add_scalar("train/f1", fscore, epoch)
    tensorboard.add_scalar("train/acc", acc, epoch)

In [29]:
def val(epoch):
    start_time = time.time()
    print("")
    reset_metrics()
    model.eval()

    for i, (X, y) in enumerate(val_loader):
        X = X.cuda()
        y = y.cuda()

        logits = model(X)
        loss = loss_ce(logits, y)

        with torch.set_grad_enabled(False):
            pred = torch.softmax(logits, dim=1)
            pred_arg = torch.argmax(logits, dim=1)
            y_one_hot = F.one_hot(y, num_classes=10)

            acc = acc_fn(pred_arg, y).mean
            fscore = fscore_fn(pred, y_one_hot).mean
            avg_ce = avg(loss.item()).mean

            # logs
            print(val_form.format(
                "Validation: ",
                epoch + 1,
                int(100 * (i + 1) / len(val_loader)),
                "", avg_ce,
                "", acc, fscore,
                time.time() - start_time
            ), end="\r")

    tensorboard.add_scalar("val/Lce", avg_ce, epoch)
    tensorboard.add_scalar("val/f1", fscore, epoch)
    tensorboard.add_scalar("val/acc", acc, epoch)

    tensorboard.add_scalar("max/f1", maximum_fn("fscore", fscore), epoch )
    tensorboard.add_scalar("max/acc", maximum_fn("acc", acc), epoch )

    tensorboard.add_scalar("hyperparameters/learning_rate", get_lr(optimizer), epoch)

    checkpoint.step(acc)
    lr_scheduler.step()

In [30]:
print(header)

start_epoch = checkpoint.epoch_counter
end_epoch = args.nb_epoch

for e in range(start_epoch, args.nb_epoch):
    train(e)
    val(e)

tensorboard.flush()
tensorboard.close()

         Epoch  - %      - Losses:  ce     - metrics:  acc         | F1       - Time  

Training 1      - 100    -          1.7510 -           0.3530    | 0.2027   - 47.6553
[1;4mValidati 1      - 100    -          1.5187 -           0.4581    | 0.2465   - 4.7405[0m
 better performance: saving ...

Training 2      - 100    -          1.4810 -           0.4572    | 0.3454   - 3.7519
[1;4mValidati 2      - 100    -          1.3632 -           0.5102    | 0.4020   - 0.1475[0m
 better performance: saving ...

Training 3      - 100    -          1.2396 -           0.5687    | 0.4938   - 3.5014
[1;4mValidati 3      - 100    -          1.1689 -           0.5963    | 0.5325   - 0.1284[0m
 better performance: saving ...

Training 4      - 100    -          1.1013 -           0.6419    | 0.5916   - 3.5514
[1;4mValidati 4      - 100    -          1.6847 -           0.4850    | 0.4882   - 0.1397[0m
Training 5      - 100    -          0.9654 -           0.6860    | 0.6580   - 3.4296
[1;4mV

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪