# import

In [None]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Cap the native math thread pools at two threads each. The variables are set
# before numpy / torch are imported below so that they are already present in
# the environment when those libraries initialise.
os.environ["MKL_NUM_THREADS"] = "2"
# Fixed: the key used to be "NUMEXPR_NU M_THREADS" (stray space), which created
# an unrelated variable and left the NumExpr thread count unconstrained.
os.environ["NUMEXPR_NUM_THREADS"] = "2"
os.environ["OMP_NUM_THREADS"] = "2"
import numpy as np
import tqdm
import time

import torch
import torch.nn as nn
import torch.utils.data as data
import torch.nn.functional as F
from torch.optim.lr_scheduler import LambdaLR
from advertorch.attacks import GradientSignAttack
from torch.utils.tensorboard import SummaryWriter

from functools import reduce 


In [3]:
import sys
sys.path.append("../src/")

from datasetManager import DatasetManager
from generators import Generator
import signal_augmentations as sa 

# Utils

## Metrics

In [4]:
class Metrics:
    """Base class for metrics that keep a running average over successive calls.

    Attributes:
        value: metric computed on the most recent call (set by subclasses).
        accumulate_value: sum of `value` since the last `reset()`.
        count: number of calls since the last `reset()`.
        epsilon: small constant stored for subclasses; unused by this base class.
    """

    def __init__(self, epsilon=1e-10):
        self.epsilon = epsilon
        self.value = 0
        # Same initial state as after an explicit reset.
        self.reset()

    def reset(self):
        """Clear the running sum and the call counter (`value` is left untouched)."""
        self.accumulate_value, self.count = 0, 0

    def __call__(self):
        """Register one more call; subclasses invoke this before updating the sum."""
        self.count = self.count + 1

        
class BinaryAccuracy(Metrics):
    """Running element-wise accuracy for binary (multi-label) predictions.

    Predictions are thresholded at 0.5 and compared element by element with
    the targets.
    """

    def __init__(self, epsilon=1e-10):
        Metrics.__init__(self, epsilon)

    def __call__(self, y_pred, y_true):
        """Update the metric with one batch.

        Args:
            y_pred: tensor of scores; values > 0.5 count as positive.
            y_true: tensor of 0/1 targets, broadcast-compatible with `y_pred`.

        Returns:
            The mean of the per-batch accuracies seen since the last `reset()`.
        """
        super().__call__()

        with torch.set_grad_enabled(False):
            y_pred = (y_pred > 0.5).float()
            correct = (y_pred == y_true).float().sum()
            # numel() equals shape[0] * shape[1] for 2-D targets but also
            # works for 1-D or higher-rank targets, where the previous
            # hard-coded product raised or over-estimated the accuracy.
            self.value = correct / y_true.numel()

            self.accumulate_value += self.value
            return self.accumulate_value / self.count
        
        
class CategoricalAccuracy(Metrics):
    """Running accuracy for class-index predictions."""

    def __init__(self, epsilon=1e-10):
        super().__init__(epsilon)

    def __call__(self, y_pred, y_true):
        """Update with one batch and return the mean batch accuracy so far.

        Both arguments are compared with `==`, so they are expected to hold
        class indices of matching shape.
        """
        super().__call__()

        with torch.no_grad():
            hits = y_true == y_pred
            self.value = hits.float().mean()
            self.accumulate_value += self.value

            running_mean = self.accumulate_value / self.count
            return running_mean

        
class Ratio(Metrics):
    """Running percentage of items whose two predictions differ."""

    def __init__(self, epsilon=1e-10):
        super().__init__(epsilon)

    def __call__(self, y_pred, y_adv_pred):
        """Update with one batch of paired predictions.

        Returns the mean, over calls since the last `reset()`, of the
        percentage of pairs `(y_pred[i], y_adv_pred[i])` that are not equal.
        """
        super().__call__()

        mismatches = [int(clean != adv) for clean, adv in zip(y_pred, y_adv_pred)]
        self.value = sum(mismatches) / len(mismatches) * 100
        self.accumulate_value += self.value

        return self.accumulate_value / self.count

In [5]:
import datetime
def get_datetime(now=None):
    """Return a timestamp formatted as ``YYYY-MM-DD_HH:MM:SS``.

    Args:
        now: optional `datetime.datetime` to format; defaults to the current
            local time.

    The previous implementation sliced ``str(now)`` assuming a trailing
    ``.ffffff`` fraction, which is absent when the microsecond field is 0 and
    caused the time part to be cut short. `strftime` has no such edge case.
    """
    if now is None:
        now = datetime.datetime.now()
    return now.strftime("%Y-%m-%d_%H:%M:%S")

# Initialization

## set seeds

In [6]:
def reset_seed(seed=43):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy's legacy global generator and PyTorch.
    Python's `random` was previously left unseeded, so any code relying on it
    was not reproducible across runs.
    """
    import random  # local import: keeps this cell self-contained

    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
reset_seed()

## Prepare GPU

In [7]:
# Select the compute device: the first CUDA GPU when one is available, else the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0") if use_cuda else torch.device("cpu")
# cudnn.benchmark = True

# Model definition

## CNN original
https://arxiv.org/pdf/1608.04363.pdf

In [8]:
class ConvPoolReLU(nn.Sequential):
    """Conv2d -> MaxPool2d -> BatchNorm2d -> ReLU6, applied in that order."""

    def __init__(self, in_size, out_size, kernel_size, stride, padding,
                 pool_kernel_size, pool_stride):
        conv = nn.Conv2d(in_size, out_size, kernel_size=kernel_size, stride=stride, padding=padding)
        pool = nn.MaxPool2d(kernel_size=pool_kernel_size, stride=pool_stride)
        norm = nn.BatchNorm2d(out_size)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, pool, norm, activation)
        
class ConvReLU(nn.Sequential):
    """Conv2d followed by an in-place ReLU6 (no pooling, no normalisation)."""

    def __init__(self, in_size, out_size, kernel_size, stride, padding):
        conv = nn.Conv2d(in_size, out_size, kernel_size=kernel_size, stride=stride, padding=padding)
        super().__init__(conv, nn.ReLU6(inplace=True))

In [7]:
class cnn(nn.Module):
    """Three pooled conv blocks, one plain conv block and a 10-way linear head.

    The linear layer expects 1008 flattened features, so the input resolution
    must be one that yields 48 * H' * W' == 1008 after the feature extractor.
    """

    def __init__(self):
        super().__init__()

        pool = (4, 2)  # used both as pooling kernel size and stride
        conv_blocks = [
            ConvPoolReLU(1, 24, 3, 1, 1, pool, pool),
            ConvPoolReLU(24, 48, 3, 1, 1, pool, pool),
            ConvPoolReLU(48, 48, 3, 1, 1, pool, pool),
            ConvReLU(48, 48, 3, 1, 1),
        ]
        self.features = nn.Sequential(*conv_blocks)

        head = [
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(1008, 10),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Insert a channel axis after the batch axis, then return class logits."""
        trailing_dims = x.shape[1:]
        x = x.view(-1, 1, *trailing_dims)
        return self.classifier(self.features(x))

## RNN

In [34]:
class ConvBNReLU(nn.Sequential):
    """Conv2d -> BatchNorm2d -> in-place ReLU."""

    def __init__(self, in_size, out_size, conv_kernel_size, conv_stride, conv_padding):
        conv = nn.Conv2d(
            in_size,
            out_size,
            kernel_size=conv_kernel_size,
            stride=conv_stride,
            padding=conv_padding,
        )
        super().__init__(conv, nn.BatchNorm2d(out_size), nn.ReLU(inplace=True))

In [88]:
class crnn(nn.Module):
    """Convolutional front-end followed by a bidirectional GRU and a linear head.

    `forward` returns one 10-dimensional output per input item, obtained by
    averaging the per-step outputs of the linear head over the sequence axis.
    """

    def __init__(self):
        super().__init__()

        conv_stack = [
            ConvBNReLU(1, 64, 3, 1, 1),
            nn.MaxPool2d(kernel_size=(4, 2), stride=(4, 2)),
            ConvBNReLU(64, 64, 3, 1, 1),
            nn.MaxPool2d(kernel_size=(4, 2), stride=(4, 2)),
            ConvBNReLU(64, 64, 3, 1, 1),
            nn.MaxPool2d(kernel_size=(4, 1), stride=(4, 1)),
        ]
        self.features = nn.Sequential(*conv_stack)

        self.rnn = nn.GRU(64, 64, num_layers=1, batch_first=True, bidirectional=True)

        # 128 = 2 directions * 64 hidden units
        self.strong = nn.Sequential(nn.Linear(128, 10))

    def forward(self, x):
        x = x.view(-1, 1, *x.shape[1:])

        feature_maps = self.features(x)

        # Drop the second-to-last axis (only removed when its size is 1 --
        # assumes the pooling collapsed it; TODO confirm for the input size
        # in use), then put the last axis first as the sequence dimension.
        sequence = feature_maps.squeeze(dim=-2).permute(0, 2, 1)

        recurrent_out, _ = self.rnn(sequence)
        per_step = self.strong(recurrent_out)

        # Average over the whole sequence to get a single output per item.
        pooled = per_step.permute(0, 2, 1)
        pooled = F.avg_pool1d(pooled, kernel_size=pooled.size()[2:])
        return pooled.view(-1, pooled.shape[1])

## EfficientNet

In [42]:
class MultisampleDropout2d(nn.Module):
    """Average of several independent Dropout2d samples of the same input.

    https://arxiv.org/pdf/1905.09788.pdf

    Args:
        ratio: dropout probability passed to every `nn.Dropout2d`.
        nb_sample: number of dropout samples that are averaged.
    """
    def __init__(self, ratio, nb_sample):
        super(MultisampleDropout2d, self).__init__()
        self.nb_sample = nb_sample

        # nn.ModuleList registers the dropouts as sub-modules. With the plain
        # Python list used before, `model.eval()` / `model.train()` never
        # reached them, so dropout stayed active during validation.
        self.dropouts = nn.ModuleList(nn.Dropout2d(ratio) for _ in range(nb_sample))

    def forward(self, x):
        """Apply every dropout to `x` and return the element-wise mean."""
        samples = [dropout(x) for dropout in self.dropouts]
        return torch.mean(torch.stack(samples, dim=0), dim=0)
    
class MultisampleDropout1d(nn.Module):
    """Average of several independent Dropout samples of the same input.

    https://arxiv.org/pdf/1905.09788.pdf

    Args:
        ratio: dropout probability passed to every `nn.Dropout`.
        nb_sample: number of dropout samples that are averaged.
    """
    def __init__(self, ratio, nb_sample):
        super(MultisampleDropout1d, self).__init__()
        self.nb_sample = nb_sample

        # nn.ModuleList registers the dropouts as sub-modules. With the plain
        # Python list used before, `model.eval()` / `model.train()` never
        # reached them, so dropout stayed active during validation.
        self.dropouts = nn.ModuleList(nn.Dropout(ratio) for _ in range(nb_sample))

    def forward(self, x):
        """Apply every dropout to `x` and return the element-wise mean."""
        samples = [dropout(x) for dropout in self.dropouts]
        return torch.mean(torch.stack(samples, dim=0), dim=0)

In [26]:
class MBConv(nn.Module):
    """Inverted-bottleneck block: 1x1 expand -> depthwise conv -> 1x1 project.

    Args:
        in_size: number of input channels.
        out_size: number of output channels.
        t: expansion factor; the hidden width is `in_size * t`.
        kernel_size, stride, padding: parameters of the depthwise convolution.

    A skip connection is added only when the block preserves the tensor shape,
    i.e. `stride == 1` and `in_size == out_size`. Previously only the stride
    was checked, so a stride-1 block that changed the channel count failed
    with a shape mismatch in `x + self.conv(x)`.
    """
    def __init__(self, in_size, out_size, t, kernel_size, stride, padding):
        super(MBConv, self).__init__()
        expand_dim = in_size * t
        self.stride = stride
        self.use_residual = stride == 1 and in_size == out_size

        self.conv = nn.Sequential(
            # pointwise expansion
            nn.Conv2d(in_size, expand_dim, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(expand_dim),
            nn.ReLU6(inplace=True),

            # depthwise convolution (one group per channel)
            nn.Conv2d(expand_dim, expand_dim, kernel_size=kernel_size, stride=stride, padding=padding, groups=expand_dim),
            nn.BatchNorm2d(expand_dim),
            nn.ReLU6(inplace=True),

            # pointwise projection
            nn.Conv2d(expand_dim, out_size, kernel_size=1, stride=1, padding=0),
            nn.BatchNorm2d(out_size),
            nn.ReLU6(inplace=True),
        )

    def forward(self, x):
        out = self.conv(x)
        if self.use_residual:
            return x + out
        return out

In [30]:
class EfficientNet(nn.Module):
    """Stack of MBConv stages with a 1x1-conv classifier and global average pooling.

    Args:
        conv_input_dim: stored on the instance; not used to build the layers.
        conv_in_size / conv_out_size: input / output channels of each stage.
        t: expansion factor of each stage (index 0 is ignored).
        s: stride of the first block of each stage (index 0 is ignored).
        n: number of MBConv blocks in each stage (index 0 is ignored).

    Stage 0 is a plain 3x3 convolution; every later stage is one MBConv with
    stride `s[i]` followed by `n[i] - 1` stride-1 MBConv blocks.
    """

    def __init__(self,
                 conv_input_dim: tuple = (64, 431),
                 conv_in_size: list = [1, 64, 64],
                 conv_out_size: list = [64, 64, 64],
                 t = [1, 6, 6],
                 s = [1, 2, 2],
                 n = [1, 2, 2],
                ):
        super().__init__()
        self.i = 0

        self.conv_input_dim = conv_input_dim
        self.conv_in_size = conv_in_size
        self.conv_out_size = conv_out_size
        self.t = t

        stages = []
        for stage, in_channels in enumerate(conv_in_size):
            out_channels = conv_out_size[stage]

            if stage == 0:
                # stem: ordinary convolution
                stages.append(nn.Conv2d(in_channels, out_channels, 3, 1, 1))
            else:
                stages.append(MBConv(in_channels, out_channels, t[stage], 3, s[stage], 1))
                for _ in range(n[stage] - 1):
                    stages.append(MBConv(out_channels, out_channels, t[stage], 3, 1, 1))

        self.features = nn.Sequential(*stages)

        self.classifier = nn.Sequential(
            MultisampleDropout2d(0.2, 8),
            nn.Conv2d(self.conv_out_size[-1], 10, kernel_size=1, stride=1, padding=0),
        )

    def forward(self, x):
        """Insert a channel axis, run the network and average over the spatial axes."""
        x = x.view(-1, 1, *x.shape[1:])

        class_maps = self.classifier(self.features(x))

        # global average pooling over both spatial axes
        pooled = F.avg_pool2d(class_maps, kernel_size=class_maps.size()[2:])
        return pooled.view(-1, pooled.shape[1])


## CNN With dropout

In [8]:
class ConvBNReLUPool(nn.Sequential):
    """Conv2d -> BatchNorm2d -> Dropout2d -> ReLU6 -> MaxPool2d.

    `dropout` is the channel-dropout probability (0.0 by default).
    """

    def __init__(self, in_size, out_size, kernel_size, stride, padding,
                 pool_kernel_size, pool_stride, dropout: float = 0.0):
        layers = [
            nn.Conv2d(in_size, out_size, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.BatchNorm2d(out_size),
            nn.Dropout2d(dropout),
            nn.ReLU6(inplace=True),
            nn.MaxPool2d(kernel_size=pool_kernel_size, stride=pool_stride),
        ]
        super().__init__(*layers)
        
# NOTE(review): this re-declares ConvReLU with the same layers as the
# definition in the "CNN original" section; the later definition wins.
class ConvReLU(nn.Sequential):
    """Conv2d followed by an in-place ReLU6."""

    def __init__(self, in_size, out_size, kernel_size, stride, padding):
        layers = (
            nn.Conv2d(in_size, out_size, kernel_size=kernel_size, stride=stride, padding=padding),
            nn.ReLU6(inplace=True),
        )
        super().__init__(*layers)

In [9]:
class cnn_d(nn.Module):
    """CNN variant with channel dropout inside the second and third conv blocks.

    The linear head expects 1344 flattened features, so the input resolution
    must yield 64 * H' * W' == 1344 after the feature extractor.
    """

    def __init__(self):
        super().__init__()

        pool = (4, 2)  # used both as pooling kernel size and stride
        conv_blocks = [
            ConvBNReLUPool(1, 32, 3, 1, 1, pool, pool, 0.0),
            ConvBNReLUPool(32, 64, 3, 1, 1, pool, pool, 0.3),
            ConvBNReLUPool(64, 64, 3, 1, 1, pool, pool, 0.3),
            ConvReLU(64, 64, 3, 1, 1),
        ]
        self.features = nn.Sequential(*conv_blocks)

        head = [
            nn.Flatten(),
            nn.Dropout(0.5),
            nn.Linear(1344, 10),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Insert a channel axis after the batch axis, then return class logits."""
        trailing_dims = x.shape[1:]
        x = x.view(-1, 1, *trailing_dims)
        return self.classifier(self.features(x))

## CNN compound scaling

### Automatically find valid scaling factors

In [16]:
import itertools

# Candidate factors: alpha and beta each take 6 evenly spaced values in
# [1, 2]; gamma is fixed to 1.
alpha = np.linspace(1, 2, 6)
beta = np.linspace(1, 2, 6)
gamma = np.linspace(1, 1, 1)

# Keep every (alpha, beta, gamma) combination with alpha * beta^2 * gamma^2 <= 4.
valid_scaling = [
    (a, b, g)
    for a, b, g in itertools.product(alpha, beta, gamma)
    if a * b**2 * g**2 <= 4
]

In [17]:
valid_scaling

[(1.0, 1.0, 1.0),
 (1.0, 1.2, 1.0),
 (1.0, 1.4, 1.0),
 (1.0, 1.6, 1.0),
 (1.0, 1.8, 1.0),
 (1.0, 2.0, 1.0),
 (1.2, 1.0, 1.0),
 (1.2, 1.2, 1.0),
 (1.2, 1.4, 1.0),
 (1.2, 1.6, 1.0),
 (1.2, 1.8, 1.0),
 (1.4, 1.0, 1.0),
 (1.4, 1.2, 1.0),
 (1.4, 1.4, 1.0),
 (1.4, 1.6, 1.0),
 (1.6, 1.0, 1.0),
 (1.6, 1.2, 1.0),
 (1.6, 1.4, 1.0),
 (1.8, 1.0, 1.0),
 (1.8, 1.2, 1.0),
 (1.8, 1.4, 1.0),
 (2.0, 1.0, 1.0),
 (2.0, 1.2, 1.0),
 (2.0, 1.4, 1.0)]

In [18]:
# NOTE(review): same layers as the ConvBNReLUPool declared in the
# "CNN With dropout" section; this later definition shadows it.
class ConvBNReLUPool(nn.Sequential):
    """Conv2d -> BatchNorm2d -> Dropout2d -> ReLU6 -> MaxPool2d.

    `dropout` is the channel-dropout probability (0.0 by default).
    """

    def __init__(self, in_size, out_size, kernel_size, stride, padding,
                 pool_kernel_size, pool_stride, dropout: float = 0.0):
        conv = nn.Conv2d(in_size, out_size, kernel_size=kernel_size, stride=stride, padding=padding)
        norm = nn.BatchNorm2d(out_size)
        drop = nn.Dropout2d(dropout)
        activation = nn.ReLU6(inplace=True)
        pool = nn.MaxPool2d(kernel_size=pool_kernel_size, stride=pool_stride)
        super().__init__(conv, norm, drop, activation, pool)
        
# NOTE(review): same layers as the ConvReLU classes declared in earlier
# sections; this later definition shadows them.
class ConvReLU(nn.Sequential):
    """Conv2d followed by an in-place ReLU6."""

    def __init__(self, in_size, out_size, kernel_size, stride, padding):
        conv = nn.Conv2d(in_size, out_size, kernel_size=kernel_size, stride=stride, padding=padding)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, activation)

In [22]:
class ScalableCnn1(nn.Module):
    """CNN whose depth and width are derived from compound-scaling factors.

    Args:
        compound_scales: `(alpha, beta, gamma)`.
            * alpha scales the number of conv and dense layers (depth),
            * beta scales the number of channels / units (width),
            * gamma is unpacked but not applied: the input resolution is
              fixed to (64, 173).

    The baseline is 4 conv layers (1->32->64->64->64 channels) and a single
    dense layer producing 10 outputs. Conv layers are followed by a (2, 2)
    max-pooling as long as the resolution allows it.
    """

    def __init__(self, compound_scales: tuple = (1, 1, 1)):
        super(ScalableCnn1, self).__init__()
        alpha, beta, gamma = compound_scales[0], compound_scales[1], compound_scales[2]

        initial_conv_inputs = [1, 32, 64, 64]
        initial_conv_outputs = [32, 64, 64, 64]
        initial_nb_conv = 4
        initial_dense_inputs = [1344]
        initial_dense_outputs = [10]
        initial_nb_dense = 1
        initial_resolution = (64, 173)

        # Apply compound scaling
        # depth ----
        # Cast to int: np.floor returns a float, which np.linspace rejects as
        # a sample count on current NumPy versions.
        scaled_nb_conv = int(np.floor(initial_nb_conv * alpha))
        scaled_nb_dense = int(np.floor(initial_nb_dense * alpha))

        if scaled_nb_conv != initial_nb_conv:  # Another conv layer must be created
            print("More conv layer must be created")
            gaps = np.array(initial_conv_outputs) - np.array(initial_conv_inputs) # average filter gap
            avg_gap = gaps.mean()

            # Each new layer takes the previous layer's output as input and
            # widens it by the average in/out gap of the baseline layers.
            while len(initial_conv_inputs) < scaled_nb_conv:
                initial_conv_outputs.append(int(np.floor(initial_conv_outputs[-1] + avg_gap)))
                initial_conv_inputs.append(initial_conv_outputs[-2])

            print("new conv layers:")
            print("inputs: ", initial_conv_inputs)
            print("ouputs: ", initial_conv_outputs)

        # Only grow the dense stack; a count below the baseline (alpha < 1)
        # keeps the single baseline layer instead of producing empty lists.
        if scaled_nb_dense > initial_nb_dense:  # Another dense layer must be created
            print("More dense layer must be created")
            initial_dense_inputs, initial_dense_outputs = self._interpolate_dense_sizes(
                initial_dense_inputs[0], initial_dense_outputs[-1], scaled_nb_dense
            )

            print("new dense layers:")
            print("inputs: ", initial_dense_inputs)
            print("ouputs: ", initial_dense_outputs)

        # width ----
        scaled_conv_inputs = [int(np.floor(i * beta)) for i in initial_conv_inputs]
        scaled_conv_outputs = [int(np.floor(i * beta)) for i in initial_conv_outputs]
        scaled_dense_inputs = [int(np.floor(i * beta)) for i in initial_dense_inputs]
        scaled_dense_outputs = [int(np.floor(i * beta)) for i in initial_dense_outputs]

        # Check how many conv with pooling layer can be used: each (2, 2)
        # pooling halves both axes, so the smaller axis bounds the count.
        nb_max_pooling = int(min(np.log2(initial_resolution[0]), np.log2(initial_resolution[1])))
        nb_model_pooling = min(len(scaled_conv_inputs), nb_max_pooling)

        # Fix the first conv input, the first dense input and the final output,
        # which must not follow the width scaling.
        scaled_conv_inputs[0] = 1
        scaled_dense_inputs[0] = self.calc_initial_dense_input(initial_resolution, nb_model_pooling, scaled_conv_outputs)
        scaled_dense_outputs[-1] = 10

        # ======== Create the convolution part ========
        features = []

        for idx, (inp, out) in enumerate(zip(scaled_conv_inputs, scaled_conv_outputs)):
            if idx < nb_model_pooling:
                # no dropout on the very first block
                dropout = 0.3 if idx != 0 else 0.0
                features.append(ConvBNReLUPool(inp, out, 3, 1, 1, (2, 2), (2, 2), dropout))

            else:
                features.append(ConvReLU(inp, out, 3, 1, 1))

        self.features = nn.Sequential(
            *features,
        )

        # ======== Create the classifier part ========
        linears = []
        for inp, out in zip(scaled_dense_inputs[:-1], scaled_dense_outputs[:-1]):
            print(inp, out)
            linears.append(nn.Linear(inp, out))
            linears.append(nn.ReLU6(inplace=True))

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Dropout(0.5),
            *linears,
            nn.Linear(scaled_dense_inputs[-1], scaled_dense_outputs[-1])
        )

    def forward(self, x):
        """Insert a channel axis after the batch axis, then return class logits."""
        x = x.view(-1, 1, *x.shape[1:])

        x = self.features(x)
        x = self.classifier(x)

        return x

    @staticmethod
    def _interpolate_dense_sizes(first_input, last_output, nb_dense):
        """Return (inputs, outputs) of `nb_dense` layers interpolated linearly in size."""
        dense_list = np.linspace(first_input, last_output, int(nb_dense) + 1)
        return dense_list[:-1], dense_list[1:]

    def calc_initial_dense_input(self, initial_resolution, nb_model_pooling, conv_outputs):
        """Number of features reaching the first dense layer.

        Halves both axes of `initial_resolution` once per pooling layer
        (integer division) and multiplies the remaining area by the channel
        count of the last conv layer.
        """
        dim1 = initial_resolution[0]
        dim2 = initial_resolution[1]

        for i in range(int(nb_model_pooling)):
            dim1 = dim1 // 2
            dim2 = dim2 // 2

        return dim1 * dim2 * conv_outputs[-1]

# ======== Training ========

## Prep model

In [27]:
# Model selection cell: exactly one "----" section below is meant to be active;
# the others are kept commented out as alternatives.
torch.cuda.empty_cache()

# ---- Efficient net ----
# model_func = EfficientNet
# m1 = EfficientNet(
#     conv_in_size= [1, 8, 16, 24, 40],
#     conv_out_size= [8, 16, 24, 40, 40],
#     t = [1, 6, 6, 4, 6, 6],
#     s = [1, 2, 2, 2, 2, 1],
#     n = [1, 3, 3, 1, 1, 1]
# )

# ---- Cnn with dropout ----
# model_func = cnn_d
# m1 = model_func()

# ---- cnn ----
# m1 = cnn()

# ---- ScalableCnn1 ----
# `model_func` is reused by later cells to re-create the model and to name
# the tensorboard run. Index 14 of `valid_scaling` is the 15th valid
# (alpha, beta, gamma) combination found by the search cell.
model_func = ScalableCnn1
m1 = model_func(valid_scaling[14])

# Just trying the different models generated
# model_func = ScalableCnn1
# for compound_scaler in valid_scaling:
#     m1 = model_func(compound_scaler)

#     #m1 = m1.cuda()
#     print(m1.features)
#     print(m1.classifier)
    
#     from torchsummaryX import summary
#     input_tensor = torch.zeros((100, 64, 173), dtype=torch.float)
#     #input_tensor = input_tensor.cuda()

#     s = summary(m1, input_tensor)

More conv layer must be created
new conv layers:
inputs:  [1, 32, 64, 64, 64]
ouputs:  [32, 64, 64, 64, 79]


In [28]:
# Print a per-layer summary of `m1` by running it on an all-zero tensor of
# shape (100, 64, 173).
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top.
from torchsummaryX import summary
input_tensor = torch.zeros((100, 64, 173), dtype=torch.float)
s = summary(m1, input_tensor)

                                 Kernel Shape        Output Shape    Params  \
Layer                                                                         
0_features.0.Conv2d_0           [1, 51, 3, 3]  [100, 51, 64, 173]     510.0   
1_features.0.BatchNorm2d_1               [51]  [100, 51, 64, 173]     102.0   
2_features.0.Dropout2d_2                    -  [100, 51, 64, 173]         -   
3_features.0.ReLU6_3                        -  [100, 51, 64, 173]         -   
4_features.0.MaxPool2d_4                    -   [100, 51, 32, 86]         -   
5_features.1.Conv2d_0         [51, 102, 3, 3]  [100, 102, 32, 86]    46.92k   
6_features.1.BatchNorm2d_1              [102]  [100, 102, 32, 86]     204.0   
7_features.1.Dropout2d_2                    -  [100, 102, 32, 86]         -   
8_features.1.ReLU6_3                        -  [100, 102, 32, 86]         -   
9_features.1.MaxPool2d_4                    -  [100, 102, 16, 43]         -   
10_features.2.Conv2d_0       [102, 102, 3, 3]  [100,

## Prep data

In [29]:
# Dataset locations, relative to the notebook's working directory.
audio_root = "../dataset/audio"
metadata_root = "../dataset/metadata"

# DatasetManager comes from ../src/datasetManager.py; presumably it loads the
# metadata and audio up front (progress bars are shown) -- confirm in its source.
dataset = DatasetManager(metadata_root, audio_root, verbose=2)

HBox(children=(IntProgress(value=0, max=9), HTML(value='')))




HBox(children=(IntProgress(value=0, max=1), HTML(value='')))




## Prep training

**Using the combination dictionary, several models will be tested. Procedure to follow:**
- create the feature extraction function using the *extract feature helper*
- change the feature extraction function of the dataset_manager
- if the extraction parameters change, invalidate the validation cache
- create the model using the parameters
- define the criterion and the optimizer
- generate the loaders
- create the tensorboard log name
- perform the training

In [30]:
# create model
torch.cuda.empty_cache()

# NOTE(review): `model_func()` is called without arguments, so for ScalableCnn1
# this builds the default (1, 1, 1) scaling and replaces the scaled model
# created in the "Prep model" cell -- confirm this is intended.
m1 = model_func()
# NOTE(review): requires a CUDA device; the `device` object defined in the
# "Prepare GPU" cell is not used here.
m1.cuda()

# loss and optimizer
# NOTE(review): despite the `_bce` suffix this is a categorical cross-entropy loss.
criterion_bce = nn.CrossEntropyLoss(reduction="mean")

# optimizer = torch.optim.SGD(
#     m1.parameters(),
#     weight_decay=1e-3,
#     lr=0.05
# )
# AdamW with its default learning rate (no `lr` argument is passed).
optimizer = torch.optim.AdamW(m1.parameters(), weight_decay=1e-3)

# Augmentation to use (none)
augments = []

# train and val loaders
train_dataset = Generator(dataset, augments=augments)

# The validation split is exposed by the Generator as a pair of numpy arrays
# and wrapped into a TensorDataset.
x, y = train_dataset.validation
x = torch.from_numpy(x)
y = torch.from_numpy(y)
val_dataset = torch.utils.data.TensorDataset(x, y)

HBox(children=(IntProgress(value=0, max=837), HTML(value='')))




In [31]:
# training parameters
nb_epoch = 200
batch_size = 64
nb_batch = len(train_dataset) // batch_size

training_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=8)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=4)

# scheduler
lr_lambda = lambda epoch: 0.05 * (np.cos(np.pi * epoch / nb_epoch) + 1)
lr_scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda)
callbacks = [lr_scheduler]
callbacks = []

# tensorboard
title = "%s_%s_Cosd-lr_sgd-0.01lr-wd0.001_%de_no_augment" % ( get_datetime(), model_func.__name__, nb_epoch )
tensorboard = SummaryWriter(log_dir="tensorboard/%s" % title, comment=model_func.__name__)

## training

In [32]:
# Training / validation loop.
# One CategoricalAccuracy instance is shared by both phases and reset before
# each of them, so `acc` / `acc_val` are running means over the batches of the
# current phase.
acc_func = CategoricalAccuracy()

for epoch in tqdm.tqdm_notebook(range(nb_epoch)):
    start_time = time.time()
    print("")
    
    acc_func.reset()

    m1.train()

    for i, (X, y) in enumerate(training_loader):        
        # Transfer to GPU
        X = X.cuda().float()
        y = y.cuda().long()
        
        # predict
        logits = m1(X)

        weak_loss = criterion_bce(logits, y)

        total_loss = weak_loss

        # calc metrics: predicted class = index of the largest logit
#         y_pred = torch.log_softmax(logits, dim=1)
        _, y_pred = torch.max(logits, 1)
        acc = acc_func(y_pred, y)

        # ======== back propagation ========
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        # ======== history ========
        # `end="\r"` rewrites the same output line for every batch.
        print("Epoch {}, {:d}% \t ce: {:.4f} - acc: {:.4f} - took: {:.2f}s".format(
            epoch+1,
            int(100 * (i+1) / nb_batch),
            total_loss.item(),
            acc,
            time.time() - start_time
        ),end="\r")

    # using tensorboard to monitor loss and acc
    # NOTE(review): `total_loss` here is the loss of the LAST training batch of
    # the epoch, not an epoch average; `acc` is the running mean over the epoch.
    tensorboard.add_scalar('train/ce', total_loss.item(), epoch)
    tensorboard.add_scalar("train/acc", 100. * acc, epoch )

    # Validation
    with torch.set_grad_enabled(False):
        # reset metrics
        acc_func.reset()
        m1.eval()

        for X_val, y_val in val_loader:
            # Transfer to GPU
            X_val = X_val.cuda().float()
            y_val = y_val.cuda().long()


#             y_weak_val_pred, _ = model(X_val)
            logits = m1(X_val)

            # calc loss
            weak_loss_val = criterion_bce(logits, y_val)

            # metrics
#             y_val_pred =torch.log_softmax(logits, dim=1)
            _, y_val_pred = torch.max(logits, 1)
            acc_val = acc_func(y_val_pred, y_val)

            #Print statistics
            # `i`, `total_loss` and `acc` still hold the values left by the
            # training loop above, so the percentage stays at its final value.
            print("Epoch {}, {:d}% \t ce: {:.4f} - acc: {:.4f} - ce val: {:.4f} - acc val: {:.4f} - took: {:.2f}s".format(
                epoch+1,
                int(100 * (i+1) / nb_batch),
                total_loss.item(),
                acc,
                weak_loss_val.item(),
                acc_val,
                time.time() - start_time
            ),end="\r")

        # using tensorboard to monitor loss and acc
        # NOTE(review): `weak_loss_val` is the loss of the last validation batch
        # only; the validation loader is shuffled, so this value depends on
        # which samples end up in that batch.
        tensorboard.add_scalar('validation/ce', weak_loss_val.item(), epoch)
        tensorboard.add_scalar("validation/acc", 100. * acc_val, epoch )

    # End-of-epoch callbacks (e.g. learning-rate schedulers).
    for callback in callbacks:
        callback.step()

HBox(children=(IntProgress(value=0, max=200), HTML(value='')))


Epoch 1, 100% 	 ce: 2.0356 - acc: 0.2333 - ce val: 1.7893 - acc val: 0.4147 - took: 11.83s
Epoch 2, 100% 	 ce: 2.0286 - acc: 0.3437 - ce val: 1.7250 - acc val: 0.4775 - took: 11.53s
Epoch 3, 100% 	 ce: 1.5805 - acc: 0.4139 - ce val: 2.2310 - acc val: 0.5310 - took: 11.79s
Epoch 4, 100% 	 ce: 1.2979 - acc: 0.4673 - ce val: 1.0758 - acc val: 0.5373 - took: 11.28s
Epoch 5, 100% 	 ce: 1.5698 - acc: 0.5062 - ce val: 1.0975 - acc val: 0.5554 - took: 11.76s
Epoch 6, 100% 	 ce: 1.5627 - acc: 0.5334 - ce val: 1.6052 - acc val: 0.5386 - took: 11.64s
Epoch 7, 100% 	 ce: 1.1646 - acc: 0.5644 - ce val: 1.0828 - acc val: 0.5875 - took: 11.76s
Epoch 8, 100% 	 ce: 0.8448 - acc: 0.5762 - ce val: 1.7150 - acc val: 0.5810 - took: 11.47s
Epoch 9, 100% 	 ce: 0.8625 - acc: 0.6010 - ce val: 0.4507 - acc val: 0.6453 - took: 11.84s
Epoch 10, 100% 	 ce: 1.1111 - acc: 0.6195 - ce val: 2.1911 - acc val: 0.5926 - took: 11.51s
Epoch 11, 100% 	 ce: 1.0652 - acc: 0.6317 - ce val: 0.4785 - acc val: 0.6699 - took: 11.

# ♫♪.ılılıll|̲̅̅●̲̅̅|̲̅̅=̲̅̅|̲̅̅●̲̅̅|llılılı.♫♪