In [1]:
#!/usr/bin/env python
"""Train a CNN for Google speech commands."""

__author__ = 'Yuan Xu, Erdene-Ochir Tuguldur'

import argparse
import time

from tqdm import *

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler

import torchvision
from torchvision.transforms import *

from tensorboardX import SummaryWriter

import models
from datasets import *
from transforms import *
from mixup import *

In [2]:
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo

In [3]:
def conv1x1(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False, padding=1)

class depthwise_separable_conv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, padding=0, stride=1, bias=False):
        super(depthwise_separable_conv, self).__init__()
        self.depthwise = nn.Conv2d(in_planes, in_planes, kernel_size=kernel_size, stride=stride, groups=in_planes, bias=bias, padding=1)
        self.pointwise = nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=bias)

    def forward(self, x):
        out = self.depthwise(x)
        out = self.pointwise(out)
        return out

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, conv3x1_sample=None, conv1x3_sample=None):
        super(BasicBlock, self).__init__()
        #self.piece = 4
        #adaptive_plane = round((planes-inplanes)/self.piece)
        self.conv1 = depthwise_separable_conv(inplanes, planes, 3, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = depthwise_separable_conv(planes, planes, 3, 1)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3x1_sample = conv3x1_sample
        self.conv1x3_sample = conv1x3_sample
        #print(self.downsample)
        self.stride = stride

    def forward(self, x):
        #print(x.shape)
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        #print("out {}".format(out.shape))
        if self.conv1x3_sample is not None and self.conv3x1_sample is not None:
            N, C, W, H = out.shape
            time_sample = self.conv3x1_sample(x)
            frequency_sample = self.conv1x3_sample(x)
            residual = torch.zeros(N, C, W, H).cuda()
            residual[:,:,:int(W/2),:int(H/2)] = time_sample
            residual[:,:,int(W/2):,int(H/2):] = frequency_sample
        
        #print("res {}".format(residual.shape))
        out += residual
        out = self.relu(out)
        
        return out

class ResNet(nn.Module):

    def __init__(self, block, layers, num_classes=1000, in_channels=2):
        self.inplanes = 16
        super(ResNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, self.inplanes, kernel_size=3, stride=2, bias=False, padding=3)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1)
        self.layer1 = self._make_layer(block, 24, layers[0], stride=2)
        self.layer2 = self._make_layer(block, 32, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 48, layers[2], stride=2)
        #self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(48, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        h_sample, v_sample = None, None
        if stride != 1 or self.inplanes != planes * block.expansion:
            h_sample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=(2, 1), stride=stride, bias=False, padding=(0,0)),
                nn.BatchNorm2d(planes * block.expansion),
                nn.ReLU(inplace=True)
            )
            v_sample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=(1, 2), stride=stride, bias=False, padding=(0,0)),
                nn.BatchNorm2d(planes * block.expansion),
                nn.ReLU(inplace=True)
            )
        #print(downsample)
        layers = []
        layers.append(block(self.inplanes, planes, stride, h_sample, v_sample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        #x = self.layer4(x)
        #print(x.shape)
        x = self.avgpool(x)
        #print(x.shape)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [4]:
model = ResNet(BasicBlock, [2, 2, 2], num_classes=len(CLASSES), in_channels=1)
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

count_parameters(model)

30620

In [5]:
class Arg():
    def __init__(self):
        self.train_dataset="/home/cilab/LabMembers/DJ/sr_dataset/speech_command/train"
        self.valid_dataset="/home/cilab/LabMembers/DJ/sr_dataset/speech_command/valid"
        self.background_noise="/home/cilab/LabMembers/DJ/sr_dataset/speech_command/train/_background_noise_"
        self.comment=""
        self.batch_size=64
        self.dataload_workers_nums=6
        self.weight_decay=1e-2
        self.optim='sgd'
        self.learning_rate=0.01
        self.lr_scheduler='plateau'
        self.lr_scheduler_patience=5
        self.lr_scheduler_step_size=50
        self.lr_scheduler_gamma=0.1
        self.max_epochs=70
        self.resume=None
        self.model="resnet18-light-Unet"
        self.input="mel40"
        self.mixup=True
args = Arg()

In [6]:
use_gpu = torch.cuda.is_available()
print('use_gpu', use_gpu)
if use_gpu:
    torch.backends.cudnn.benchmark = True

n_mels = 32
if args.input == 'mel40':
    n_mels = 40

data_aug_transform = Compose([ChangeAmplitude(), ChangeSpeedAndPitchAudio(), FixAudioLength(), ToSTFT(), StretchAudioOnSTFT(), TimeshiftAudioOnSTFT(), FixSTFTDimension()])
bg_dataset = BackgroundNoiseDataset(args.background_noise, data_aug_transform)
add_bg_noise = AddBackgroundNoiseOnSTFT(bg_dataset)
train_feature_transform = Compose([ToMelSpectrogramFromSTFT(n_mels=n_mels), DeleteSTFT(), ToTensor('mel_spectrogram', 'input')])
train_dataset = SpeechCommandsDataset(args.train_dataset,
                                Compose([LoadAudio(),
                                         data_aug_transform,
                                         add_bg_noise,
                                         train_feature_transform]))

valid_feature_transform = Compose([ToMelSpectrogram(n_mels=n_mels), ToTensor('mel_spectrogram', 'input')])
valid_dataset = SpeechCommandsDataset(args.valid_dataset,
                                Compose([LoadAudio(),
                                         FixAudioLength(),
                                         valid_feature_transform]))

weights = train_dataset.make_weights_for_balanced_classes()
sampler = WeightedRandomSampler(weights, len(weights))
train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, sampler=sampler,
                              pin_memory=use_gpu, num_workers=args.dataload_workers_nums)
valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False,
                              pin_memory=use_gpu, num_workers=args.dataload_workers_nums)

use_gpu True


In [7]:
# a name used to save checkpoints etc.
full_name = '%s_%s_%s_bs%d_lr%.1e_wd%.1e' % (args.model, args.optim, args.lr_scheduler, args.batch_size, args.learning_rate, args.weight_decay)
if args.comment:
    full_name = '%s_%s' % (full_name, args.comment)

#model = models.create_model(model_name=args.model, num_classes=len(CLASSES), in_channels=1)
model = ResNet(BasicBlock, [2, 2, 2], num_classes=len(CLASSES), in_channels=1)
print(model)
if use_gpu:
    model = torch.nn.DataParallel(model).cuda()

criterion = torch.nn.CrossEntropyLoss()

if args.optim == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9, weight_decay=args.weight_decay)
else:
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)

start_timestamp = int(time.time()*1000)
start_epoch = 0
best_accuracy = 0
best_loss = 1e100
global_step = 0

if args.resume:
    print("resuming a checkpoint '%s'" % args.resume)
    checkpoint = torch.load(args.resume)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    optimizer.load_state_dict(checkpoint['optimizer'])

    best_accuracy = checkpoint.get('accuracy', best_accuracy)
    best_loss = checkpoint.get('loss', best_loss)
    start_epoch = checkpoint.get('epoch', start_epoch)
    global_step = checkpoint.get('step', global_step)

    del checkpoint  # reduce memory

if args.lr_scheduler == 'plateau':
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=args.lr_scheduler_patience, factor=args.lr_scheduler_gamma)
else:
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_scheduler_step_size, gamma=args.lr_scheduler_gamma, last_epoch=start_epoch-1)

ResNet(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=1, padding=0, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): depthwise_separable_conv(
        (depthwise): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
        (pointwise): Conv2d(16, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): depthwise_separable_conv(
        (depthwise): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=24, bias=False)
        (pointwise): Conv2d(24, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (bn2): BatchNorm2d(24, eps=1e-05, mom

In [8]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

count_parameters(model)

30620

In [9]:
def get_lr():
    return optimizer.param_groups[0]['lr']

writer = SummaryWriter(comment=('_speech_commands_' + full_name))

def train(epoch):
    global global_step

    print("epoch %3d with lr=%.02e" % (epoch, get_lr()))
    phase = 'train'
    writer.add_scalar('%s/learning_rate' % phase,  get_lr(), epoch)

    model.train()  # Set model to training mode

    running_loss = 0.0
    it = 0
    correct = 0
    total = 0

    pbar = tqdm(train_dataloader, unit="audios", unit_scale=train_dataloader.batch_size)
    for batch in pbar:
        inputs = batch['input']
        inputs = torch.unsqueeze(inputs, 1)
        targets = batch['target']

        if args.mixup:
            inputs, targets = mixup(inputs, targets, num_classes=len(CLASSES))

        inputs = Variable(inputs, requires_grad=True)
        targets = Variable(targets, requires_grad=False)
        if use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda(async=True)

        # forward/backward
        outputs = model(inputs)
        if args.mixup:
            loss = mixup_cross_entropy_loss(outputs, targets)
        else:
            loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # statistics
        it += 1
        global_step += 1
        running_loss += loss.item()
        pred = outputs.data.max(1, keepdim=True)[1]
        if args.mixup:
            targets = batch['target']
            targets = Variable(targets, requires_grad=False).cuda(async=True)
        correct += pred.eq(targets.data.view_as(pred)).sum()
        total += targets.size(0)

        writer.add_scalar('%s/loss' % phase, loss.item(), global_step)

        # update the progress bar
        pbar.set_postfix({
            'loss': "%.05f" % (running_loss / it),
            'acc': "%.02f%%" % (100*correct/total)
        })

    accuracy = correct/total
    epoch_loss = running_loss / it
    writer.add_scalar('%s/accuracy' % phase, 100*accuracy, epoch)
    writer.add_scalar('%s/epoch_loss' % phase, epoch_loss, epoch)

In [10]:
def valid(epoch):
    global best_accuracy, best_loss, global_step

    phase = 'valid'
    model.eval()  # Set model to evaluate mode

    running_loss = 0.0
    it = 0
    correct = 0
    total = 0

    pbar = tqdm(valid_dataloader, unit="audios", unit_scale=valid_dataloader.batch_size)
    for batch in pbar:
        inputs = batch['input']
        inputs = torch.unsqueeze(inputs, 1)
        targets = batch['target']

        inputs = Variable(inputs, volatile = True)
        targets = Variable(targets, requires_grad=False)

        if use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda(async=True)

        # forward
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # statistics
        it += 1
        global_step += 1
        running_loss += loss.item()
        pred = outputs.data.max(1, keepdim=True)[1]
        correct += pred.eq(targets.data.view_as(pred)).sum()
        total += targets.size(0)

        writer.add_scalar('%s/loss' % phase, loss.item(), global_step)

        # update the progress bar
        pbar.set_postfix({
            'loss': "%.05f" % (running_loss / it),
            'acc': "%.02f%%" % (100*correct/total)
        })

    accuracy = 100*correct/total
    epoch_loss = running_loss / it
    writer.add_scalar('%s/accuracy' % phase, accuracy, epoch)
    writer.add_scalar('%s/epoch_loss' % phase, epoch_loss, epoch)
    checkpoint = {
        'epoch': epoch,
        'step': global_step,
        'state_dict': model.state_dict(),
        'loss': epoch_loss,
        'accuracy': accuracy,
        'optimizer' : optimizer.state_dict(),
    }
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(checkpoint, 'checkpoints/best-acc-%s.pth' % full_name)
        torch.save(model, '%d-%s-best-acc.pth' % (start_timestamp, full_name))
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(checkpoint, 'checkpoints/best-loss-%s.pth' % full_name)
        torch.save(model, '%d-%s-best-loss.pth' % (start_timestamp, full_name))
    torch.save(model, './%s.pth'%(full_name))
    #torch.save(checkpoint, 'checkpoints/Resnet18.pth')
    del checkpoint  # reduce memory

    return epoch_loss

In [11]:
print("training %s for Google speech commands..." % args.model)
since = time.time()
for epoch in range(start_epoch, args.max_epochs):
    if args.lr_scheduler == 'step':
        lr_scheduler.step()

    train(epoch)
    epoch_loss = valid(epoch)

    if args.lr_scheduler == 'plateau':
        print(type(epoch_loss))
        lr_scheduler.step(metrics=epoch_loss)

    time_elapsed = time.time() - since
    time_str = 'total time elapsed: {:.0f}h {:.0f}m {:.0f}s '.format(time_elapsed // 3600, time_elapsed % 3600 // 60, time_elapsed % 60)
    print("%s, best accuracy: %.02f%%, best loss %f" % (time_str, best_accuracy, best_loss))
    with open('./train_res18-light-unet.log', 'a+') as f:
        f.write("%s, epoch: %s, best accuracy: %.02f%%, best loss %f\n" % (time_str, epoch,best_accuracy, best_loss))
print("finished")

  0%|          | 0/56256 [00:00<?, ?audios/s]

training resnet18-light-Unet for Google speech commands...
epoch   0 with lr=1.00e-02


100%|██████████| 56256/56256 [05:07<00:00, 183.23audios/s, loss=2.32313, acc=16.00%]
100%|██████████| 7488/7488 [00:18<00:00, 396.42audios/s, loss=2.12086, acc=19.00%]
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 0h 5m 26s , best accuracy: 19.00%, best loss 2.120865
epoch   1 with lr=1.00e-02


100%|██████████| 56256/56256 [05:58<00:00, 157.11audios/s, loss=2.09408, acc=24.00%]
100%|██████████| 7488/7488 [00:19<00:00, 389.49audios/s, loss=2.02736, acc=18.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 0h 11m 43s , best accuracy: 19.00%, best loss 2.027361
epoch   2 with lr=1.00e-02


100%|██████████| 56256/56256 [06:59<00:00, 134.11audios/s, loss=1.94998, acc=30.00%]
100%|██████████| 7488/7488 [00:20<00:00, 371.84audios/s, loss=2.65616, acc=17.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 0h 19m 3s , best accuracy: 19.00%, best loss 2.027361
epoch   3 with lr=1.00e-02


100%|██████████| 56256/56256 [06:56<00:00, 135.09audios/s, loss=1.87540, acc=33.00%]
100%|██████████| 7488/7488 [00:19<00:00, 377.64audios/s, loss=1.41501, acc=42.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 0h 26m 19s , best accuracy: 42.00%, best loss 1.415015
epoch   4 with lr=1.00e-02


100%|██████████| 56256/56256 [07:00<00:00, 133.65audios/s, loss=1.84082, acc=35.00%]
100%|██████████| 7488/7488 [00:18<00:00, 396.23audios/s, loss=1.53026, acc=40.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 0h 33m 39s , best accuracy: 42.00%, best loss 1.415015
epoch   5 with lr=1.00e-02


100%|██████████| 56256/56256 [07:28<00:00, 125.43audios/s, loss=1.81066, acc=35.00%]
100%|██████████| 7488/7488 [00:22<00:00, 327.79audios/s, loss=1.66694, acc=39.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 0h 41m 31s , best accuracy: 42.00%, best loss 1.415015
epoch   6 with lr=1.00e-02


100%|██████████| 56256/56256 [09:26<00:00, 99.36audios/s, loss=1.80472, acc=36.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 295.90audios/s, loss=1.94320, acc=30.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 0h 51m 22s , best accuracy: 42.00%, best loss 1.415015
epoch   7 with lr=1.00e-02


100%|██████████| 56256/56256 [09:25<00:00, 99.50audios/s, loss=1.79246, acc=37.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 292.81audios/s, loss=1.53635, acc=41.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 1h 1m 13s , best accuracy: 42.00%, best loss 1.415015
epoch   8 with lr=1.00e-02


100%|██████████| 56256/56256 [09:25<00:00, 99.47audios/s, loss=1.78620, acc=36.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 285.88audios/s, loss=1.44576, acc=43.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 1h 11m 5s , best accuracy: 43.00%, best loss 1.415015
epoch   9 with lr=1.00e-02


100%|██████████| 56256/56256 [09:25<00:00, 99.41audios/s, loss=1.78023, acc=37.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 279.29audios/s, loss=1.53527, acc=45.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 1h 20m 58s , best accuracy: 45.00%, best loss 1.415015
epoch  10 with lr=1.00e-03


100%|██████████| 56256/56256 [09:25<00:00, 99.50audios/s, loss=1.66138, acc=41.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 294.93audios/s, loss=0.79277, acc=79.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 1h 30m 49s , best accuracy: 79.00%, best loss 0.792770
epoch  11 with lr=1.00e-03


100%|██████████| 56256/56256 [09:21<00:00, 100.10audios/s, loss=1.63608, acc=42.00%]
100%|██████████| 7488/7488 [00:26<00:00, 285.08audios/s, loss=0.86893, acc=76.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 1h 40m 37s , best accuracy: 79.00%, best loss 0.792770
epoch  12 with lr=1.00e-03


100%|██████████| 56256/56256 [09:25<00:00, 99.44audios/s, loss=1.62486, acc=42.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 290.03audios/s, loss=0.84112, acc=77.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 1h 50m 28s , best accuracy: 79.00%, best loss 0.792770
epoch  13 with lr=1.00e-03


100%|██████████| 56256/56256 [09:23<00:00, 99.78audios/s, loss=1.60990, acc=42.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 284.81audios/s, loss=0.81546, acc=77.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 2h 0m 19s , best accuracy: 79.00%, best loss 0.792770
epoch  14 with lr=1.00e-03


100%|██████████| 56256/56256 [09:29<00:00, 98.73audios/s, loss=1.60310, acc=43.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 296.62audios/s, loss=0.95609, acc=72.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 2h 10m 14s , best accuracy: 79.00%, best loss 0.792770
epoch  15 with lr=1.00e-03


100%|██████████| 56256/56256 [09:26<00:00, 99.29audios/s, loss=1.60105, acc=43.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 286.36audios/s, loss=0.78453, acc=78.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 2h 20m 6s , best accuracy: 79.00%, best loss 0.784531
epoch  16 with lr=1.00e-03


100%|██████████| 56256/56256 [09:30<00:00, 98.67audios/s, loss=1.59893, acc=43.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 292.76audios/s, loss=0.59175, acc=85.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 2h 30m 2s , best accuracy: 85.00%, best loss 0.591752
epoch  17 with lr=1.00e-03


100%|██████████| 56256/56256 [09:38<00:00, 97.29audios/s, loss=1.59000, acc=43.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 278.63audios/s, loss=0.92002, acc=71.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 2h 40m 7s , best accuracy: 85.00%, best loss 0.591752
epoch  18 with lr=1.00e-03


100%|██████████| 56256/56256 [09:56<00:00, 94.33audios/s, loss=1.58634, acc=43.00%] 
100%|██████████| 7488/7488 [00:27<00:00, 277.27audios/s, loss=1.19469, acc=61.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 2h 50m 31s , best accuracy: 85.00%, best loss 0.591752
epoch  19 with lr=1.00e-03


100%|██████████| 56256/56256 [09:53<00:00, 94.76audios/s, loss=1.58167, acc=43.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 279.18audios/s, loss=0.80780, acc=77.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 3h 0m 51s , best accuracy: 85.00%, best loss 0.591752
epoch  20 with lr=1.00e-03


100%|██████████| 56256/56256 [09:58<00:00, 94.03audios/s, loss=1.57632, acc=43.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 284.43audios/s, loss=0.67461, acc=81.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 3h 11m 16s , best accuracy: 85.00%, best loss 0.591752
epoch  21 with lr=1.00e-03


100%|██████████| 56256/56256 [09:56<00:00, 94.39audios/s, loss=1.56833, acc=44.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 284.52audios/s, loss=0.95650, acc=73.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 3h 21m 38s , best accuracy: 85.00%, best loss 0.591752
epoch  22 with lr=1.00e-03


100%|██████████| 56256/56256 [09:52<00:00, 94.89audios/s, loss=1.56916, acc=44.00%] 
100%|██████████| 7488/7488 [00:27<00:00, 276.38audios/s, loss=0.65828, acc=84.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 3h 31m 58s , best accuracy: 85.00%, best loss 0.591752
epoch  23 with lr=1.00e-04


100%|██████████| 56256/56256 [09:55<00:00, 94.41audios/s, loss=1.52538, acc=45.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 278.61audios/s, loss=0.56645, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 3h 42m 21s , best accuracy: 86.00%, best loss 0.566450
epoch  24 with lr=1.00e-04


100%|██████████| 56256/56256 [09:44<00:00, 96.30audios/s, loss=1.51593, acc=45.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 296.21audios/s, loss=0.57170, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 3h 52m 31s , best accuracy: 86.00%, best loss 0.566450
epoch  25 with lr=1.00e-04


100%|██████████| 56256/56256 [09:37<00:00, 97.44audios/s, loss=1.51179, acc=45.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 293.36audios/s, loss=0.51606, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 4h 2m 34s , best accuracy: 87.00%, best loss 0.516059
epoch  26 with lr=1.00e-04


100%|██████████| 56256/56256 [09:26<00:00, 99.30audios/s, loss=1.50634, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 289.24audios/s, loss=0.56372, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 4h 12m 26s , best accuracy: 87.00%, best loss 0.516059
epoch  27 with lr=1.00e-04


100%|██████████| 56256/56256 [09:27<00:00, 99.17audios/s, loss=1.50546, acc=46.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 283.03audios/s, loss=0.50194, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 4h 22m 20s , best accuracy: 87.00%, best loss 0.501935
epoch  28 with lr=1.00e-04


100%|██████████| 56256/56256 [09:27<00:00, 99.10audios/s, loss=1.50595, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 294.29audios/s, loss=0.54594, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 4h 32m 13s , best accuracy: 87.00%, best loss 0.501935
epoch  29 with lr=1.00e-04


100%|██████████| 56256/56256 [09:28<00:00, 98.90audios/s, loss=1.50269, acc=45.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 296.90audios/s, loss=0.52359, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 4h 42m 7s , best accuracy: 87.00%, best loss 0.501935
epoch  30 with lr=1.00e-04


100%|██████████| 56256/56256 [09:28<00:00, 99.00audios/s, loss=1.49903, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 291.92audios/s, loss=0.47436, acc=88.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 4h 52m 1s , best accuracy: 88.00%, best loss 0.474357
epoch  31 with lr=1.00e-04


100%|██████████| 56256/56256 [09:31<00:00, 98.40audios/s, loss=1.50119, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 288.41audios/s, loss=0.47378, acc=88.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 5h 1m 59s , best accuracy: 88.00%, best loss 0.473783
epoch  32 with lr=1.00e-04


100%|██████████| 56256/56256 [09:27<00:00, 99.21audios/s, loss=1.50163, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 291.46audios/s, loss=0.54936, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 5h 11m 52s , best accuracy: 88.00%, best loss 0.473783
epoch  33 with lr=1.00e-04


100%|██████████| 56256/56256 [09:28<00:00, 99.03audios/s, loss=1.50237, acc=45.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 287.81audios/s, loss=0.56956, acc=85.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 5h 21m 46s , best accuracy: 88.00%, best loss 0.473783
epoch  34 with lr=1.00e-04


100%|██████████| 56256/56256 [09:25<00:00, 99.55audios/s, loss=1.49391, acc=46.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 285.71audios/s, loss=0.58050, acc=84.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 5h 31m 37s , best accuracy: 88.00%, best loss 0.473783
epoch  35 with lr=1.00e-04


100%|██████████| 56256/56256 [09:33<00:00, 98.12audios/s, loss=1.50026, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 291.93audios/s, loss=0.59410, acc=85.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 5h 41m 36s , best accuracy: 88.00%, best loss 0.473783
epoch  36 with lr=1.00e-04


100%|██████████| 56256/56256 [09:25<00:00, 99.45audios/s, loss=1.49952, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 289.10audios/s, loss=0.55284, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 5h 51m 28s , best accuracy: 88.00%, best loss 0.473783
epoch  37 with lr=1.00e-04


100%|██████████| 56256/56256 [09:24<00:00, 99.67audios/s, loss=1.49348, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 292.41audios/s, loss=0.52669, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 6h 1m 18s , best accuracy: 88.00%, best loss 0.473783
epoch  38 with lr=1.00e-05


100%|██████████| 56256/56256 [09:25<00:00, 99.51audios/s, loss=1.48526, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 292.52audios/s, loss=0.52531, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 6h 11m 9s , best accuracy: 88.00%, best loss 0.473783
epoch  39 with lr=1.00e-05


100%|██████████| 56256/56256 [09:28<00:00, 98.98audios/s, loss=1.48209, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 294.47audios/s, loss=0.55853, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 6h 21m 3s , best accuracy: 88.00%, best loss 0.473783
epoch  40 with lr=1.00e-05


100%|██████████| 56256/56256 [09:27<00:00, 99.19audios/s, loss=1.47793, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 295.86audios/s, loss=0.50273, acc=88.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 6h 30m 55s , best accuracy: 88.00%, best loss 0.473783
epoch  41 with lr=1.00e-05


100%|██████████| 56256/56256 [09:29<00:00, 98.83audios/s, loss=1.48325, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 294.30audios/s, loss=0.58266, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 6h 40m 50s , best accuracy: 88.00%, best loss 0.473783
epoch  42 with lr=1.00e-05


100%|██████████| 56256/56256 [09:30<00:00, 98.57audios/s, loss=1.48566, acc=46.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 287.05audios/s, loss=0.53271, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 6h 50m 47s , best accuracy: 88.00%, best loss 0.473783
epoch  43 with lr=1.00e-05


100%|██████████| 56256/56256 [09:26<00:00, 99.31audios/s, loss=1.48289, acc=46.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 287.01audios/s, loss=0.56445, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 7h 0m 39s , best accuracy: 88.00%, best loss 0.473783
epoch  44 with lr=1.00e-06


100%|██████████| 56256/56256 [09:27<00:00, 99.21audios/s, loss=1.47944, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 292.70audios/s, loss=0.45657, acc=89.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 7h 10m 32s , best accuracy: 89.00%, best loss 0.456570
epoch  45 with lr=1.00e-06


100%|██████████| 56256/56256 [09:23<00:00, 99.85audios/s, loss=1.48388, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 291.29audios/s, loss=0.48679, acc=89.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 7h 20m 21s , best accuracy: 89.00%, best loss 0.456570
epoch  46 with lr=1.00e-06


100%|██████████| 56256/56256 [09:25<00:00, 99.51audios/s, loss=1.48589, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 290.49audios/s, loss=0.52805, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 7h 30m 12s , best accuracy: 89.00%, best loss 0.456570
epoch  47 with lr=1.00e-06


100%|██████████| 56256/56256 [09:26<00:00, 99.26audios/s, loss=1.48346, acc=46.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 287.32audios/s, loss=0.53906, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 7h 40m 5s , best accuracy: 89.00%, best loss 0.456570
epoch  48 with lr=1.00e-06


100%|██████████| 56256/56256 [09:26<00:00, 99.30audios/s, loss=1.47586, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 298.70audios/s, loss=0.53421, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 7h 49m 57s , best accuracy: 89.00%, best loss 0.456570
epoch  49 with lr=1.00e-06


100%|██████████| 56256/56256 [09:30<00:00, 98.59audios/s, loss=1.48409, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 292.36audios/s, loss=0.55966, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 7h 59m 53s , best accuracy: 89.00%, best loss 0.456570
epoch  50 with lr=1.00e-06


100%|██████████| 56256/56256 [09:31<00:00, 98.44audios/s, loss=1.48697, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 294.24audios/s, loss=0.51363, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 8h 9m 50s , best accuracy: 89.00%, best loss 0.456570
epoch  51 with lr=1.00e-07


100%|██████████| 56256/56256 [09:25<00:00, 99.42audios/s, loss=1.48244, acc=46.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 284.95audios/s, loss=0.65838, acc=83.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 8h 19m 42s , best accuracy: 89.00%, best loss 0.456570
epoch  52 with lr=1.00e-07


100%|██████████| 56256/56256 [09:30<00:00, 98.59audios/s, loss=1.48143, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 295.90audios/s, loss=0.66554, acc=83.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 8h 29m 38s , best accuracy: 89.00%, best loss 0.456570
epoch  53 with lr=1.00e-07


100%|██████████| 56256/56256 [09:26<00:00, 99.27audios/s, loss=1.48865, acc=46.00%] 
100%|██████████| 7488/7488 [00:24<00:00, 300.93audios/s, loss=0.47243, acc=89.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 8h 39m 30s , best accuracy: 89.00%, best loss 0.456570
epoch  54 with lr=1.00e-07


100%|██████████| 56256/56256 [09:23<00:00, 99.79audios/s, loss=1.47598, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 296.44audios/s, loss=0.56093, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 8h 49m 19s , best accuracy: 89.00%, best loss 0.456570
epoch  55 with lr=1.00e-07


100%|██████████| 56256/56256 [09:27<00:00, 99.16audios/s, loss=1.47692, acc=46.00%] 
100%|██████████| 7488/7488 [00:24<00:00, 300.11audios/s, loss=0.52095, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 8h 59m 11s , best accuracy: 89.00%, best loss 0.456570
epoch  56 with lr=1.00e-07


100%|██████████| 56256/56256 [09:28<00:00, 99.02audios/s, loss=1.48049, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 290.81audios/s, loss=0.52790, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 9h 9m 5s , best accuracy: 89.00%, best loss 0.456570
epoch  57 with lr=1.00e-08


100%|██████████| 56256/56256 [09:24<00:00, 99.68audios/s, loss=1.47804, acc=46.00%] 
100%|██████████| 7488/7488 [00:26<00:00, 283.23audios/s, loss=0.48466, acc=89.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 9h 18m 56s , best accuracy: 89.00%, best loss 0.456570
epoch  58 with lr=1.00e-08


100%|██████████| 56256/56256 [09:26<00:00, 99.28audios/s, loss=1.48551, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 291.03audios/s, loss=0.48806, acc=88.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 9h 28m 48s , best accuracy: 89.00%, best loss 0.456570
epoch  59 with lr=1.00e-08


100%|██████████| 56256/56256 [09:27<00:00, 99.05audios/s, loss=1.48760, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 296.50audios/s, loss=0.54916, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 9h 38m 41s , best accuracy: 89.00%, best loss 0.456570
epoch  60 with lr=1.00e-08


100%|██████████| 56256/56256 [09:22<00:00, 99.99audios/s, loss=1.47999, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 297.22audios/s, loss=0.50774, acc=88.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 9h 48m 29s , best accuracy: 89.00%, best loss 0.456570
epoch  61 with lr=1.00e-08


100%|██████████| 56256/56256 [09:30<00:00, 98.53audios/s, loss=1.47586, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 292.37audios/s, loss=0.51191, acc=88.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 9h 58m 26s , best accuracy: 89.00%, best loss 0.456570
epoch  62 with lr=1.00e-08


100%|██████████| 56256/56256 [09:31<00:00, 98.38audios/s, loss=1.48475, acc=46.00%] 
100%|██████████| 7488/7488 [00:24<00:00, 301.64audios/s, loss=0.57106, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 10h 8m 23s , best accuracy: 89.00%, best loss 0.456570
epoch  63 with lr=1.00e-08


100%|██████████| 56256/56256 [09:26<00:00, 99.24audios/s, loss=1.47670, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 299.17audios/s, loss=0.48530, acc=89.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 10h 18m 15s , best accuracy: 89.00%, best loss 0.456570
epoch  64 with lr=1.00e-08


100%|██████████| 56256/56256 [09:25<00:00, 99.39audios/s, loss=1.48444, acc=46.00%] 
100%|██████████| 7488/7488 [00:25<00:00, 296.67audios/s, loss=0.54297, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 10h 28m 6s , best accuracy: 89.00%, best loss 0.456570
epoch  65 with lr=1.00e-08


100%|██████████| 56256/56256 [08:34<00:00, 109.32audios/s, loss=1.47930, acc=46.00%]
100%|██████████| 7488/7488 [00:20<00:00, 369.18audios/s, loss=0.56712, acc=86.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 10h 37m 1s , best accuracy: 89.00%, best loss 0.456570
epoch  66 with lr=1.00e-08


100%|██████████| 56256/56256 [06:39<00:00, 140.67audios/s, loss=1.48364, acc=46.00%]
100%|██████████| 7488/7488 [00:16<00:00, 447.73audios/s, loss=0.53542, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 10h 43m 57s , best accuracy: 89.00%, best loss 0.456570
epoch  67 with lr=1.00e-08


100%|██████████| 56256/56256 [05:41<00:00, 164.86audios/s, loss=1.48322, acc=46.00%]
100%|██████████| 7488/7488 [00:16<00:00, 444.38audios/s, loss=0.47246, acc=89.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 10h 49m 55s , best accuracy: 89.00%, best loss 0.456570
epoch  68 with lr=1.00e-08


100%|██████████| 56256/56256 [05:41<00:00, 164.53audios/s, loss=1.48119, acc=46.00%]
100%|██████████| 7488/7488 [00:16<00:00, 463.60audios/s, loss=0.54115, acc=87.00%]
  0%|          | 0/56256 [00:00<?, ?audios/s]

<class 'float'>
total time elapsed: 10h 55m 54s , best accuracy: 89.00%, best loss 0.456570
epoch  69 with lr=1.00e-08


100%|██████████| 56256/56256 [05:45<00:00, 162.60audios/s, loss=1.48152, acc=46.00%]
100%|██████████| 7488/7488 [00:17<00:00, 437.61audios/s, loss=0.53932, acc=86.00%]

<class 'float'>
total time elapsed: 11h 1m 57s , best accuracy: 89.00%, best loss 0.456570
finished



