In [1]:
#!/usr/bin/env python
"""Train a CNN for Google speech commands."""

__author__ = 'Yuan Xu, Erdene-Ochir Tuguldur'

import argparse
import time

from tqdm import *

import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data.sampler import WeightedRandomSampler

import torchvision
from torchvision.transforms import *

from tensorboardX import SummaryWriter

import models
from datasets import *
from transforms import *
from mixup import *

In [2]:
for i, m in enumerate(models.available_models):
    print(i, m)

0 vgg19_bn
1 resnet18
2 resnet34
3 resnet50
4 resnet101
5 resnet152
6 wideresnet28_10
7 wideresnet28_10D
8 wideresnet52_10
9 resnext29_8_64
10 dpn92
11 densenet_bc_100_12
12 densenet_bc_250_24
13 densenet_bc_190_40


In [3]:
class Arg():
    def __init__(self):
        self.train_dataset="/home/cilab/LabMembers/DJ/sr_dataset/speech_command/train"
        self.valid_dataset="/home/cilab/LabMembers/DJ/sr_dataset/speech_command/valid"
        self.background_noise="/home/cilab/LabMembers/DJ/sr_dataset/speech_command/train/_background_noise_"
        self.comment=""
        self.batch_size=128
        self.dataload_workers_nums=6
        self.weight_decay=1e-2
        self.optim='sgd'
        self.learning_rate=1e-4
        self.lr_scheduler='step'
        self.lr_scheduler_patience=5,
        self.lr_scheduler_step_size=50
        self.lr_scheduler_gamma=0.1
        self.max_epochs=70
        self.resume=None
        self.model=models.available_models[8]
        self.input="mel40"
        self.mixup=True
args = Arg()

In [4]:
use_gpu = torch.cuda.is_available()
print('use_gpu', use_gpu)
if use_gpu:
    torch.backends.cudnn.benchmark = True

n_mels = 32
if args.input == 'mel40':
    n_mels = 40

data_aug_transform = Compose([ChangeAmplitude(), ChangeSpeedAndPitchAudio(), FixAudioLength(), ToSTFT(), StretchAudioOnSTFT(), TimeshiftAudioOnSTFT(), FixSTFTDimension()])
bg_dataset = BackgroundNoiseDataset(args.background_noise, data_aug_transform)
add_bg_noise = AddBackgroundNoiseOnSTFT(bg_dataset)
train_feature_transform = Compose([ToMelSpectrogramFromSTFT(n_mels=n_mels), DeleteSTFT(), ToTensor('mel_spectrogram', 'input')])
train_dataset = SpeechCommandsDataset(args.train_dataset,
                                Compose([LoadAudio(),
                                         data_aug_transform,
                                         add_bg_noise,
                                         train_feature_transform]))

valid_feature_transform = Compose([ToMelSpectrogram(n_mels=n_mels), ToTensor('mel_spectrogram', 'input')])
valid_dataset = SpeechCommandsDataset(args.valid_dataset,
                                Compose([LoadAudio(),
                                         FixAudioLength(),
                                         valid_feature_transform]))

weights = train_dataset.make_weights_for_balanced_classes()
sampler = WeightedRandomSampler(weights, len(weights))
train_dataloader = DataLoader(train_dataset, batch_size=args.batch_size, sampler=sampler,
                              pin_memory=use_gpu, num_workers=args.dataload_workers_nums)
valid_dataloader = DataLoader(valid_dataset, batch_size=args.batch_size, shuffle=False,
                              pin_memory=use_gpu, num_workers=args.dataload_workers_nums)

use_gpu True


In [5]:
# a name used to save checkpoints etc.
full_name = '%s_%s_%s_bs%d_lr%.1e_wd%.1e' % (args.model, args.optim, args.lr_scheduler, args.batch_size, args.learning_rate, args.weight_decay)
if args.comment:
    full_name = '%s_%s' % (full_name, args.comment)

model = models.create_model(model_name=args.model, num_classes=len(CLASSES), in_channels=1)

if use_gpu:
    model = torch.nn.DataParallel(model).cuda()

criterion = torch.nn.CrossEntropyLoss()

if args.optim == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate, momentum=0.9, weight_decay=args.weight_decay)
else:
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)

start_timestamp = int(time.time()*1000)
start_epoch = 0
best_accuracy = 0
best_loss = 1e100
global_step = 0

if args.resume:
    print("resuming a checkpoint '%s'" % args.resume)
    checkpoint = torch.load(args.resume)
    model.load_state_dict(checkpoint['state_dict'])
    model.float()
    optimizer.load_state_dict(checkpoint['optimizer'])

    best_accuracy = checkpoint.get('accuracy', best_accuracy)
    best_loss = checkpoint.get('loss', best_loss)
    start_epoch = checkpoint.get('epoch', start_epoch)
    global_step = checkpoint.get('step', global_step)

    del checkpoint  # reduce memory

if args.lr_scheduler == 'plateau':
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=args.lr_scheduler_patience, factor=args.lr_scheduler_gamma)
else:
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_scheduler_step_size, gamma=args.lr_scheduler_gamma, last_epoch=start_epoch-1)

In [6]:
model

DataParallel(
  (module): WideResNet(
    (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (block1): NetworkBlock(
      (layer): Sequential(
        (0): BasicBlock(
          (bn1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(16, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu2): ReLU(inplace=True)
          (conv2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (convShortcut): Conv2d(16, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (1): BasicBlock(
          (bn1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu1): ReLU(inplace=True)
          (conv1): Conv2d(160, 160, kernel_size=(3, 3), stride=(

In [6]:
def get_lr():
    return optimizer.param_groups[0]['lr']

writer = SummaryWriter(comment=('_speech_commands_' + full_name))

def train(epoch):
    global global_step

    print("epoch %3d with lr=%.02e" % (epoch, get_lr()))
    phase = 'train'
    writer.add_scalar('%s/learning_rate' % phase,  get_lr(), epoch)

    model.train()  # Set model to training mode

    running_loss = 0.0
    it = 0
    correct = 0
    total = 0

    pbar = tqdm(train_dataloader, unit="audios", unit_scale=train_dataloader.batch_size)
    for batch in pbar:
        inputs = batch['input']
        inputs = torch.unsqueeze(inputs, 1)
        targets = batch['target']

        if args.mixup:
            inputs, targets = mixup(inputs, targets, num_classes=len(CLASSES))

        inputs = Variable(inputs, requires_grad=True)
        targets = Variable(targets, requires_grad=False)
        if use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda(async=True)

        # forward/backward
        outputs = model(inputs)
        if args.mixup:
            loss = mixup_cross_entropy_loss(outputs, targets)
        else:
            loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # statistics
        it += 1
        global_step += 1
        running_loss += loss.item()
        pred = outputs.data.max(1, keepdim=True)[1]
        if args.mixup:
            targets = batch['target']
            targets = Variable(targets, requires_grad=False).cuda(async=True)
        correct += pred.eq(targets.data.view_as(pred)).sum()
        total += targets.size(0)

        writer.add_scalar('%s/loss' % phase, loss.item(), global_step)

        # update the progress bar
        pbar.set_postfix({
            'loss': "%.05f" % (running_loss / it),
            'acc': "%.02f%%" % (100*correct/total)
        })

    accuracy = correct/total
    epoch_loss = running_loss / it
    writer.add_scalar('%s/accuracy' % phase, 100*accuracy, epoch)
    writer.add_scalar('%s/epoch_loss' % phase, epoch_loss, epoch)

In [7]:
def valid(epoch):
    global best_accuracy, best_loss, global_step

    phase = 'valid'
    model.eval()  # Set model to evaluate mode

    running_loss = 0.0
    it = 0
    correct = 0
    total = 0

    pbar = tqdm(valid_dataloader, unit="audios", unit_scale=valid_dataloader.batch_size)
    for batch in pbar:
        inputs = batch['input']
        inputs = torch.unsqueeze(inputs, 1)
        targets = batch['target']

        inputs = Variable(inputs, volatile = True)
        targets = Variable(targets, requires_grad=False)

        if use_gpu:
            inputs = inputs.cuda()
            targets = targets.cuda(async=True)

        # forward
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # statistics
        it += 1
        global_step += 1
        running_loss += loss.item()
        pred = outputs.data.max(1, keepdim=True)[1]
        correct += pred.eq(targets.data.view_as(pred)).sum()
        total += targets.size(0)

        writer.add_scalar('%s/loss' % phase, loss.item(), global_step)

        # update the progress bar
        pbar.set_postfix({
            'loss': "%.05f" % (running_loss / it),
            'acc': "%.02f%%" % (100*correct/total)
        })

    accuracy = correct/total
    epoch_loss = running_loss / it
    writer.add_scalar('%s/accuracy' % phase, 100*accuracy, epoch)
    writer.add_scalar('%s/epoch_loss' % phase, epoch_loss, epoch)

    checkpoint = {
        'epoch': epoch,
        'step': global_step,
        'state_dict': model.state_dict(),
        'loss': epoch_loss,
        'accuracy': accuracy,
        'optimizer' : optimizer.state_dict(),
    }

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        torch.save(checkpoint, 'checkpoints/best-loss-speech-commands-checkpoint-%s.pth' % full_name)
        torch.save(model, '%d-%s-best-loss.pth' % (start_timestamp, full_name))
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        torch.save(checkpoint, 'checkpoints/best-acc-speech-commands-checkpoint-%s.pth' % full_name)
        torch.save(model, '%d-%s-best-acc.pth' % (start_timestamp, full_name))

    torch.save(checkpoint, 'checkpoints/last-speech-commands-checkpoint.pth')
    del checkpoint  # reduce memory

    return epoch_loss

In [8]:
print("training %s for Google speech commands..." % args.model)
since = time.time()
for epoch in range(start_epoch, args.max_epochs):
    if args.lr_scheduler == 'step':
        lr_scheduler.step()

    train(epoch)
    epoch_loss = valid(epoch)

    if args.lr_scheduler == 'plateau':
        lr_scheduler.step(metrics=epoch_loss)

    time_elapsed = time.time() - since
    time_str = 'total time elapsed: {:.0f}h {:.0f}m {:.0f}s '.format(time_elapsed // 3600, time_elapsed % 3600 // 60, time_elapsed % 60)
    print("%s, best accuracy: %.02f%%, best loss %f" % (time_str, 100*best_accuracy, best_loss))
print("finished")

  0%|          | 0/56320 [00:00<?, ?audios/s]

training wideresnet52_10 for Google speech commands...
epoch   0 with lr=1.00e-04


100%|██████████| 56320/56320 [05:13<00:00, 179.93audios/s, loss=2.44959, acc=11.00%]
100%|██████████| 7552/7552 [00:25<00:00, 298.28audios/s, loss=2.22966, acc=13.00%]
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  "type " + obj.__name__ + ". It won't be checked "
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 5m 40s , best accuracy: 0.00%, best loss 2.229655
epoch   1 with lr=1.00e-04


100%|██████████| 56320/56320 [04:57<00:00, 189.02audios/s, loss=2.40269, acc=13.00%]
100%|██████████| 7552/7552 [00:25<00:00, 299.82audios/s, loss=2.20843, acc=14.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 11m 6s , best accuracy: 0.00%, best loss 2.208429
epoch   2 with lr=1.00e-04


100%|██████████| 56320/56320 [05:02<00:00, 186.49audios/s, loss=2.39103, acc=14.00%]
100%|██████████| 7552/7552 [00:29<00:00, 260.08audios/s, loss=2.21066, acc=14.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 16m 39s , best accuracy: 0.00%, best loss 2.208429
epoch   3 with lr=1.00e-04


100%|██████████| 56320/56320 [06:39<00:00, 141.09audios/s, loss=2.37806, acc=14.00%]
100%|██████████| 7552/7552 [00:28<00:00, 262.91audios/s, loss=2.21324, acc=14.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 23m 48s , best accuracy: 0.00%, best loss 2.208429
epoch   4 with lr=1.00e-04


100%|██████████| 56320/56320 [06:37<00:00, 141.69audios/s, loss=2.36933, acc=15.00%]
100%|██████████| 7552/7552 [00:29<00:00, 258.31audios/s, loss=2.18298, acc=15.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 30m 58s , best accuracy: 0.00%, best loss 2.182975
epoch   5 with lr=1.00e-04


100%|██████████| 56320/56320 [06:35<00:00, 142.52audios/s, loss=2.35891, acc=16.00%]
100%|██████████| 7552/7552 [00:27<00:00, 273.94audios/s, loss=2.14361, acc=17.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 38m 4s , best accuracy: 0.00%, best loss 2.143610
epoch   6 with lr=1.00e-04


100%|██████████| 56320/56320 [06:33<00:00, 143.08audios/s, loss=2.34675, acc=16.00%]
100%|██████████| 7552/7552 [00:28<00:00, 262.18audios/s, loss=2.12681, acc=17.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 45m 10s , best accuracy: 0.00%, best loss 2.126807
epoch   7 with lr=1.00e-04


100%|██████████| 56320/56320 [06:33<00:00, 143.18audios/s, loss=2.33326, acc=17.00%]
100%|██████████| 7552/7552 [00:28<00:00, 267.54audios/s, loss=2.11666, acc=17.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 52m 15s , best accuracy: 0.00%, best loss 2.116658
epoch   8 with lr=1.00e-04


100%|██████████| 56320/56320 [06:31<00:00, 143.91audios/s, loss=2.32614, acc=17.00%]
100%|██████████| 7552/7552 [00:28<00:00, 267.96audios/s, loss=2.09134, acc=19.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 0h 59m 18s , best accuracy: 0.00%, best loss 2.091344
epoch   9 with lr=1.00e-04


100%|██████████| 56320/56320 [06:36<00:00, 142.12audios/s, loss=2.31265, acc=17.00%]
100%|██████████| 7552/7552 [00:28<00:00, 267.67audios/s, loss=2.08246, acc=19.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 6m 26s , best accuracy: 0.00%, best loss 2.082459
epoch  10 with lr=1.00e-04


100%|██████████| 56320/56320 [06:33<00:00, 143.02audios/s, loss=2.30058, acc=18.00%]
100%|██████████| 7552/7552 [00:29<00:00, 259.28audios/s, loss=2.04017, acc=20.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 13m 32s , best accuracy: 0.00%, best loss 2.040169
epoch  11 with lr=1.00e-04


100%|██████████| 56320/56320 [06:30<00:00, 144.18audios/s, loss=2.28497, acc=18.00%]
100%|██████████| 7552/7552 [00:29<00:00, 257.22audios/s, loss=2.03575, acc=21.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 20m 35s , best accuracy: 0.00%, best loss 2.035753
epoch  12 with lr=1.00e-04


100%|██████████| 56320/56320 [06:32<00:00, 143.55audios/s, loss=2.26562, acc=19.00%]
100%|██████████| 7552/7552 [00:29<00:00, 258.08audios/s, loss=2.01636, acc=22.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 27m 40s , best accuracy: 0.00%, best loss 2.016364
epoch  13 with lr=1.00e-04


100%|██████████| 56320/56320 [06:34<00:00, 142.81audios/s, loss=2.25066, acc=20.00%]
100%|██████████| 7552/7552 [00:28<00:00, 263.45audios/s, loss=2.00042, acc=24.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 34m 46s , best accuracy: 0.00%, best loss 2.000423
epoch  14 with lr=1.00e-04


100%|██████████| 56320/56320 [06:32<00:00, 143.35audios/s, loss=2.23842, acc=21.00%]
100%|██████████| 7552/7552 [00:28<00:00, 263.34audios/s, loss=1.98063, acc=25.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 41m 51s , best accuracy: 0.00%, best loss 1.980635
epoch  15 with lr=1.00e-04


100%|██████████| 56320/56320 [06:29<00:00, 144.69audios/s, loss=2.21696, acc=22.00%]
100%|██████████| 7552/7552 [00:27<00:00, 271.91audios/s, loss=2.00244, acc=25.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 48m 50s , best accuracy: 0.00%, best loss 1.980635
epoch  16 with lr=1.00e-04


100%|██████████| 56320/56320 [06:28<00:00, 145.12audios/s, loss=2.18731, acc=23.00%]
100%|██████████| 7552/7552 [00:27<00:00, 269.90audios/s, loss=1.90756, acc=31.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 1h 55m 49s , best accuracy: 0.00%, best loss 1.907565
epoch  17 with lr=1.00e-04


100%|██████████| 56320/56320 [06:30<00:00, 144.32audios/s, loss=2.15268, acc=24.00%]
100%|██████████| 7552/7552 [00:28<00:00, 268.85audios/s, loss=1.88801, acc=29.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 2m 51s , best accuracy: 0.00%, best loss 1.888013
epoch  18 with lr=1.00e-04


100%|██████████| 56320/56320 [06:33<00:00, 143.18audios/s, loss=2.11473, acc=26.00%]
100%|██████████| 7552/7552 [00:26<00:00, 289.00audios/s, loss=1.88738, acc=29.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 9m 54s , best accuracy: 0.00%, best loss 1.887376
epoch  19 with lr=1.00e-04


100%|██████████| 56320/56320 [06:38<00:00, 141.22audios/s, loss=2.08219, acc=27.00%]
100%|██████████| 7552/7552 [00:27<00:00, 272.82audios/s, loss=1.76180, acc=31.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 17m 4s , best accuracy: 0.00%, best loss 1.761797
epoch  20 with lr=1.00e-04


100%|██████████| 56320/56320 [06:36<00:00, 142.06audios/s, loss=2.05340, acc=28.00%]
100%|██████████| 7552/7552 [00:28<00:00, 264.26audios/s, loss=1.68610, acc=33.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 24m 12s , best accuracy: 0.00%, best loss 1.686102
epoch  21 with lr=1.00e-04


100%|██████████| 56320/56320 [06:31<00:00, 144.04audios/s, loss=2.01407, acc=29.00%]
100%|██████████| 7552/7552 [00:28<00:00, 261.10audios/s, loss=1.71384, acc=33.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 31m 14s , best accuracy: 0.00%, best loss 1.686102
epoch  22 with lr=1.00e-04


100%|██████████| 56320/56320 [06:25<00:00, 146.07audios/s, loss=1.98110, acc=30.00%]
100%|██████████| 7552/7552 [00:28<00:00, 265.30audios/s, loss=1.63857, acc=34.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 38m 11s , best accuracy: 0.00%, best loss 1.638570
epoch  23 with lr=1.00e-04


100%|██████████| 56320/56320 [06:29<00:00, 144.50audios/s, loss=1.95598, acc=30.00%]
100%|██████████| 7552/7552 [00:28<00:00, 265.36audios/s, loss=1.61152, acc=35.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 45m 13s , best accuracy: 0.00%, best loss 1.611521
epoch  24 with lr=1.00e-04


100%|██████████| 56320/56320 [06:32<00:00, 143.49audios/s, loss=1.92627, acc=32.00%]
100%|██████████| 7552/7552 [00:28<00:00, 263.90audios/s, loss=1.59999, acc=37.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 52m 18s , best accuracy: 0.00%, best loss 1.599989
epoch  25 with lr=1.00e-04


100%|██████████| 56320/56320 [06:27<00:00, 145.23audios/s, loss=1.90032, acc=32.00%]
100%|██████████| 7552/7552 [00:27<00:00, 276.15audios/s, loss=1.58865, acc=40.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 2h 59m 16s , best accuracy: 0.00%, best loss 1.588655
epoch  26 with lr=1.00e-04


100%|██████████| 56320/56320 [06:30<00:00, 144.07audios/s, loss=1.88453, acc=33.00%]
100%|██████████| 7552/7552 [00:27<00:00, 272.41audios/s, loss=1.62798, acc=30.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 6m 16s , best accuracy: 0.00%, best loss 1.588655
epoch  27 with lr=1.00e-04


100%|██████████| 56320/56320 [06:31<00:00, 143.74audios/s, loss=1.86323, acc=34.00%]
100%|██████████| 7552/7552 [00:28<00:00, 268.70audios/s, loss=1.52213, acc=42.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 13m 20s , best accuracy: 0.00%, best loss 1.522131
epoch  28 with lr=1.00e-04


100%|██████████| 56320/56320 [06:37<00:00, 141.57audios/s, loss=1.84789, acc=34.00%]
100%|██████████| 7552/7552 [00:26<00:00, 287.87audios/s, loss=1.47559, acc=45.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 20m 27s , best accuracy: 0.00%, best loss 1.475592
epoch  29 with lr=1.00e-04


100%|██████████| 56320/56320 [06:35<00:00, 142.26audios/s, loss=1.82188, acc=35.00%]
100%|██████████| 7552/7552 [00:27<00:00, 275.30audios/s, loss=1.42000, acc=47.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 27m 34s , best accuracy: 0.00%, best loss 1.419996
epoch  30 with lr=1.00e-04


100%|██████████| 56320/56320 [06:39<00:00, 141.03audios/s, loss=1.80886, acc=36.00%]
100%|██████████| 7552/7552 [00:28<00:00, 267.29audios/s, loss=1.47808, acc=47.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 34m 43s , best accuracy: 0.00%, best loss 1.419996
epoch  31 with lr=1.00e-04


100%|██████████| 56320/56320 [06:34<00:00, 142.69audios/s, loss=1.78754, acc=36.00%]
100%|██████████| 7552/7552 [00:28<00:00, 261.72audios/s, loss=1.45764, acc=46.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 41m 48s , best accuracy: 0.00%, best loss 1.419996
epoch  32 with lr=1.00e-04


100%|██████████| 56320/56320 [06:29<00:00, 144.56audios/s, loss=1.77360, acc=37.00%]
100%|██████████| 7552/7552 [00:28<00:00, 265.21audios/s, loss=1.30171, acc=51.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 48m 50s , best accuracy: 0.00%, best loss 1.301712
epoch  33 with lr=1.00e-04


100%|██████████| 56320/56320 [06:31<00:00, 143.78audios/s, loss=1.76107, acc=37.00%]
100%|██████████| 7552/7552 [00:28<00:00, 264.38audios/s, loss=1.38893, acc=50.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 3h 55m 51s , best accuracy: 0.00%, best loss 1.301712
epoch  34 with lr=1.00e-04


100%|██████████| 56320/56320 [06:28<00:00, 144.84audios/s, loss=1.74359, acc=38.00%]
100%|██████████| 7552/7552 [00:29<00:00, 256.97audios/s, loss=1.22038, acc=55.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 2m 53s , best accuracy: 0.00%, best loss 1.220377
epoch  35 with lr=1.00e-04


100%|██████████| 56320/56320 [06:11<00:00, 151.53audios/s, loss=1.73062, acc=38.00%]
100%|██████████| 7552/7552 [00:24<00:00, 303.14audios/s, loss=1.30922, acc=46.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 9m 31s , best accuracy: 0.00%, best loss 1.220377
epoch  36 with lr=1.00e-04


100%|██████████| 56320/56320 [04:57<00:00, 189.46audios/s, loss=1.72272, acc=38.00%]
100%|██████████| 7552/7552 [00:24<00:00, 307.95audios/s, loss=1.18358, acc=59.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 14m 56s , best accuracy: 0.00%, best loss 1.183581
epoch  37 with lr=1.00e-04


100%|██████████| 56320/56320 [04:54<00:00, 190.98audios/s, loss=1.71319, acc=39.00%]
100%|██████████| 7552/7552 [00:25<00:00, 300.99audios/s, loss=1.27879, acc=52.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 20m 17s , best accuracy: 0.00%, best loss 1.183581
epoch  38 with lr=1.00e-04


100%|██████████| 56320/56320 [04:40<00:00, 201.13audios/s, loss=1.69292, acc=39.00%]
100%|██████████| 7552/7552 [00:17<00:00, 430.78audios/s, loss=1.17125, acc=59.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 25m 17s , best accuracy: 0.00%, best loss 1.171249
epoch  39 with lr=1.00e-04


100%|██████████| 56320/56320 [03:14<00:00, 290.20audios/s, loss=1.68668, acc=39.00%]
100%|██████████| 7552/7552 [00:17<00:00, 431.96audios/s, loss=1.08906, acc=61.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 28m 52s , best accuracy: 0.00%, best loss 1.089056
epoch  40 with lr=1.00e-04


100%|██████████| 56320/56320 [03:15<00:00, 287.53audios/s, loss=1.68218, acc=39.00%]
100%|██████████| 7552/7552 [00:17<00:00, 434.70audios/s, loss=1.20843, acc=57.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 32m 27s , best accuracy: 0.00%, best loss 1.089056
epoch  41 with lr=1.00e-04


100%|██████████| 56320/56320 [03:14<00:00, 289.00audios/s, loss=1.66894, acc=40.00%]
100%|██████████| 7552/7552 [00:17<00:00, 430.88audios/s, loss=1.06660, acc=64.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 36m 2s , best accuracy: 0.00%, best loss 1.066605
epoch  42 with lr=1.00e-04


100%|██████████| 56320/56320 [03:15<00:00, 288.80audios/s, loss=1.65657, acc=40.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.78audios/s, loss=1.02723, acc=67.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 39m 38s , best accuracy: 0.00%, best loss 1.027229
epoch  43 with lr=1.00e-04


100%|██████████| 56320/56320 [03:15<00:00, 288.28audios/s, loss=1.64853, acc=41.00%]
100%|██████████| 7552/7552 [00:17<00:00, 436.26audios/s, loss=0.98310, acc=67.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 43m 13s , best accuracy: 0.00%, best loss 0.983096
epoch  44 with lr=1.00e-04


100%|██████████| 56320/56320 [03:15<00:00, 288.11audios/s, loss=1.63721, acc=41.00%]
100%|██████████| 7552/7552 [00:17<00:00, 433.95audios/s, loss=1.00674, acc=68.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 46m 47s , best accuracy: 0.00%, best loss 0.983096
epoch  45 with lr=1.00e-04


100%|██████████| 56320/56320 [03:15<00:00, 287.87audios/s, loss=1.63741, acc=41.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.76audios/s, loss=0.92755, acc=69.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 50m 24s , best accuracy: 0.00%, best loss 0.927550
epoch  46 with lr=1.00e-04


100%|██████████| 56320/56320 [03:15<00:00, 288.34audios/s, loss=1.62298, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.47audios/s, loss=0.86462, acc=71.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 53m 59s , best accuracy: 0.00%, best loss 0.864617
epoch  47 with lr=1.00e-04


100%|██████████| 56320/56320 [03:13<00:00, 290.88audios/s, loss=1.61172, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 434.18audios/s, loss=0.88712, acc=72.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 4h 57m 32s , best accuracy: 0.00%, best loss 0.864617
epoch  48 with lr=1.00e-04


100%|██████████| 56320/56320 [03:14<00:00, 288.88audios/s, loss=1.59839, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 436.98audios/s, loss=1.01668, acc=66.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 1m 5s , best accuracy: 0.00%, best loss 0.864617
epoch  49 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.86audios/s, loss=1.59545, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 434.41audios/s, loss=0.88492, acc=71.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 4m 38s , best accuracy: 0.00%, best loss 0.864617
epoch  50 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.44audios/s, loss=1.59585, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 436.29audios/s, loss=0.89261, acc=70.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 8m 11s , best accuracy: 0.00%, best loss 0.864617
epoch  51 with lr=1.00e-05


100%|██████████| 56320/56320 [03:15<00:00, 287.69audios/s, loss=1.59388, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.61audios/s, loss=0.97119, acc=67.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 11m 46s , best accuracy: 0.00%, best loss 0.864617
epoch  52 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.65audios/s, loss=1.59258, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.58audios/s, loss=0.87530, acc=70.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 15m 19s , best accuracy: 0.00%, best loss 0.864617
epoch  53 with lr=1.00e-05


100%|██████████| 56320/56320 [03:15<00:00, 288.35audios/s, loss=1.59291, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.51audios/s, loss=0.89283, acc=71.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 18m 53s , best accuracy: 0.00%, best loss 0.864617
epoch  54 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 288.95audios/s, loss=1.58950, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 436.90audios/s, loss=0.82680, acc=74.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 22m 28s , best accuracy: 0.00%, best loss 0.826802
epoch  55 with lr=1.00e-05


100%|██████████| 56320/56320 [03:15<00:00, 288.29audios/s, loss=1.58763, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 430.77audios/s, loss=0.96030, acc=68.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 26m 2s , best accuracy: 0.00%, best loss 0.826802
epoch  56 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.40audios/s, loss=1.58423, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 430.51audios/s, loss=0.84316, acc=74.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 29m 35s , best accuracy: 0.00%, best loss 0.826802
epoch  57 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 290.04audios/s, loss=1.58978, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 436.16audios/s, loss=0.85632, acc=72.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 33m 8s , best accuracy: 0.00%, best loss 0.826802
epoch  58 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 290.25audios/s, loss=1.58762, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.21audios/s, loss=0.83828, acc=73.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 36m 41s , best accuracy: 0.00%, best loss 0.826802
epoch  59 with lr=1.00e-05


100%|██████████| 56320/56320 [03:15<00:00, 287.97audios/s, loss=1.58755, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 434.13audios/s, loss=0.82438, acc=73.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 40m 17s , best accuracy: 0.00%, best loss 0.824380
epoch  60 with lr=1.00e-05


100%|██████████| 56320/56320 [03:15<00:00, 288.27audios/s, loss=1.58235, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 434.59audios/s, loss=0.81539, acc=74.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 43m 53s , best accuracy: 0.00%, best loss 0.815388
epoch  61 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.41audios/s, loss=1.58436, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.61audios/s, loss=0.87731, acc=71.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 47m 26s , best accuracy: 0.00%, best loss 0.815388
epoch  62 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 290.02audios/s, loss=1.58184, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 430.18audios/s, loss=0.87206, acc=71.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 50m 59s , best accuracy: 0.00%, best loss 0.815388
epoch  63 with lr=1.00e-05


100%|██████████| 56320/56320 [03:15<00:00, 288.74audios/s, loss=1.58410, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 436.67audios/s, loss=0.88624, acc=71.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 54m 33s , best accuracy: 0.00%, best loss 0.815388
epoch  64 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 290.14audios/s, loss=1.57487, acc=43.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.90audios/s, loss=0.83809, acc=73.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 5h 58m 5s , best accuracy: 0.00%, best loss 0.815388
epoch  65 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 290.12audios/s, loss=1.58501, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 436.76audios/s, loss=0.81759, acc=72.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 6h 1m 38s , best accuracy: 0.00%, best loss 0.815388
epoch  66 with lr=1.00e-05


100%|██████████| 56320/56320 [03:15<00:00, 288.55audios/s, loss=1.57436, acc=43.00%]
100%|██████████| 7552/7552 [00:17<00:00, 432.27audios/s, loss=0.88971, acc=71.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 6h 5m 12s , best accuracy: 0.00%, best loss 0.815388
epoch  67 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.42audios/s, loss=1.57439, acc=43.00%]
100%|██████████| 7552/7552 [00:17<00:00, 432.09audios/s, loss=0.79858, acc=74.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 6h 8m 47s , best accuracy: 0.00%, best loss 0.798576
epoch  68 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.06audios/s, loss=1.58182, acc=42.00%]
100%|██████████| 7552/7552 [00:17<00:00, 432.04audios/s, loss=0.82393, acc=74.00%]
  0%|          | 0/56320 [00:00<?, ?audios/s]

total time elapsed: 6h 12m 21s , best accuracy: 0.00%, best loss 0.798576
epoch  69 with lr=1.00e-05


100%|██████████| 56320/56320 [03:14<00:00, 289.55audios/s, loss=1.57163, acc=43.00%]
100%|██████████| 7552/7552 [00:17<00:00, 435.70audios/s, loss=0.84704, acc=72.00%]


total time elapsed: 6h 15m 54s , best accuracy: 0.00%, best loss 0.798576
finished
