#### referenced by - https://github.com/kuleshov/audio-super-res

# Training ASR model

In [1]:
import os
os.sys.path.append(os.path.abspath('.'))
os.sys.path.append(os.path.dirname(os.path.abspath('.')))
import numpy as np
import matplotlib
from asr_model import ASRNet, default_opt
from io_utils import upsample_wav
from io_utils import load_h5
import tensorflow as tf
#matplotlib.use('Agg')

In [2]:
args = {
    'train'      : 'prep_audioset.h5',
    'val'        : 'prep_audioset.val.h5',
    'alg'        : 'adam',
    'epochs'     : 5,
    'logname'    : 'default_log_name',
    'layers'     : 4,
    'lr'         : 1e-3,
    'batch_size' : 4
}
print(tf.__version__)

1.5.0


In [3]:
# get data
X_train, Y_train = load_h5(args['train'])
X_val, Y_val = load_h5(args['val'])

List of arrays in input file: KeysView(<HDF5 file "prep_audioset.h5" (mode r)>)
Shape of X: (48, 8192, 1)
Shape of Y: (48, 8192, 1)
List of arrays in input file: KeysView(<HDF5 file "prep_audioset.val.h5" (mode r)>)
Shape of X: (48, 8192, 1)
Shape of Y: (48, 8192, 1)


In [4]:
# determine super-resolution level
n_dim, n_chan = Y_train[0].shape
print('number of dimension:',n_dim)
print('number of channel:',n_chan)
r = Y_train[0].shape[1] / X_train[0].shape[1]
print('r:',r)
assert n_chan == 1 # if not number of channel is not 0 -> Error assert!

number of dimension: 8192
number of channel: 1
r: 1.0


In [5]:
# create model
def get_model(args, n_dim, r, from_ckpt=False, train=True):
    """Create a model based on arguments"""
    
    if train:
        opt_params = {
            'alg' : args['alg'], 
            'lr' : args['lr'], 
            'b1' : 0.9, 
            'b2' : 0.999,
            'batch_size': args['batch_size'], 
            'layers': args['layers']}
    else: 
        opt_params = default_opt

    # create model & init
    model = ASRNet(
        from_ckpt=from_ckpt, 
        n_dim=n_dim, 
        r=r,
        opt_params=opt_params, 
        log_prefix=args['logname'])
    
    return model

model = get_model(args, n_dim, r, from_ckpt=False, train=True)

>> Generator Model init...
D-Block >>  Tensor("generator/Relu:0", shape=(?, ?, 12), dtype=float32)
D-Block >>  Tensor("generator/Relu_1:0", shape=(?, ?, 24), dtype=float32)
D-Block >>  Tensor("generator/Relu_2:0", shape=(?, ?, 48), dtype=float32)
D-Block >>  Tensor("generator/Relu_3:0", shape=(?, ?, 48), dtype=float32)
B-Block >>  Tensor("generator/Relu_4:0", shape=(?, ?, 48), dtype=float32)
U-Block >>  Tensor("generator/concat:0", shape=(?, ?, 96), dtype=float32)
U-Block >>  Tensor("generator/concat_1:0", shape=(?, ?, 96), dtype=float32)
U-Block >>  Tensor("generator/concat_2:0", shape=(?, ?, 48), dtype=float32)
U-Block >>  Tensor("generator/concat_3:0", shape=(?, ?, 24), dtype=float32)
Fin-Layer >>  Tensor("generator/Add:0", shape=(?, ?, 1), dtype=float32)
>> ...finish

creating train_op with params: {'layers': 4, 'b1': 0.9, 'b2': 0.999, 'batch_size': 4, 'lr': 0.001, 'alg': 'adam'}


In [6]:
# train model
model.fit(X_train, Y_train, X_val, Y_val, n_epoch=args['epochs'])

start training epoch (n:5)
num-of-batch: 4

Epoch 1 of 5 took 2.305s (12 minibatches)
  training l2_loss/segsnr:		0.053736	5.767480
  validation l2_loss/segsnr:		0.053736	5.767480
-----------------------------------------------------------------------

Epoch 2 of 5 took 1.905s (12 minibatches)
  training l2_loss/segsnr:		0.053736	5.767480
  validation l2_loss/segsnr:		0.053736	5.767480
-----------------------------------------------------------------------

Epoch 3 of 5 took 1.839s (12 minibatches)
  training l2_loss/segsnr:		0.053736	5.767480
  validation l2_loss/segsnr:		0.053736	5.767480
-----------------------------------------------------------------------

Epoch 4 of 5 took 1.816s (12 minibatches)
  training l2_loss/segsnr:		0.053736	5.767480
  validation l2_loss/segsnr:		0.053736	5.767480
-----------------------------------------------------------------------

Epoch 5 of 5 took 1.754s (12 minibatches)
  training l2_loss/segsnr:		0.053736	5.767480
  validation l2_loss/segsnr:		0.