In [1]:
# Turn on IPython's automatic post-mortem debugging for the rest of the session.
# NOTE(review): this looks like a leftover debugging aid; in a non-interactive or
# multi-process run an exception would presumably stop at a pdb prompt — confirm
# whether it should stay.
%pdb on

Automatic pdb calling has been turned ON


In [2]:
import numpy as np
import tensorflow as tf
import sys
import os
# Make the sibling checkouts importable: append each directory to sys.path
# (as an absolute path) unless it is already listed.
for _rel_dir in ('../', '../../tt_keras', '../../t3f'):
    _abs_dir = os.path.abspath(_rel_dir)
    if _abs_dir not in sys.path:
        sys.path.append(_abs_dir)

import automatic_speech_recognition as asr
import time
from datetime import datetime

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm.notebook import tqdm

In [4]:
import horovod.tensorflow.keras as hvd

In [5]:
# Initialise Horovod first; hvd.local_rank() further down is only queried after this call.
hvd.init()
# Pin GPU to be used to process local rank (one GPU per process)
gpus = tf.config.experimental.list_physical_devices('GPU')
# Enable memory growth on every physical GPU before restricting which one is visible.
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
# With no GPUs the list is empty and nothing is pinned; otherwise this process
# only sees the GPU whose index equals its Horovod local rank.
if gpus:
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

In [6]:
from tensorflow import keras
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.mixed_precision import experimental as mixed_precision

## The model / pipeline

In [7]:
def get_pipeline(model, optimizer=None):
    """Assemble the CTC pipeline used for both evaluation and training.

    Args:
        model: Model object handed unchanged to ``asr.pipeline.CTCPipeline``.
        optimizer: Optimizer handed to the pipeline. When omitted (or falsy),
            an Adam optimizer with learning rate 1e-3, beta_1=0.9 and
            beta_2=0.999 is created.

    Returns:
        The ``asr.pipeline.CTCPipeline`` built from an English alphabet, a
        26-feature ``MFCC_legacy`` extractor, ``model``, the optimizer and a
        ``GreedyDecoder``.
    """
    alphabet = asr.text.Alphabet(lang='en')
    features_extractor = asr.features.MFCC_legacy(
        features_num=26,
        standardize=None,
        winlen=0.032,
        winstep=0.02,
    )
    if not optimizer:
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer = tf.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999)
    decoder = asr.decoder.GreedyDecoder()
    return asr.pipeline.CTCPipeline(
        alphabet, features_extractor, model, optimizer, decoder
    )

In [8]:
# Evaluation dataset built from the dev-clean CSV index, in batches of 24.
dev_dataset = asr.dataset.Audio.from_csv('./data/dev-clean-index.csv', batch_size=24, use_filesizes=True)

In [9]:
# Build the model from the local ./data/mozilla_deepspeech.pb file, with the
# loader's mixed-precision option switched off.
model = asr.model.load_mozilla_deepspeech('./data/mozilla_deepspeech.pb', is_mixed_precision=False)

In [10]:
# Render the model graph (with tensor shapes) to model.png.
# Requires pydot and graphviz to be installed; without them Keras only reports
# an import failure and no image is produced.
tf.keras.utils.plot_model(model, 'model.png', show_shapes=True)

Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.


In [11]:
# Pipeline around the loaded model with get_pipeline's default Adam optimizer;
# used by the evaluation cell.
pipeline = get_pipeline(model)

### Evaluation

In [None]:
# Reference error rates of the loaded model on the dev set, timed with the
# wall clock. The *_ref names keep the word/character error rates returned by
# calculate_error_rates available to later cells.
start = time.time()
wer_ref, cer_ref = asr.evaluate.calculate_error_rates(pipeline, dev_dataset, print_pred=False)
elapsed_time = time.time() - start
print(f'WER: {wer_ref}   CER: {cer_ref}')
print(f'Elapsed time {elapsed_time}')



### Training script

In [None]:
def train_model(model, dataset_idx, val_dataset_idx=None, initial_lr=0.001,
                batch_size=10, epochs=25,
                restart_filename=None):
    """Train ``model`` with Horovod and return it together with the fit history.

    Args:
        model: Model to train; it is wrapped in a pipeline via ``get_pipeline``.
        dataset_idx: Path of the CSV index for the training data.
        val_dataset_idx: Optional path of the CSV index for validation data.
            The dataset is loaded and its per-rank batch count is printed, but
            it is currently NOT passed to ``pipeline.fit`` (see below).
        initial_lr: Per-worker learning rate; it is multiplied by
            ``hvd.size()`` before being given to Adam.
        batch_size: Batch size used for both datasets.
        epochs: Number of epochs passed to ``pipeline.fit``.
        restart_filename: Optional weights file loaded into ``model`` before
            training starts.

    Returns:
        A ``(model, hist)`` tuple where ``hist`` is whatever
        ``pipeline.fit`` returns.

    Side effects:
        On rank 0 a ``<timestamp>_best.h5`` weights checkpoint is written
        whenever the monitored metric improves, and progress is printed.
    """
    if restart_filename:
        model.load_weights(restart_filename)

    # Scale the learning rate with the number of Horovod workers.
    initial_lr_global = initial_lr * hvd.size()

    dataset = asr.dataset.Audio.from_csv(
        dataset_idx, batch_size=batch_size, max_filesize=750000,
        use_filesizes=True, group_size=hvd.size(), rank=hvd.rank())
    dataset.sort_by_length()
    dataset.shuffle_indices()

    print(f'Group size: {hvd.size()} rank {hvd.rank()} got {len(dataset)} batches')

    if val_dataset_idx:
        # Loaded only to report the per-rank batch count; not used for fitting.
        val_dataset = asr.dataset.Audio.from_csv(
            val_dataset_idx, batch_size=batch_size, use_filesizes=True,
            group_size=hvd.size(), rank=hvd.rank())

        print(f'Group size: {hvd.size()} rank {hvd.rank()} got {len(val_dataset)} val batches')

    # Validation during fit is deliberately disabled (val_loss was noted as
    # wrong/broken), so no dev dataset is handed to pipeline.fit.
    fit_dev_dataset = None

    opt_instance = tf.optimizers.Adam(initial_lr_global)

    opt = hvd.DistributedOptimizer(opt_instance)
    pipeline = get_pipeline(model, opt)

    callbacks = [
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        hvd.callbacks.MetricAverageCallback(),
    ]

    time_start = time.time()

    if hvd.rank() == 0:
        prefix = datetime.now().strftime("%Y%m%d-%H%M%S")
        print('Will save to: {}'.format(prefix))
        # Monitor a metric that fit will actually produce: with no dev dataset
        # there is no 'val_loss', and a save_best_only checkpoint watching a
        # missing metric never writes a file.
        monitor_metric_name = 'loss' if fit_dev_dataset is None else 'val_loss'
        callbacks.append(
            keras.callbacks.ModelCheckpoint(
                prefix + '_best.h5',
                monitor=monitor_metric_name, save_weights_only=True,
                save_best_only=True))

    hist = pipeline.fit(dataset, dev_dataset=fit_dev_dataset,
                        augmentation=None,
                        epochs=epochs,
                        callbacks=callbacks,
                        # Only rank 0 prints the progress output.
                        verbose=1 if hvd.rank() == 0 else 0)

    elapsed = time.time() - time_start

    if hvd.rank() == 0:
        print(f'Elapsed time: {elapsed}')

    return model, hist

### Training

In [None]:
# Run a single training epoch on the train-clean-360 index with batches of 24.
# Rebinds `model` to the returned model and keeps the fit result in `hist`.
model, hist = train_model(model, dataset_idx='./data/train-clean-360-index.csv',
                          val_dataset_idx='./data/dev-clean-index.csv',
                          batch_size=24, epochs=1)