In [1]:
import tensorflow as tf
# tf.debugging.set_log_device_placement(True)
import tensorflow_addons as tfa
import sys
import os
if os.path.abspath('../') not in sys.path:
    sys.path.append(os.path.abspath('../'))
if os.path.abspath('../../tt_keras') not in sys.path:
    sys.path.append(os.path.abspath('../../tt_keras'))
if os.path.abspath('../../tf2-gradient-checkpointing') not in sys.path:
    sys.path.append(os.path.abspath('../../tf2-gradient-checkpointing'))

if os.path.abspath('../../t3f') not in sys.path:
    sys.path.append(os.path.abspath('../../t3f'))

import automatic_speech_recognition as asr
from automatic_speech_recognition.utils import wrap_call_methods, select_layers
import time
from datetime import datetime
import argparse
import pickle
from checkpointing import checkpointable
from functools import partial
from transform_model import transform

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from tqdm.notebook import tqdm
from h5_to_tflite import TF_CUSTOM_OBJECTS

In [3]:
from tensorflow import keras
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.mixed_precision import experimental as mixed_precision

In [4]:
import horovod.tensorflow.keras as hvd

In [5]:
#%load_ext tensorboard
#%tensorboard --logdir=./models/ --port=32779

In [6]:
# %pdb on

# Create Model

In [None]:
# Load the Mozilla DeepSpeech model from ./data/myfrozen.pb through the asr helper
# (presumably a frozen TensorFlow graph, judging by the file name — TODO confirm)
# and save it under ./models. The local train_model(...) call further down in this
# notebook loads './models/mozilla_deepspeech.pb', i.e. the file written here.
deepspeech = asr.model.load_mozilla_deepspeech('./data/myfrozen.pb', verbose=False)
deepspeech.save('./models/mozilla_deepspeech.pb')

load graph
Instructions for updating:
Use tf.gfile.GFile.


In [None]:
# tt_ranks = np.arange(2, 32, 1)
# # shape = ((16, 2, 2, 2, 16), (16, 2, 2, 2, 16))
# shape = ((4, 4, 8, 4, 4), (4, 4, 8, 4, 4))

In [None]:
# model = asr.model.get_deepspeech(26, 29)

In [None]:
# tt_rank = 12

In [None]:
# replacement_config = {
#     'dense_3': {
#         'config': {
#             'name': 'tt_dense_3',
#             'input_dims': shape[0],
#             'output_dims': shape[1],
#             'tt_rank': tt_rank,
#         },
# #         'init': {
# #             'max_tt_rank': tt_rank,
# #             'epsilon': 0.0
# #         }
#     },
    
#     'dense_2': {
#         'config': {
#             'name': 'tt_dense_2',
#             'input_dims': shape[0],
#             'output_dims': shape[1],
#             'tt_rank': tt_rank,
#         },
# #         'init': {
# #             'max_tt_rank': tt_rank,
# #             'epsilon': 0.0
# #         }
#     },
    
    
#     'dense_4': {
#         'config': {
#             'name': 'tt_dense_4',
#             'input_dims': shape[0],
#             'output_dims': shape[1],
#             'tt_rank': tt_rank,
#         },
# #         'init': {
# #             'max_tt_rank': tt_rank,
# #             'epsilon': 0.0
# #         }
#     },
# #     'lstm_1': {
# #             'cell_type': 'FusedTTLSTMCell',
# #             'config': {
# #                 'name': 'tt_fusedlstm_0',
# #                 'input_dims': shape[0],
# #                 'output_dims': shape[1],
# #                 'tt_rank': tt_rank,
# #                 'recurrent_tt_rank': tt_rank,
# #             },
# # #             'init': {
# # #                 'kernel': {
# # #                     'max_tt_rank': 12,
# # #                     'epsilon': 0.0
# # #                 },
# # #                 'recurrent': {
# # #                     'max_tt_rank': 12,
# # #                     'epsilon': 0.0
# # #                 }
# # #             }
# #         }
# }
# transformed_model = transform(model, replacement_config)

In [None]:
# model.summary()

## Train

In [None]:
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [None]:
# Horovod has to be initialised before hvd.local_rank() can be queried below.
hvd.init()

# With at least one physical GPU present: switch every GPU to on-demand memory
# growth, then make only the GPU indexed by this process's local rank visible
# (one GPU per process).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    tf.config.experimental.set_visible_devices(gpus[hvd.local_rank()], 'GPU')

In [None]:
# from tensorflow.keras.mixed_precision import experimental as mixed_precision
# policy = mixed_precision.Policy('mixed_float16')
# mixed_precision.set_policy(policy)

In [None]:
def get_pipeline(model, optimizer=None):
    """Build an ``asr.pipeline.CTCPipeline`` around ``model``.

    The pipeline is assembled from an English alphabet, an MFCC feature
    extractor (26 features, 32 ms window, 20 ms step) and a greedy decoder.

    Args:
        model: Model object forwarded unchanged to ``CTCPipeline``.
        optimizer: Optimizer forwarded to ``CTCPipeline``. When ``None``, an
            Adam optimizer with learning rate 1e-3 is created.

    Returns:
        The constructed ``asr.pipeline.CTCPipeline`` instance.
    """
    alphabet = asr.text.Alphabet(lang='en')
    features_extractor = asr.features.MFCC(
        features_num=26,
        winlen=0.032,
        winstep=0.02,
    )
    if optimizer is None:
        # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
        optimizer = tf.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999)
    decoder = asr.decoder.GreedyDecoder()
    return asr.pipeline.CTCPipeline(
        alphabet, features_extractor, model, optimizer, decoder
    )

In [None]:
def tf_func_wrapper(layer):
    """Return ``layer.call`` wrapped in ``tf.function`` with relaxed shapes enabled."""
    compiled_call = tf.function(layer.call, experimental_relax_shapes=True)
    return compiled_call

def checkpointing_wrapper(layer):
    """Return ``layer.call`` wrapped by ``checkpointable`` with checkpointing enabled.

    The returned callable has ``_checkpoint=True`` pre-bound and receives the
    layer's trainable variables (as read at wrap time) through ``_watch_vars``.
    """
    return partial(
        checkpointable(layer.call),
        _checkpoint=True,
        _watch_vars=layer.trainable_variables,
    )

In [None]:
def train_model(filename, 
                dataset_idx, 
                val_dataset_idx=None, 
                batch_size=10, 
                epochs=25, 
                tensorboard=False, 
                restart_filename=None,
                freeze_rnn_layers=False,
                freeze_time_distr_layers=False):
    """Load a saved model and train it through the CTC pipeline under Horovod.

    Args:
        filename: Path of the saved model, loaded with ``keras.models.load_model``.
            Checkpoints and TensorBoard logs are written to a sibling directory
            named ``<basename>_train``.
        dataset_idx: CSV index passed to ``asr.dataset.Audio.from_csv`` for the
            training set.
        val_dataset_idx: Optional CSV index of the validation set. When omitted,
            ``dev_dataset=None`` is handed to ``pipeline.fit``.
        batch_size: Batch size used for both datasets.
        epochs: Number of epochs passed to ``pipeline.fit``.
        tensorboard: If true, rank 0 additionally attaches a TensorBoard callback.
        restart_filename: Optional weights file loaded into the model before training.
        freeze_rnn_layers: If true, every ``keras.layers.RNN`` layer selected from
            the model is marked non-trainable.
        freeze_time_distr_layers: If true, every ``keras.layers.TimeDistributed``
            layer selected from the model is marked non-trainable.
    """
    basename = os.path.basename(filename).split('.')[0]
    model_dir = os.path.join(os.path.dirname(filename), basename + '_train')
    os.makedirs(model_dir, exist_ok=True)

    model = keras.models.load_model(filename, custom_objects=TF_CUSTOM_OBJECTS)

    # Wrap layers in tf func and checkpoints.
    # If we use autograph on function which is checkpointed then checkpoints will not work 
    model = wrap_call_methods(model, tf_func_wrapper, 
                              wrap_rnn_cells=True, 
                              wrap_time_distributed_inner=True)
    model = wrap_call_methods(model, checkpointing_wrapper, 
                              wrap_rnn_cells=True, 
                              trainable_only=True,
                              wrap_time_distributed_inner=True)
    if freeze_rnn_layers:
        rnn_layers = select_layers(model, other_predicate=lambda x: isinstance(x, keras.layers.RNN))
        for layer in rnn_layers:
            layer.trainable = False
    if freeze_time_distr_layers:
        time_distr_layers = select_layers(model, other_predicate=lambda x: isinstance(x, keras.layers.TimeDistributed))
        for layer in time_distr_layers:
            layer.trainable = False

    if restart_filename:
        model.load_weights(restart_filename)
    dataset = asr.dataset.Audio.from_csv(dataset_idx, batch_size=batch_size, use_filesizes=True)
    dataset.sort_by_length()
    dataset.shuffle_indices()

    # Always bind val_dataset: it is passed to pipeline.fit unconditionally, so
    # leaving it undefined when no validation index is given raised NameError.
    # NOTE(review): assumes pipeline.fit accepts dev_dataset=None — confirm.
    val_dataset = None
    if val_dataset_idx:
        val_dataset = asr.dataset.Audio.from_csv(val_dataset_idx, batch_size=batch_size, use_filesizes=True)

#     opt_instance = tf.optimizers.Adam(1e-3 * hvd.size(), beta_1=0.9, beta_2=0.999)
    opt_instance = tfa.optimizers.NovoGrad(1e10, beta_1=0.95, beta_2=0.5, weight_decay=0.001)
    opt = hvd.DistributedOptimizer(opt_instance)
#     opt.apply_gradients = avg_grads(opt.apply_gradients, averager)

    pipeline = get_pipeline(model, opt)

    callbacks = [
        hvd.callbacks.BroadcastGlobalVariablesCallback(0),
        hvd.callbacks.MetricAverageCallback(),
    ]
    # NOTE(review): the schedule's initial learning rate is 0.0 * hvd.size() == 0.0,
    # so the scheduled rate looks like it is 0 for every epoch — confirm this is intended.
    schedule=tf.keras.experimental.CosineDecayRestarts(
        0.0 * hvd.size(), 10, t_mul=2.0, m_mul=1.0, alpha=0.0,
    )
    callbacks.append(LearningRateScheduler(schedule))
    # Only rank 0 writes checkpoints / TensorBoard logs.
    if hvd.rank() == 0:
        prefix = datetime.now().strftime("%Y%m%d-%H%M%S")
        monitor_metric_name = 'loss' # if not val_dataset_idx else 'val_loss'  # val_loss is wrong and broken
        callbacks.append(
            keras.callbacks.ModelCheckpoint(
                os.path.join(model_dir, prefix + '_best.ckpt'),
                monitor=monitor_metric_name, save_weights_only=True,
                save_best_only=True))
        if tensorboard:
            logdir = os.path.join(model_dir, 'tb', prefix)
            tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir, profile_batch=1)
            callbacks.append(tensorboard_callback)

    time_start = time.time()

    hist = pipeline.fit(dataset, epochs=epochs, dev_dataset=val_dataset,
                        callbacks=callbacks,
                        verbose=1 if hvd.rank() == 0 else 0,
                        validation_steps=10)
    elapsed = time.time() - time_start

    if hvd.rank() == 0:
        print(f'Elapsed time: {elapsed}')
        #np.save(os.path.join(model_dir, prefix + '_hist.p'), np.array(hist))

In [None]:
# Local run: train the model saved at ./models/mozilla_deepspeech.pb on the
# dev-clean index and validate on the test-clean index.
train_model(
    './models/mozilla_deepspeech.pb',
    './data/dev-clean-index.csv',
    val_dataset_idx='./data/test-clean-index.csv',
    epochs=250,
    batch_size=10,
    tensorboard=False,
    # restart_filename='./experiments/initall_unfreezeall_denseoutput_r12/checkpoints/final_layer_best.ckpt',
)

# Cluster Training Scripts

In [29]:
# Run parameters; the names mirror train_model's keyword arguments.
# NOTE(review): no cell visible in this notebook reads these globals — confirm they are still needed.
filename = './models/ds.h5'
dataset_idx = './data/train-clean-100-index.csv'
val_dataset_idx = './data/dev-clean-index.csv'
epochs = 25
tensorboard = True
restart_filename = None

In [45]:
from pathlib import Path
import shutil
import os

def create_horovod_starters(template,
                            model_paths, dest_folder='./models',
                            train_ds='../../train-clean-100-index.csv',
                            val_ds='../../test-clean-index.csv',
                            batch_size=10*4,
                            epochs=40,
                            script_name='slurm_horovod.sh',):
    """
    Creates .sh files for sbatch to initiate training. Returns paths to all created scripts.

    For every model in ``model_paths`` a folder named after the part of the model
    file name before the first dot is created inside ``dest_folder``; the model
    file is copied into it and the filled-in template is written next to the copy.

    Args:
        template: Script text with five positional ``{}`` placeholders, filled in
            this order: model file name, train_ds, val_ds, batch_size, epochs.
        model_paths: Iterable of model file paths (``str`` or ``Path``).
        dest_folder: Root folder for the per-model folders. Created, including
            missing parent directories, if it does not exist.
        train_ds: Training index path inserted into the script as given.
        val_ds: Validation index path inserted into the script as given.
        batch_size: Batch size inserted into the script.
        epochs: Epoch count inserted into the script.
        script_name: File name of the script written into each model folder.

    Returns:
        List of ``Path`` objects, one per created script, in ``model_paths`` order.
    """
    dest_folder = Path(dest_folder)
    # parents=True: a nested destination such as './models/some_experiment' must
    # work even when its parent directory has not been created yet.
    dest_folder.mkdir(parents=True, exist_ok=True)

    created_scripts = []
    for path in model_paths:
        path = Path(path)
        model_folder = dest_folder/(path.name.split('.')[0])
        model_folder.mkdir(exist_ok=True)

        shutil.copyfile(path, model_folder/path.name)

        script_path = model_folder/script_name
        script_path.write_text(
            template.format(path.name, train_ds, val_ds, batch_size, epochs)
        )
        created_scripts.append(script_path)
    return created_scripts
    
        
def run_sbatch_scripts(script_paths, environ_vars=None):
    """
    Runs sbatch {script_name}.sh for every script in script_paths. Before running changes working directory to script dir.

    Args:
        script_paths: Iterable of script paths (``str`` or ``Path``).
        environ_vars: Optional mapping of extra environment variables for the
            ``sbatch`` process only. ``None`` (the default) adds nothing.

    The standard output of every ``sbatch`` invocation is printed.

    Raises:
        FileNotFoundError: If ``sbatch`` is not on PATH or a script's directory
            does not exist.
    """
    # Function-scope import so this definition does not depend on another cell.
    import subprocess

    # Equivalent of the shell's "A=B C=D sbatch script.sh": the extra variables
    # are layered over a copy of the current environment, which is left untouched.
    child_env = dict(os.environ)
    for name, value in (environ_vars or {}).items():
        child_env[str(name)] = str(value)

    for path in script_paths:
        path = Path(path)
        # Argument list + cwd instead of a shell command string: paths and
        # values containing spaces or shell metacharacters are passed verbatim.
        result = subprocess.run(
            ['sbatch', path.name],
            cwd=str(path.parent),
            env=child_env,
            stdout=subprocess.PIPE,
            universal_newlines=True,
        )
        print(result.stdout)

In [46]:
# sbatch script template for the training run started via horovod_training_biglr.py.
# The five positional `{}` placeholders on the horovodrun line are filled by
# create_horovod_starters in this order: model file name, training index,
# validation index, batch size, epochs.
# The relative paths (./out_DEV_biglr.txt, ../../horovod_training_biglr.py) are
# resolved from the script's own folder, since run_sbatch_scripts changes into it
# before calling sbatch.
train_all_template = """#!/bin/bash
#SBATCH -N 1 
#SBATCH -n 4
#SBATCH -o ./out_DEV_biglr.txt
#SBATCH -e ./error_DEV_biglr.txt
#SBATCH --gres gpu:4
#SBATCH --time=23:00:00    
#SBATCH --partition=gpu_small

module unload python/python-3.6.8
module load gpu/cuda-10.1
module load mpi/openmpi-3.1.2
source /trinity/home/g.leleitner/lab/Horovod/asr_3.7/bin/activate

horovodrun -np 4 -H localhost:4 python ../../horovod_training_biglr.py --filename {} --dataset {} --val_dataset {} --batch_size {} --epochs {}
"""

In [20]:
paths = create_horovod_starters(train_all_template,
[
#     './models/tt_initall_bn_r12.h5',
#     './models/tt_initall_r12.h5',
    
#     './models/tt_dense234_initall_bn_r12.h5',
#     './models/tt_dense234_initall_r12.h5',
    './models/tt_dense3_initall_r12.h5',
#     './models/tt_dense3_initall_bn_r12.h5',
#     './models/tt_dense234_savedall_r12.h5',
#     './models/tt_dense3_savedall_r12.h5',
    
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r2.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r5.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r9.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r15.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r20.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r25.h5',
    
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r2.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r5.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r9.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r15.h5',
# #     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r20.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r25.h5',
], train_ds='../../dev-clean-index.csv', val_ds='../../test-clean-index.csv', batch_size=28-4)
# We will add all required libraries by setting PYTHONPATH variable instead of editing it inside of the script.
# os.environ.get + filter(None, ...) keeps this cell working when PYTHONPATH is
# unset (or empty) in the kernel; with PYTHONPATH set the resulting value is
# the same as plain concatenation.
run_sbatch_scripts(paths, {
    'PYTHONPATH': ':'.join(filter(None, [
        os.environ.get('PYTHONPATH'),
        '/trinity/home/g.leleitner/lab/Horovod/Automatic-Speech-Recognition',
        '/trinity/home/g.leleitner/lab/Horovod/tt_keras',
        '/trinity/home/g.leleitner/lab/Horovod/t3f',
        '/trinity/home/g.leleitner/lab/Horovod/tf2-gradient-checkpointing',
    ]))
})

Submitted batch job 310395



In [10]:
# sbatch script template for runs started via horovod_training.py with the
# --freeze_rnn flag. The five positional `{}` placeholders on the horovodrun line
# are filled by create_horovod_starters in this order: model file name, training
# index, validation index, batch size, epochs.
# The training script is referenced three levels up (../../../), one level more
# than in train_all_template.
freeze_rnn_template = """#!/bin/bash
#SBATCH -N 1 
#SBATCH -n 4
#SBATCH -o ./out.txt
#SBATCH -e ./error.txt
#SBATCH --gres gpu:4
#SBATCH --time=20:00:00    
#SBATCH --partition=gpu_small

module unload python/python-3.6.8
module load gpu/cuda-10.1
module load mpi/openmpi-3.1.2
source /trinity/home/g.leleitner/lab/Horovod/asr_3.7/bin/activate

horovodrun -np 4 -H localhost:4 python ../../../horovod_training.py --filename {} --dataset {} --val_dataset {} --batch_size {} --epochs {} --freeze_rnn
"""

In [11]:
paths = create_horovod_starters(freeze_rnn_template, dest_folder='./models/freeze_rnn_experiments',
model_paths=[
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r2.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r5.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r9.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r15.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r20.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initall_r25.h5',
    
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r2.h5',
    '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r5.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r9.h5',
    '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r15.h5',
#     '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r20.h5',
    '/gpfs/gpfs0/r.schutski/models/tt_ds_uniform_initdense_r25.h5',
])
# We will add all required libraries by setting PYTHONPATH variable instead of editing it inside of the script.
# os.environ.get + filter(None, ...) keeps this cell working when PYTHONPATH is
# unset (or empty) in the kernel; with PYTHONPATH set the resulting value is
# the same as plain concatenation.
run_sbatch_scripts(paths, {
    'PYTHONPATH': ':'.join(filter(None, [
        os.environ.get('PYTHONPATH'),
        '/trinity/home/g.leleitner/lab/Horovod/Automatic-Speech-Recognition',
        '/trinity/home/g.leleitner/lab/Horovod/tt_keras',
        '/trinity/home/g.leleitner/lab/Horovod/t3f',
        '/trinity/home/g.leleitner/lab/Horovod/tf2-gradient-checkpointing',
    ]))
})

Submitted batch job 306168

Submitted batch job 306169

Submitted batch job 306170

