In [1]:
%pdb on

Automatic pdb calling has been turned ON


In [2]:
import numpy as np
import tensorflow as tf
import sys
import os
if os.path.abspath('../') not in sys.path:
    sys.path.append(os.path.abspath('../'))
if os.path.abspath('../../tt_keras') not in sys.path:
    sys.path.append(os.path.abspath('../../tt_keras'))

if os.path.abspath('../../t3f') not in sys.path:
    sys.path.append(os.path.abspath('../../t3f'))

import automatic_speech_recognition as asr
import time

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm

## Reference model and scripts

In [4]:
def get_pipeline(model):
    """Wrap ``model`` in a CTC pipeline for English speech recognition.

    Args:
        model: The network handed to ``asr.pipeline.CTCPipeline``.

    Returns:
        The ``asr.pipeline.CTCPipeline`` assembled from an English alphabet,
        a 26-feature legacy MFCC extractor, an Adam optimizer and a greedy
        decoder.
    """
    alphabet = asr.text.Alphabet(lang='en')
    features_extractor = asr.features.MFCC_legacy(
        features_num=26,
        standardize=False,
        winlen=0.032,
        winstep=0.02,
    )
    # `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
    optimizer = tf.optimizers.Adam(learning_rate=1e-3, beta_1=0.9, beta_2=0.999)
    decoder = asr.decoder.GreedyDecoder()
    return asr.pipeline.CTCPipeline(
        alphabet, features_extractor, model, optimizer, decoder
    )

In [5]:
from transform_model import transform

In [6]:
from tt_dense_layer import TTDense
from tt_recurrent_layer import SimpleTTRNNCell, TTLSTMCell, FusedTTLSTMCell
# Custom layer and cell classes keyed by class name; get_accuracy_by_rank passes
# this mapping as `custom_objects` to tf.keras.models.load_model.
custom_objects = {
    'TTDense': TTDense,
    'SimpleTTRNNCell': SimpleTTRNNCell,
    'TTLSTMCell': TTLSTMCell,
    'FusedTTLSTMCell': FusedTTLSTMCell,
}

In [7]:
def get_accuracy_by_rank(test_ds, models_filename_template, ranks):
    """Evaluate one saved model per rank and collect its error rates.

    Args:
        test_ds: Dataset passed to ``asr.evaluate.calculate_error_rates``.
        models_filename_template: Format string with one ``{}`` placeholder
            that is filled with the rank to obtain the model file name.
        ranks: Iterable of ranks to evaluate.

    Returns:
        Tuple ``(ranks, wers, cers)`` of NumPy arrays, one entry per rank.
    """
    error_rates = []
    for current_rank in tqdm(ranks):
        path = models_filename_template.format(current_rank)
        loaded = tf.keras.models.load_model(path, custom_objects=custom_objects)
        wer, cer = asr.evaluate.calculate_error_rates(
            get_pipeline(loaded), test_ds, print_pred=False
        )
        error_rates.append((wer, cer))
    wers = [pair[0] for pair in error_rates]
    cers = [pair[1] for pair in error_rates]
    return np.array(ranks), np.array(wers), np.array(cers)

In [8]:
# Load the reference model from the frozen graph file
# './data/mozilla_deepspeech.pb' via asr.model.load_mozilla_deepspeech.
# Every later cell derives its models from this `model`.
model = asr.model.load_mozilla_deepspeech('./data/mozilla_deepspeech.pb', tflite_version=False, is_mixed_precision=False)

load graph
Instructions for updating:
Use tf.gfile.GFile.
['IteratorV2', 'IteratorGetNext', 'Shape', 'strided_slice/stack', 'strided_slice/stack_1', 'strided_slice/stack_2', 'strided_slice', 'Const', 'conv1d/ExpandDims/dim', 'conv1d/ExpandDims', 'conv1d/ExpandDims_1/dim', 'conv1d/ExpandDims_1', 'conv1d', 'conv1d/Squeeze', 'Reshape/shape/1', 'Reshape/shape/2', 'Reshape/shape/3', 'Reshape/shape', 'Reshape', 'transpose/perm', 'transpose', 'Reshape_1/shape', 'Reshape_1', 'layer_1/bias', 'layer_1/bias/read', 'layer_1/weights', 'layer_1/weights/read', 'MatMul', 'BiasAdd', 'Relu', 'Minimum/y', 'Minimum', 'StringFormat', 'PrintV2', 'Identity', 'layer_2/bias', 'layer_2/bias/read', 'layer_2/weights', 'layer_2/weights/read', 'MatMul_1', 'BiasAdd_1', 'Relu_1', 'Minimum_1/y', 'Minimum_1', 'layer_3/bias', 'layer_3/bias/read', 'layer_3/weights', 'layer_3/weights/read', 'MatMul_2', 'BiasAdd_2', 'Relu_2', 'Minimum_2/y', 'Minimum_2', 'Reshape_2/shape', 'Reshape_2', 'cudnn_lstm/rnn/multi_rnn_cell/cell_0/

In [9]:
# Persist the reference model next to the derived models under ./models/.
model.save('./models/ds.h5')

In [10]:
### Reference accuracy

In [11]:
# Pipeline around the unmodified reference model; used for the baseline WER/CER.
pipeline_ref = get_pipeline(model)

In [12]:
# Evaluation data read from the './data/dev-clean-index.csv' index, in batches of 32.
dev_dataset = asr.dataset.Audio.from_csv('./data/dev-clean-index.csv', batch_size=32)

In [13]:
# Time one evaluation pass of the reference pipeline over the dev set.
tim = time.time()
wer, cer = asr.evaluate.calculate_error_rates(pipeline_ref, dev_dataset)
ref_time = time.time() - tim
# Keep the baseline under dedicated names: `wer`/`cer` are generic, and the
# plotting cells further down read `ref_time`, `wer_ref` and `cer_ref`.
# NOTE(review): presumably this elapsed time is the intended `ref_time` — confirm.
wer_ref, cer_ref = wer, cer
print(f"Elapsed: {ref_time}")
print(f"WER: {wer}, CER: {cer}")

Elapsed: 359.74946808815
WER: 0.13360058612399023, CER: 0.041848369145504376


## Sensitivity test

In [13]:
# Ranks swept in the sensitivity test: 2, 62, ..., 1022 (18 values).
tt_ranks = np.arange(2, 1024, 60)
# shape[0] is used as 'input_dims' and shape[1] as 'output_dims' by the
# replace_* helpers; each factorization multiplies out to 16*2*2*2*16 = 2048.
shape = ((16, 2, 2, 2, 16), (16, 2, 2, 2, 16))

In [14]:
# Re-create the dev dataset (same CSV index and batch size as the reference run).
dev_dataset = asr.dataset.Audio.from_csv('./data/dev-clean-index.csv', batch_size=32)

In [15]:
def _apply_replacement(model, replacement_config):
    """Run ``transform`` with ``replacement_config`` and mark the result trainable."""
    # from_layer=8 is shared by every replacement in this notebook; its exact
    # meaning is defined by transform_model.transform.
    new_model = transform(model, replacement_config, from_layer=8)
    new_model.trainable = True
    return new_model


def _replace_single_dense(model, shape, rank, index):
    """Replace the layer keyed ``td_dense_<index>`` with one named ``tt_dense_<index>``."""
    replacement_config = {
        f'td_dense_{index}': {
            'config': {
                'name': f'tt_dense_{index}',
                'input_dims': shape[0],
                'output_dims': shape[1],
                'activation': 'linear',
                'tt_rank': rank,
            },
            'init': {
                'max_tt_rank': rank,
                'epsilon': 0.0,
            },
        },
    }
    return _apply_replacement(model, replacement_config)


def replace_dense2(model, shape, rank):
    """Return a trainable copy of ``model`` with only ``td_dense_2`` replaced.

    Args:
        model: Model passed to ``transform``.
        shape: Pair ``(input_dims, output_dims)`` for the new layer.
        rank: Used both as ``tt_rank`` and as ``max_tt_rank`` of the init section.

    Returns:
        The model returned by ``transform``, with ``trainable`` set to True.
    """
    return _replace_single_dense(model, shape, rank, 2)


def replace_dense3(model, shape, rank):
    """Return a trainable copy of ``model`` with only ``td_dense_3`` replaced.

    Arguments and return value are the same as for ``replace_dense2``.
    """
    return _replace_single_dense(model, shape, rank, 3)


def replace_lstm(model, shape, lstm_rank, lstm_r_rank):
    """Return a trainable copy of ``model`` with only the ``lstm`` entry replaced.

    Args:
        model: Model passed to ``transform``.
        shape: Pair ``(input_dims, output_dims)`` for the new cell.
        lstm_rank: ``tt_rank`` of the cell and ``max_tt_rank`` of its kernel init.
        lstm_r_rank: ``recurrent_tt_rank`` of the cell and ``max_tt_rank`` of
            its recurrent init.

    Returns:
        The model returned by ``transform``, with ``trainable`` set to True.
    """
    replacement_config = {
        'lstm': {
            'cell_type': 'FusedTTLSTMCell',
            'config': {
                'name': 'tt_fusedlstm_0',
                'input_dims': shape[0],
                'output_dims': shape[1],
                'tt_rank': lstm_rank,
                'recurrent_tt_rank': lstm_r_rank,
            },
            'init': {
                'kernel': {
                    'max_tt_rank': lstm_rank,
                    'epsilon': 0.0,
                },
                'recurrent': {
                    'max_tt_rank': lstm_r_rank,
                    'epsilon': 0.0,
                },
            },
        },
    }
    return _apply_replacement(model, replacement_config)


def replace_dense4(model, shape, rank):
    """Return a trainable copy of ``model`` with only ``td_dense_4`` replaced.

    Arguments and return value are the same as for ``replace_dense2``.
    """
    return _replace_single_dense(model, shape, rank, 4)

In [16]:
# Sweep tt_ranks: swap the layer handled by replace_dense2 and save each model.
for rank in tqdm(tt_ranks):
    filename = f'./models/ds_sensitivity/ds_dense2_r{rank}.h5'
    tt_model = replace_dense2(model, shape, rank)
    tt_model.save(filename, save_format='h5')

100%|██████████| 18/18 [03:06<00:00, 10.34s/it]


In [17]:
# Sweep tt_ranks: swap the layer handled by replace_dense3 and save each model.
for rank in tqdm(tt_ranks):
    filename = f'./models/ds_sensitivity/ds_dense3_r{rank}.h5'
    tt_model = replace_dense3(model, shape, rank)
    tt_model.save(filename, save_format='h5')

100%|██████████| 18/18 [09:01<00:00, 30.07s/it]


In [18]:
# Sweep tt_ranks: swap the LSTM (same rank for kernel and recurrent parts) and
# save each model.
# NOTE(review): the recorded run of this cell stopped on the first iteration
# with an InternalError raised by Op:Svd (cuSolverDN call failed), so no
# ds_lstm_r*.h5 file was produced by that run.
for rank in tqdm(tt_ranks):
    filename = f'./models/ds_sensitivity/ds_lstm_r{rank}.h5'
    tt_model = replace_lstm(model, shape, rank, rank)
    tt_model.save(filename, save_format='h5')

  0%|          | 0/18 [00:00<?, ?it/s]


InternalError: tensorflow/core/kernels/cuda_solvers.cc:686: cuSolverDN call failed with status =3 [Op:Svd]

> [0;32m<string>[0m(3)[0;36mraise_from[0;34m()[0m



ipdb>  q


In [19]:
# Sweep tt_ranks: swap the layer handled by replace_dense4 and save each model.
for rank in tqdm(tt_ranks):
    filename = f'./models/ds_sensitivity/ds_dense4_r{rank}.h5'
    tt_model = replace_dense4(model, shape, rank)
    tt_model.save(filename, save_format='h5')

100%|██████████| 18/18 [08:56<00:00, 29.80s/it]


In [None]:
# Evaluate one saved model per rank on the dev set.
# NOTE(review): no visible cell writes './models/tt_ds_uniform_fix_r{}.h5'; the
# loops above save under './models/ds_sensitivity/' — confirm these files exist.
ranks, wers_fix, cers_fix = get_accuracy_by_rank(dev_dataset, './models/tt_ds_uniform_fix_r{}.h5', tt_ranks)

In [None]:
# Twin-axis figure: speedup on the left axis, WER/CER drop on the right axis.
# NOTE(review): `times` is not assigned in any visible cell, and `ref_time`,
# `wer_ref`, `cer_ref` must hold the reference run's values — make sure all
# four exist before running this cell.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

ax.plot(ranks, ref_time / times, label='speedup')
ax2 = ax.twinx()
# Plot against `ranks`, the x-values returned together with wers_fix/cers_fix.
ax2.plot(ranks, wer_ref - wers_fix, ls='-', label='WER')
ax2.plot(ranks, cer_ref - cers_fix, ls=':', label='CER')
ax.set_xlabel('rank')
ax.set_ylabel('speedup')
ax2.set_ylabel('accuracy drop')

# The label= arguments are only rendered once a legend is drawn; merge the
# entries of both axes into a single legend on the top-most axis.
left_handles, left_labels = ax.get_legend_handles_labels()
right_handles, right_labels = ax2.get_legend_handles_labels()
ax2.legend(left_handles + right_handles, left_labels + right_labels)

fig.suptitle('Network execution time and accuracy as function of rank')

## Accuracy when initializing all dense layers but tuning the LSTM

In [17]:
# Ranks evaluated in this section: 2, 3, ..., 31 (30 values).
tt_ranks = np.arange(2, 32, 1)
# shape[0] is used as 'input_dims' and shape[1] as 'output_dims';
# each factorization multiplies out to 16*2*2*2*16 = 2048.
shape = ((16, 2, 2, 2, 16), (16, 2, 2, 2, 16))

In [18]:
# Reload the dev set with batch size 8 (32 in the earlier cells) and two extra
# loader options; see asr.dataset.Audio.from_csv for their exact meaning.
dev_dataset = asr.dataset.Audio.from_csv('./data/dev-clean-index.csv', batch_size=8, use_filesizes=True, relative_paths=False)

In [19]:
def get_tt_model_initdense(model, shape, dense_ranks, lstm_rank, lstm_r_rank):
    """Replace dense_2, dense_3, the LSTM and dense_4 in a single transform call.

    Only the LSTM entry carries an 'init' section (kernel and recurrent, both
    with epsilon 0.0); the three dense entries are passed with 'config' only.

    Args:
        model: Model passed to ``transform``.
        shape: Pair ``(input_dims, output_dims)`` shared by every replaced layer.
        dense_ranks: Ranks for dense_2, dense_3 and dense_4, in that order.
        lstm_rank: ``tt_rank`` of the LSTM cell and ``max_tt_rank`` of its kernel init.
        lstm_r_rank: ``recurrent_tt_rank`` of the cell and ``max_tt_rank`` of
            its recurrent init.

    Returns:
        The model returned by ``transform``, with ``trainable`` set to True.
    """
    in_dims, out_dims = shape[0], shape[1]

    def dense_entry(layer_name, tt_rank):
        # Dense replacement without an 'init' section.
        return {
            'config': {
                'name': layer_name,
                'input_dims': in_dims,
                'output_dims': out_dims,
                'activation': 'linear',
                'tt_rank': tt_rank,
            },
        }

    lstm_entry = {
        'cell_type': 'FusedTTLSTMCell',
        'config': {
            'name': 'tt_fusedlstm_0',
            'input_dims': in_dims,
            'output_dims': out_dims,
            'tt_rank': lstm_rank,
            'recurrent_tt_rank': lstm_r_rank,
        },
        'init': {
            'kernel': {'max_tt_rank': lstm_rank, 'epsilon': 0.0},
            'recurrent': {'max_tt_rank': lstm_r_rank, 'epsilon': 0.0},
        },
    }

    # Key order matches the original literal: dense_2, dense_3, lstm, dense_4.
    replacement_config = {
        'dense_2': dense_entry('tt_dense_2', dense_ranks[0]),
        'dense_3': dense_entry('tt_dense_3', dense_ranks[1]),
        'lstm': lstm_entry,
        'dense_4': dense_entry('tt_dense0', dense_ranks[2]),
    }

    tt_model = transform(model, replacement_config, from_layer=8)
    tt_model.trainable = True
    return tt_model

In [20]:
# One model per rank, using the same rank for the three dense layers and for
# both LSTM rank arguments.
for rank in tqdm(tt_ranks):
    filename = f'./models/tt_ds_uniform_initdense_r{rank}.h5'
    tt_model = get_tt_model_initdense(model, shape, [rank] * 3, rank, rank)
    tt_model.save(filename, save_format='h5')

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




In [None]:
# Evaluate the tt_ds_uniform_initdense_r{rank}.h5 files saved by the loop above.
ranks, wers_initdense, cers_initdense = get_accuracy_by_rank(dev_dataset, './models/tt_ds_uniform_initdense_r{}.h5', tt_ranks)

In [None]:
# Twin-axis figure: speedup on the left axis, WER/CER drop on the right axis.
# NOTE(review): `times` is not assigned in any visible cell, and `ref_time`,
# `wer_ref`, `cer_ref` must hold the reference run's values — make sure all
# four exist before running this cell.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

ax.plot(ranks, ref_time / times, label='speedup')
ax2 = ax.twinx()
# Plot against `ranks`, the x-values returned together with the error rates.
ax2.plot(ranks, wer_ref - wers_initdense, ls='-', label='WER')
ax2.plot(ranks, cer_ref - cers_initdense, ls=':', label='CER')
ax.set_xlabel('rank')
ax.set_ylabel('speedup')
ax2.set_ylabel('accuracy drop')

# The label= arguments are only rendered once a legend is drawn; merge the
# entries of both axes into a single legend on the top-most axis.
left_handles, left_labels = ax.get_legend_handles_labels()
right_handles, right_labels = ax2.get_legend_handles_labels()
ax2.legend(left_handles + right_handles, left_labels + right_labels)

fig.suptitle('Network execution time and accuracy as function of rank')

## Accuracy when initializing all layers

In [24]:
# Ranks evaluated in this section: 2, 3, ..., 31 (30 values).
tt_ranks = np.arange(2, 32, 1)
# shape[0] is used as 'input_dims' and shape[1] as 'output_dims';
# each factorization multiplies out to 16*2*2*2*16 = 2048.
shape = ((16, 2, 2, 2, 16), (16, 2, 2, 2, 16))

In [25]:
# Reload the dev set with batch size 8 and two extra loader options; see
# asr.dataset.Audio.from_csv for their exact meaning.
dev_dataset = asr.dataset.Audio.from_csv('./data/dev-clean-index.csv', batch_size=8, use_filesizes=True, relative_paths=False)

In [26]:
def get_tt_model_initall(model, shape, dense_ranks, lstm_rank, lstm_r_rank):
    """Replace dense_2, dense_3, the LSTM and dense_4 in a single transform call.

    None of the four entries carries an 'init' section; each is passed with
    its 'config' only (plus 'cell_type' for the LSTM).

    Args:
        model: Model passed to ``transform``.
        shape: Pair ``(input_dims, output_dims)`` shared by every replaced layer.
        dense_ranks: Ranks for dense_2, dense_3 and dense_4, in that order.
        lstm_rank: ``tt_rank`` of the LSTM cell.
        lstm_r_rank: ``recurrent_tt_rank`` of the LSTM cell.

    Returns:
        The model returned by ``transform``, with ``trainable`` set to True.
    """
    in_dims, out_dims = shape[0], shape[1]

    def dense_entry(layer_name, tt_rank):
        # Dense replacement described by its config alone.
        return {
            'config': {
                'name': layer_name,
                'input_dims': in_dims,
                'output_dims': out_dims,
                'activation': 'linear',
                'tt_rank': tt_rank,
            },
        }

    lstm_entry = {
        'cell_type': 'FusedTTLSTMCell',
        'config': {
            'name': 'tt_fusedlstm_0',
            'input_dims': in_dims,
            'output_dims': out_dims,
            'tt_rank': lstm_rank,
            'recurrent_tt_rank': lstm_r_rank,
        },
    }

    # Key order matches the original literal: dense_2, dense_3, lstm, dense_4.
    replacement_config = {
        'dense_2': dense_entry('tt_dense_2', dense_ranks[0]),
        'dense_3': dense_entry('tt_dense_3', dense_ranks[1]),
        'lstm': lstm_entry,
        'dense_4': dense_entry('tt_dense0', dense_ranks[2]),
    }

    tt_model = transform(model, replacement_config, from_layer=8)
    tt_model.trainable = True
    return tt_model

In [27]:
# One model per rank, using the same rank for the three dense layers and for
# both LSTM rank arguments.
for rank in tqdm(tt_ranks):
    filename = f'./models/tt_ds_uniform_initall_r{rank}.h5'
    tt_model = get_tt_model_initall(model, shape, [rank] * 3, rank, rank)
    tt_model.save(filename, save_format='h5')

HBox(children=(FloatProgress(value=0.0, max=30.0), HTML(value='')))




In [None]:
# Evaluate the tt_ds_uniform_initall_r{rank}.h5 files saved by the loop above.
# The template must name the "initall" files, not the "initdense" ones,
# otherwise this section re-measures the previous section's models.
ranks, wers_initall, cers_initall = get_accuracy_by_rank(dev_dataset, './models/tt_ds_uniform_initall_r{}.h5', tt_ranks)

In [None]:
# Twin-axis figure: speedup on the left axis, WER/CER drop on the right axis.
# NOTE(review): `times` is not assigned in any visible cell, and `ref_time`,
# `wer_ref`, `cer_ref` must hold the reference run's values — make sure all
# four exist before running this cell.
fig, ax = plt.subplots(1, 1, figsize=(5, 5))

ax.plot(ranks, ref_time / times, label='speedup')
ax2 = ax.twinx()
# Plot against `ranks`, the x-values returned together with the error rates.
ax2.plot(ranks, wer_ref - wers_initall, ls='-', label='WER')
ax2.plot(ranks, cer_ref - cers_initall, ls=':', label='CER')
ax.set_xlabel('rank')
ax.set_ylabel('speedup')
ax2.set_ylabel('accuracy drop')

# The label= arguments are only rendered once a legend is drawn; merge the
# entries of both axes into a single legend on the top-most axis.
left_handles, left_labels = ax.get_legend_handles_labels()
right_handles, right_labels = ax2.get_legend_handles_labels()
ax2.legend(left_handles + right_handles, left_labels + right_labels)

fig.suptitle('Network execution time and accuracy as function of rank')

2.048

array([[4.17022005e-01, 7.20324493e-01, 1.14374817e-04]])

(1, 3)