# Red LAS: Listen, Attend and Spell

## Puesta a punto



### Instalación de paquetes necesarios







In [0]:
!pip3 install python_speech_features

### Preparación del entorno para poder correr mis módulos importados.

In [0]:
import sys

from google.colab import drive
from tensorflow.python.client import device_lib

# Mount Google Drive so the project folders below become reachable.
drive.mount('/content/drive/')

# Make the project modules importable. Absolute paths (rooted at the mount point)
# keep the imports working even if the working directory changes, and the
# membership check keeps sys.path free of duplicates when this cell is re-run.
for project_path in ('/content/drive/My Drive/Tesis',
                     '/content/drive/My Drive/Tesis/repo'):
    if project_path not in sys.path:
        sys.path.append(project_path)

# Last expression of the cell: display the devices TensorFlow can see.
device_lib.list_local_devices()

## Entrenamiento

### Importación de librerías necesarias

In [0]:
import tensorflow as tf
from src.utils.Database import Database
from src.utils.LASLabel import LASLabel
from src.utils.ProjectData import ProjectData
import time
from src.neural_network.LAS.LASNetData import LASNetData
import pprint
from src.Estimators.las.model_fn import model_fn
from src.Estimators.las.data_input_fn import data_input_fn
import shutil

### Definición de los hiperparámetros de la red



In [0]:
# Load project data (supplies the repo-relative output paths used below).
project_data = ProjectData()

# Every output path is built from the repository folder inside the mounted Drive.
repo_root = 'drive/My Drive/Tesis/repo/'

network_data = LASNetData()
network_data.model_path = repo_root + project_data.LAS_NET_MODEL_PATH
network_data.checkpoint_path = repo_root + project_data.LAS_NET_CHECKPOINT_PATH
network_data.tensorboard_path = repo_root + project_data.LAS_NET_TENSORBOARD_PATH

# Label-set constants are taken from LASLabel; num_features is the input feature size.
network_data.num_classes = LASLabel.num_classes
network_data.num_features = 494
network_data.num_embeddings = 0
network_data.sos_id = LASLabel.SOS_INDEX
network_data.eos_id = LASLabel.EOS_INDEX

network_data.beam_width = 0

# --- Dense layers, group 1 ---
# Every per-layer list is sized by num_dense_layers_1, so changing the layer
# count keeps all of them in sync. Note that `[obj] * n` repeats the same
# initializer object n times.
network_data.num_dense_layers_1 = 2
network_data.num_units_1 = [400] * network_data.num_dense_layers_1
network_data.dense_activations_1 = [tf.nn.relu] * network_data.num_dense_layers_1
network_data.batch_normalization_1 = True
network_data.keep_prob_1 = [0.7] * network_data.num_dense_layers_1
network_data.kernel_init_1 = [tf.truncated_normal_initializer(mean=0, stddev=0.1)] * network_data.num_dense_layers_1
network_data.bias_init_1 = [tf.zeros_initializer()] * network_data.num_dense_layers_1

# --- Listener ---
network_data.listener_num_layers = 1
network_data.listener_num_units = [256] * network_data.listener_num_layers
network_data.listener_activation_list = [tf.nn.tanh] * network_data.listener_num_layers
network_data.listener_keep_prob_list = [0.8] * network_data.listener_num_layers

# --- Dense layers, group 2 ---
# num_units_2 is sized by num_dense_layers_2 like every other per-layer list
# (it was a hard-coded single-element list, which would silently go out of
# sync if the layer count were changed).
network_data.num_dense_layers_2 = 1
network_data.num_units_2 = [300] * network_data.num_dense_layers_2
network_data.dense_activations_2 = [tf.nn.relu] * network_data.num_dense_layers_2
network_data.batch_normalization_2 = True
network_data.keep_prob_2 = [0.7] * network_data.num_dense_layers_2
network_data.kernel_init_2 = [tf.truncated_normal_initializer(mean=0, stddev=0.1)] * network_data.num_dense_layers_2
network_data.bias_init_2 = [tf.zeros_initializer()] * network_data.num_dense_layers_2

# --- Attention ---
network_data.attention_type = 'luong'       # 'luong', 'bahdanau'
network_data.attention_num_layers = 1
network_data.attention_size = None
network_data.attention_units = 256
network_data.attention_activation = tf.nn.tanh
network_data.attention_keep_prob = 0.8

# --- Regularization / sampling ---
# NOTE(review): the exact meaning of these two values is defined by model_fn,
# which is not visible from this notebook — confirm there.
network_data.kernel_regularizer = 0.0
network_data.sampling_probability = 0.2

network_data.optimizer = tf.train.AdamOptimizer(learning_rate=0.001)

# Print the full configuration so it is recorded alongside the run.
pprint.pprint(network_data.as_dict())

### Configuración de entrenamiento

In [0]:
# Directory where the Estimator keeps its checkpoints and summaries.
model_dir = 'drive/My Drive/Tesis/repo/out/las_net/estimator/'

# Folder that holds the TFRecord files.
base_path = 'drive/My Drive/Tesis/repo/data/tfrecords/librispeech/las/ds_dataset/'

# Training files are numbered 1..14.
index_files = list(range(1, 15))
train_files = [base_path + 'train_database_{}.tfrecords'.format(item) for item in index_files]
# NOTE(review): 'test_database_2.tfrecords' appears in both the validation and
# the test list — confirm that this overlap is intended.
val_files = [base_path + name for name in ('test_database_1.tfrecords', 'test_database_2.tfrecords')]
test_files = [base_path + name for name in ('test_database_2.tfrecords',)]

train_batch_size = 100
train_epochs = 1000

validate_batch_size = 100

# True: continue from whatever is already stored in model_dir.
# False: delete model_dir (see below) so the run starts from scratch.
restore_run = True

# Checkpoints, summaries and step-rate logs are all written every 100 steps.
config = tf.estimator.RunConfig(
    model_dir=model_dir,
    save_checkpoints_steps=100,
    save_summary_steps=100,
    log_step_count_steps=100)


model = tf.estimator.Estimator(
    model_fn=model_fn,
    params=network_data.as_dict(),
    config=config
)

tf.logging.set_verbosity(tf.logging.INFO)

if not restore_run:
    # A first-ever run has no model_dir yet; shutil.rmtree would raise
    # FileNotFoundError in that case, so a missing directory is treated as
    # "already clean". Any other error (e.g. permissions) still propagates.
    try:
        shutil.rmtree(model_dir)
    except FileNotFoundError:
        pass


### Entrenamiento

In [0]:
def train_input_fn():
    """Input function for training: calls data_input_fn on the training files."""
    return data_input_fn(
        filenames=train_files,
        batch_size=train_batch_size,
        parse_fn=Database.tfrecord_parse_dense_fn,
        shuffle_buffer=10,
        num_features=network_data.num_features,
        num_epochs=train_epochs,
        eos_id=LASLabel.EOS_INDEX,
        sos_id=LASLabel.SOS_INDEX,
    )


# The Estimator invokes the input function itself, so the function object
# (not its result) is what gets passed here.
model.train(input_fn=train_input_fn)


### Validación


In [0]:
# Evaluate on the validation files. This cell previously read `train_files`,
# which reported metrics on the training data and left `val_files` unused.
model.evaluate(
    input_fn=lambda: data_input_fn(
        filenames=val_files,
        batch_size=validate_batch_size,
        parse_fn=Database.tfrecord_parse_dense_fn,
        shuffle_buffer=1,
        num_features=network_data.num_features,
        eos_id=LASLabel.EOS_INDEX,
        sos_id=LASLabel.SOS_INDEX
    )
)

### Testeo

In [0]:
# Number of predictions to print before stopping.
num_tests = 10


def test_input_fn():
    """Input function for prediction: calls data_input_fn on the test files, one example per batch."""
    return data_input_fn(
        filenames=test_files,
        batch_size=1,
        parse_fn=Database.tfrecord_parse_dense_fn,
        shuffle_buffer=1,
        num_features=network_data.num_features,
        eos_id=LASLabel.EOS_INDEX,
        sos_id=LASLabel.SOS_INDEX,
    )


predictions = model.predict(input_fn=test_input_fn)

# Print each prediction's decoded 'sample_ids' and stop once num_tests
# items have been shown.
for count, item in enumerate(predictions, start=1):
    pred = item['sample_ids']
    print("Predicted: " + LASLabel.from_index(pred))
    if count >= num_tests:
        break