In [1]:
import os
# Select the GPU this notebook may use. With PCI_BUS_ID ordering the device
# index refers to the card's position on the PCI bus; only device "2" is
# exposed to the process. This cell runs before the framework imports below.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

In [2]:

from sgspeech.configs.config import Config
from sgspeech.datasets.speech_dataset import SpeechSliceDataset
from sgspeech.featurizers.speech_featurizer import NumpySpeechFeaturizer
from sgspeech.featurizers.text_featurizer import CharFeaturizer

from sgspeech.models.ds2 import DeepSpeech2

In [3]:
# Load the experiment settings from the YAML file next to this notebook.
config = Config("config.yml")

# Audio-side featurizer, configured from the `speech_config` section.
speech_featurizer = NumpySpeechFeaturizer(
    config.speech_config,
)

# Text-side (character-level) featurizer, configured from `decoder_config`.
text_featurizer = CharFeaturizer(
    config.decoder_config,
)

# Training

In [None]:
from sgspeech.runners.ctc_runners import CTCTrainer

# Training knobs, defined here because this cell previously read them from
# `args.max_ckpts`, `args.tbs` and `args.ebs`, but no `args` object is defined
# in the cells shown (it looks like a leftover from a command-line script), so
# the cell failed with NameError before any training started.
# NOTE(review): the values below are placeholders — set them to what the
# experiment needs. `None` for the batch sizes presumably lets the trainer fall
# back to the batch size in `running_config`; TODO confirm against
# `CTCTrainer.fit`, otherwise use explicit integers.
MAX_CKPTS = 10
TRAIN_BATCH_SIZE = None
EVAL_BATCH_SIZE = None

learning_config = config.learning_config

# Train / eval datasets share the featurizers; the remaining constructor
# arguments come from the corresponding dataset sections of the config.
train_dataset = SpeechSliceDataset(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(learning_config.train_dataset_config),
)
eval_dataset = SpeechSliceDataset(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(learning_config.eval_dataset_config),
)

ctc_trainer = CTCTrainer(text_featurizer, learning_config.running_config)

# Build the model inside the trainer's distribution-strategy scope so that its
# variables are created under that strategy.
with ctc_trainer.strategy.scope():
    ds2_model = DeepSpeech2(**config.model_config, vocabulary_size=text_featurizer.num_classes)
    ds2_model._build(speech_featurizer.shape)
    ds2_model.summary(line_length=120)

# Compile
ctc_trainer.compile(
    ds2_model,
    learning_config.optimizer_config,
    max_to_keep=MAX_CKPTS,
)

ctc_trainer.fit(
    train_dataset,
    eval_dataset,
    train_bs=TRAIN_BATCH_SIZE,
    eval_bs=EVAL_BATCH_SIZE,
)

# Testing

In [4]:
from sgspeech.runners.base_runners import BaseTester

# Weights file to evaluate (path is relative to the notebook's working directory).
WEIGHTS_PATH = '../deepspeech2_tfasr/latest.h5'

# Test split, built with the same featurizers as the other datasets.
test_dataset = SpeechSliceDataset(
    speech_featurizer=speech_featurizer,
    text_featurizer=text_featurizer,
    **vars(config.learning_config.test_dataset_config),
)

# Recreate the network from the config, build it for the featurizer's input
# shape, then restore the saved weights into it.
ds2_model = DeepSpeech2(
    **config.model_config,
    vocabulary_size=text_featurizer.num_classes,
)
ds2_model._build(speech_featurizer.shape)
ds2_model.load_weights(WEIGHTS_PATH)

# Hand both featurizers to the model before it is given to the tester.
ds2_model.add_featurizers(speech_featurizer, text_featurizer)

ds2_model.summary(line_length=120)


Model: "deepspeech2_conv_module"
________________________________________________________________________________________________________________________
Layer (type)                                          Output Shape                                    Param #           
deepspeech2_conv_module_block_0 (ConvBlock)           multiple                                        14592             
________________________________________________________________________________________________________________________
deepspeech2_conv_module_block_1 (ConvBlock)           multiple                                        236704            
________________________________________________________________________________________________________________________
deepspeech2_conv_module_block_2 (ConvBlock)           multiple                                        372192            
________________________________________________________________________________________________________________________

In [5]:
# The tester is driven by the same running configuration section as training;
# 'result' is passed as the tester's output name.
running_config = config.learning_config.running_config
ctc_tester = BaseTester(config=running_config, output_name='result')

# Attach the restored model to the tester.
ctc_tester.compile(ds2_model)


In [6]:
# Run the tester over the test dataset; the recorded output below shows 2619
# batches processed followed by the reported WER/CER metrics.
# NOTE(review): in the recorded output only G_WER has a non-trivial value
# (23.37) while G_CER and every B_* / BLM_* metric is exactly 100 — TODO
# confirm whether those are genuine scores or unset/failed decodes.
ctc_tester.run(test_dataset)

Read files


[Test]: 100%|██████████| 2619/2619 [04:31<00:00,  9.64batch/s]



> Calculating evaluation metrics ...
Test results:
G_WER =  23.3716316
G_CER =  100
B_WER =  100
B_CER =  100
BLM_WER =  100
BLM_CER =  100


In [11]:
# Debugging probe: walk down to the inner fully-connected layer and ask for its
# symbolic output. The recorded run of this cell ends in
# "AttributeError: Layer deepspeech2_fc_module_fc has no inbound nodes."
fc_module = ds2_model.get_layer('deepspeech2_fc_module')
fc_layer = fc_module.get_layer('deepspeech2_fc_module_fc')
fc_layer.output

AttributeError: Layer deepspeech2_fc_module_fc has no inbound nodes.

In [18]:
# Debugging probe: ask the fully-connected module for its symbolic input. The
# recorded run of this cell ends in
# "AttributeError: Layer deepspeech2_fc_module is not connected, no input to return."
fc_module = ds2_model.get_layer('deepspeech2_fc_module')
fc_module.input

AttributeError: Layer deepspeech2_fc_module is not connected, no input to return.

In [31]:
# Materialise the test data and wrap it in a Python iterator so single batches
# can be pulled by hand. The argument 1 is presumably the batch size (the batch
# displayed further down has a leading dimension of 1) — TODO confirm.
single_item_batches = test_dataset.create(1)
test_iter = iter(single_item_batches)

Read files


In [33]:
# Pull the next batch from the iterator; re-running this cell advances the
# iterator, so `batch` changes on every execution.
batch = next(test_iter)

In [34]:
# Display the raw batch. The recorded output shows a tuple whose first element
# is a string tensor holding the audio file path and whose second is a float32
# tensor of shape (1, 1420, 80, 1).
# NOTE(review): this dumps whole tensors into the notebook; showing only shapes
# and dtypes would keep the output readable.
batch

(<tf.Tensor: shape=(1,), dtype=string, numpy=
 array([b'/home/CORPUS/LibriSpeech/test-clean/6930/75918/6930-75918-0001.flac'],
       dtype=object)>,
 <tf.Tensor: shape=(1, 1420, 80, 1), dtype=float32, numpy=
 array([[[[-0.10527746],
          [-0.8822928 ],
          [-1.469422  ],
          ...,
          [-0.14052185],
          [-0.39716178],
          [-0.8145599 ]],
 
         [[-0.3966327 ],
          [-0.7339688 ],
          [-1.5704011 ],
          ...,
          [-0.09846278],
          [-0.570096  ],
          [-0.8241293 ]],
 
         [[-0.613629  ],
          [-1.2102872 ],
          [-1.7619019 ],
          ...,
          [-0.27932546],
          [-0.3360485 ],
          [-0.4656605 ]],
 
         ...,
 
         [[-0.39576238],
          [-0.71845514],
          [-1.4273047 ],
          ...,
          [-1.7072636 ],
          [-1.6666433 ],
          [-1.7108108 ]],
 
         [[-0.48675725],
          [-0.84964746],
          [-1.5051823 ],
          ...,
          [-1