In [None]:
# Fetch the tiny_sgspeech sources that later cells install, import from, and
# run scripts out of. The directory test makes the cell safe to re-run:
# a plain `git clone` aborts when the destination directory already exists.
!test -d tiny_sgspeech || git clone https://github.com/indra622/tiny_sgspeech

In [None]:
# Download the two LibriSpeech archives used by this notebook
# (train-clean-100 and dev-clean) into the current working directory.
# wget fetches the URLs one after another in the order given.
!wget https://www.openslr.org/resources/12/train-clean-100.tar.gz https://www.openslr.org/resources/12/dev-clean.tar.gz

In [None]:
# Unpack each archive and delete it only if extraction succeeded (`&&`).
# Extraction is deliberately non-verbose: `-v` prints one line per extracted
# file, which buries the notebook under a huge log for these corpora.
!tar -xzf train-clean-100.tar.gz && rm train-clean-100.tar.gz
!tar -xzf dev-clean.tar.gz && rm dev-clean.tar.gz

In [None]:
# Run the repo's create_librispeech_trans.py once per split. Each call gets
# the split directory via --dir and a transcripts.tsv path inside that same
# directory as the positional argument (presumably the output file -- confirm
# against the script).
for split in ("train-clean-100", "dev-clean"):
    split_dir = f"/content/LibriSpeech/{split}"
    !python tiny_sgspeech/create_librispeech_trans.py --dir {split_dir} {split_dir}/transcripts.tsv


In [None]:
# Install the cloned package into the environment of the running kernel.
# `%pip` is used instead of `!pip` because the shell's pip is not guaranteed
# to belong to the interpreter this notebook is executing in.
%pip install ./tiny_sgspeech

In [None]:
from tiny_sgspeech.sgspeech.utils import setup_environment, setup_strategy

# Run the library's environment setup, then create the strategy object that
# is later handed to the trainer. The [0] argument is presumably a list of
# device indices -- confirm against setup_strategy.
setup_environment()
devices = [0]
strategy = setup_strategy(devices)

In [None]:
from tiny_sgspeech.sgspeech.configs.config import Config
from tiny_sgspeech.sgspeech.featurizers.speech_featurizer import NumpySpeechFeaturizer
from tiny_sgspeech.sgspeech.featurizers.text_featurizer import CharFeaturizer

# Load the config file that ships inside the cloned repository.
config_path = '/content/tiny_sgspeech/config.yml'
config = Config(config_path)

# Each featurizer is configured from its own section of the loaded config:
# speech_config for the audio side, decoder_config for the text side.
speech_featurizer = NumpySpeechFeaturizer(
    config.speech_config
)
text_featurizer = CharFeaturizer(
    config.decoder_config
)

In [None]:
from tiny_sgspeech.sgspeech.datasets.speech_dataset import SpeechSliceDataset
from tiny_sgspeech.sgspeech.featurizers.text_featurizer import CharFeaturizer

# Both datasets share the same pair of featurizers; only the remaining keyword
# arguments differ, and those come from the attributes of the train/eval
# dataset config objects (expanded with vars()).
featurizers = {
    "speech_featurizer": speech_featurizer,
    "text_featurizer": text_featurizer,
}

train_dataset = SpeechSliceDataset(
    **featurizers,
    **vars(config.learning_config.train_dataset_config),
)
eval_dataset = SpeechSliceDataset(
    **featurizers,
    **vars(config.learning_config.eval_dataset_config),
)

In [None]:
# Take the first element produced by train_dataset.create(1) (the argument is
# presumably the batch size -- confirm) and bind its first five fields, in
# positional order, to descriptive names for the inspection cells below.
td = next(iter(train_dataset.create(1)))
(
    speech_link,
    speech_feature,
    speech_duration,
    transcription,
    transcription_length,
) = (td[i] for i in range(5))

In [None]:
import IPython.display as ipd
import tensorflow as tf

# The first entry of speech_link is converted to a Python string (via
# .numpy() and a UTF-8 decode) and handed to the notebook's audio player.
first_link = speech_link[0]
speech_path = first_link.numpy().decode('utf-8')
ipd.Audio(speech_path)

In [None]:
# Display the `transcription` field taken from the sampled batch.
transcription

In [None]:
# Run the sampled transcription through the text featurizer's iextract and
# display the result (presumably this maps the encoded labels back to text --
# confirm against CharFeaturizer).
text_featurizer.iextract(transcription)

In [None]:
import librosa
import os

# Read the sampled file directly with librosa as 16 kHz mono so the raw
# waveform can be compared with the featurizer output. The second value
# returned by librosa.load is discarded.
audio_file = os.path.expanduser(speech_path)
raw_audio, _ = librosa.load(audio_file, sr=16000, mono=True)


In [None]:
# Print the raw waveform shape and the extracted feature shape, one per line.
print(raw_audio.shape, speech_feature.shape, sep="\n")

In [None]:
from tiny_sgspeech.sgspeech.runners.transducer_runners import TransducerTrainer

# Create the trainer from the running_config section of the learning config,
# giving it the text featurizer and the strategy created earlier.
conformer_trainer = TransducerTrainer(
    config=config.learning_config.running_config,
    text_featurizer=text_featurizer,
    strategy=strategy,
)

In [None]:
from tiny_sgspeech.sgspeech.models.conformer import Conformer

# Instantiate a Conformer from the model section of the config; the vocabulary
# size comes from the text featurizer's num_classes.
cf = Conformer(
    **config.model_config,
    vocabulary_size=text_featurizer.num_classes,
)

In [None]:
# Build the model from the speech featurizer's `shape` attribute
# (presumably required before summary() can report layers -- confirm).
cf._build(speech_featurizer.shape)

In [None]:
# Print the model summary using a 150-character line length.
cf.summary(line_length=150)

In [None]:
from tiny_sgspeech.sgspeech.optimizers.schedules import TransformerSchedule
import tensorflow as tf
import math

# The model, learning-rate schedule and optimizer are all created inside the
# trainer's strategy scope.
with conformer_trainer.strategy.scope():
    # Model that will actually be trained (separate from the `cf` instance
    # created earlier in the notebook).
    conformer = Conformer(
        **config.model_config,
        vocabulary_size=text_featurizer.num_classes,
    )
    conformer._build(speech_featurizer.shape)

    optimizer_config = config.learning_config.optimizer_config
    d_model = conformer.dmodel

    # Schedule parameterised by the model width; its max_lr is
    # 0.05 / sqrt(d_model), and warmup_steps comes from the optimizer config.
    lr_schedule = TransformerSchedule(
        d_model=d_model,
        warmup_steps=optimizer_config["warmup_steps"],
        max_lr=(0.05 / math.sqrt(d_model)),
    )

    # Adam driven by the schedule above; betas and epsilon come from the config.
    optimizer = tf.keras.optimizers.Adam(
        lr_schedule,
        beta_1=optimizer_config["beta1"],
        beta_2=optimizer_config["beta2"],
        epsilon=optimizer_config["epsilon"],
    )


In [None]:
# Hand the model and optimizer to the trainer (max_to_keep=10), then train
# with the training dataset at batch size 2 and the evaluation dataset at
# batch size 1.
conformer_trainer.compile(
    model=conformer,
    optimizer=optimizer,
    max_to_keep=10,
)

conformer_trainer.fit(
    train_dataset,
    eval_dataset,
    train_bs=2,
    eval_bs=1,
)

In [None]:
# Create a fresh Conformer for testing: build it, load the weights stored at
# /content/conformer.h5, print its summary, and attach both featurizers.
weights_path = '/content/conformer.h5'

test_cf = Conformer(
    **config.model_config,
    vocabulary_size=text_featurizer.num_classes,
)
test_cf._build(speech_featurizer.shape)
test_cf.load_weights(weights_path)
test_cf.summary(line_length=150)
test_cf.add_featurizers(speech_featurizer, text_featurizer)

In [None]:
from tiny_sgspeech.sgspeech.runners.base_runners import BaseTester

# Create a tester from the same running_config used for training, with
# output_name 'result', then run the loaded model over the evaluation
# dataset at batch size 1.
conformer_tester = BaseTester(
    output_name='result',
    config=config.learning_config.running_config,
)
conformer_tester.compile(test_cf)
conformer_tester.run(
    eval_dataset,
    batch_size=1,
)