diff --git a/examples/models/speech_recognition/README.md b/examples/models/speech_recognition/README.md index be2fd251..3f9cd39a 100644 --- a/examples/models/speech_recognition/README.md +++ b/examples/models/speech_recognition/README.md @@ -10,6 +10,7 @@ which works for the *English* language. You'll need to first download this model ``` bash examples/models/speech_recognition/tfdeepspeech/download_lm.sh bash examples/models/speech_recognition/tfdeepspeech/download_trie.sh +cp examples/models/speech_recognition/tfdeepspeech/alphabet.txt tfdeepspeech/alphabet.txt wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer -P /tfdeepspeech ``` @@ -25,7 +26,7 @@ If you wish to generate your own language models and trie files instead, or wish ### Generating Language Models -The TfDeepSpeech model requires a binary n-gram language model compiled by `kenlm` to make predictions. Follow the steps in the example below to generate a LibriSpeech language model for English language: +The TfDeepSpeech model requires a binary n-gram language model compiled by `kenlm` to make predictions. You can simply download the pre-built lm.binary, alphabet.txt and trie files using the above steps, or should you need to generate your own language model, please follow the steps in the example below to generate a LibriSpeech language model for English language: 1. Download the required txt.gz by running the python script  ```sh python examples/models/speech_recognition/tfdeepspeech/download_lm_txt.py ``` -1. Install dependencies +1. 
Install dependencies for building language model ```sh sudo apt-get install build-essential libboost-all-dev cmake zlib1g-dev libbz2-dev liblzma-dev @@ -77,7 +78,7 @@ The TfDeepSpeech model requires a binary n-gram language model compiled by `kenl ### Generating Trie -See documentation on [DeepSpeech Git Repo](https://github.com/mozilla/DeepSpeech/tree/master/native_client) to generate the trie for your language model. Follow the steps up to **Compile libdeepspeech.so & generate_trie** section. The generated binaries will be saved to `bazel-bin/native-client/`. +See documentation on [DeepSpeech Git Repo](https://github.com/mozilla/DeepSpeech/tree/master/native_client) to generate the trie for your language model. You can simply download the pre-built lm.binary, alphabet.txt and trie files using the above steps, or should you need to generate your own language model, please follow the steps below up to **Compile libdeepspeech.so & generate_trie** section. The generated binaries will be saved to `bazel-bin/native-client/`. Remember to modify the `alphabet.txt` file if you are training TfDeepSpeech on languages other than English. @@ -90,3 +91,64 @@ bazel-bin/native-clinet/generate_trie ../rafiki/examples/datasets/speech_recogni The `trie` file is now in the data directory. *Note: The `generate_trie` binaries are subject to updates by the DeepSpeech team. If you find mismatch of trie file version, update the version of ctc_decoder package by amending the `VERSION` variable in `examples/models/speech_recognition/utils/taskcluster.py`.* + + +### Test with Sample Dataset + +Run + + ```sh + python examples/datasets/audio_files/load_sample_ldc93s1.py + python examples/datasets/audio_files/load_librispeech.py + ``` +to download the sample and training datasets. + +### Run Test with Sample Dataset + +Run the script below to install dependencies for the model in the host server environment (e.g. 
docker container) + + ```sh + pip install -U pip \ + && pip install -r examples/models/speech_recognition/requirements.txt + ``` + +Use the Python API to create a model; please run + + ```python + import os + from singa_auto.client import Client + from singa_auto.constants import BudgetOption, ModelDependency + + # change localhost address and port number accordingly + # to conform with settings in web/src/HTTPconfig.js, scripts/docker_swarm/.env.sh, scripts/.base_env.sh + client = Client(admin_host='localhost', admin_port=3000) + client.login(email='', password='') + + task = 'SPEECH_RECOGNITION' + + # if necessary, you can change to another dataset + data_dir = 'data/libri' + train_dataset_path = os.path.join(data_dir, 'dev-clean.zip') + + created_model=client.create_model(name='', + task='SPEECH_RECOGNITION', + model_file_path='examples/models/speech_recognition/TfDeepSpeech.py', + model_class='TfDeepSpeech',model_preload_file_path ='examples/models/speech_recognition/TfDeepSpeech.py', + dependencies={"ds_ctcdecoder":"0.6.1", "tensorflow":'1.12.0', }) + + budget = {BudgetOption.TIME_HOURS: 0.5, BudgetOption.GPU_COUNT: 0, BudgetOption.MODEL_TRIAL_COUNT: 1} + + # to create an inference job with the speech_recognition model + client.create_inference_job_by_checkpoint(model_name= created_model['name'], budget= budget) + + # to obtain the predictor_host + client.get_running_inference_job(app=created_model['name']) + + import json + import requests + data = 'data/ldc93s1/ldc93s1/LDC93S1.wav' + res = requests.post('http://{}'.format(), json=data) + + # to print out the prediction result + print(res.text) + ``` diff --git a/examples/models/speech_recognition/TfDeepSpeech.py b/examples/models/speech_recognition/TfDeepSpeech.py index 43c0c866..3f0cee26 100644 --- a/examples/models/speech_recognition/TfDeepSpeech.py +++ b/examples/models/speech_recognition/TfDeepSpeech.py @@ -1211,7 +1211,7 @@ def predict(self, queries, n_steps=16): predictions = [] for query in queries: - wav_bytes = 
tf.read_file(query) + wav_bytes = base64.b64decode(query.encode('utf-8')) features, features_len = self.audiofile_to_features(wav_bytes) # Add batch dimension diff --git a/examples/models/speech_recognition/requirements.txt b/examples/models/speech_recognition/requirements.txt index 9d1d2987..2e04b6a7 100644 --- a/examples/models/speech_recognition/requirements.txt +++ b/examples/models/speech_recognition/requirements.txt @@ -1,4 +1,4 @@ -tensorflow==1.15.5 +tensorflow==1.12.0 sox scikit-optimize ds-ctcdecoder==0.6.1 diff --git a/singa_auto/client/client.py b/singa_auto/client/client.py index c5adf77d..2aff82ae 100644 --- a/singa_auto/client/client.py +++ b/singa_auto/client/client.py @@ -258,7 +258,7 @@ def create_model(self, :param model_class: The name of the model class inside the Python file. This class should implement :class:`singa_auto.model.BaseModel` :param dependencies: List of Python dependencies & their versions :param access_right: Model access right - :param model_preload_file_path: pretrained mdoel file + :param model_preload_file_path: pretrained model file :param docker_image: A custom Docker image that extends ``singa_auto/singa_auto_worker``, publicly available on Docker Hub. :returns: Created model as dictionary