Merge pull request #179 from delphieritas/patch-29
typo fixing, adding detailed steps for sample dataset downloading, Tensorflow version fixing
NLGithubWP committed Mar 11, 2021
2 parents 4bfbe59 + 43de768 commit c86e142
Showing 4 changed files with 68 additions and 6 deletions.
68 changes: 65 additions & 3 deletions examples/models/speech_recognition/README.md
@@ -10,6 +10,7 @@ which works for the *English* language. You'll need to first download this model
```
bash examples/models/speech_recognition/tfdeepspeech/download_lm.sh
bash examples/models/speech_recognition/tfdeepspeech/download_trie.sh
cp examples/models/speech_recognition/tfdeepspeech/alphabet.txt tfdeepspeech/alphabet.txt
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.7.3/deepspeech-0.7.3-models.scorer -P <model_root_directory>/tfdeepspeech
```

@@ -25,15 +26,15 @@ If you wish to generate your own language models and trie files instead, or wish

### Generating Language Models

The TfDeepSpeech model requires a binary n-gram language model compiled by `kenlm` to make predictions. Follow the steps in the example below to generate a LibriSpeech language model for English language:
The TfDeepSpeech model requires a binary n-gram language model compiled by `kenlm` to make predictions. You can simply download the pre-built lm.binary, alphabet.txt and trie files using the steps above, or, if you need to generate your own language model, follow the steps in the example below to generate a LibriSpeech language model for the English language:

1. Download the required txt.gz by running the python script

```sh
python examples/models/speech_recognition/tfdeepspeech/download_lm_txt.py
```

1. Install dependencies
1. Install dependencies for building the language model

```sh
sudo apt-get install build-essential libboost-all-dev cmake zlib1g-dev libbz2-dev liblzma-dev
@@ -77,7 +78,7 @@ The TfDeepSpeech model requires a binary n-gram language model compiled by `kenl

### Generating Trie

See documentation on [DeepSpeech Git Repo](https://github.com/mozilla/DeepSpeech/tree/master/native_client) to generate the trie for your language model. Follow the steps up to **Compile libdeepspeech.so & generate_trie** section. The generated binaries will be saved to `bazel-bin/native-client/`.
See the documentation in the [DeepSpeech Git Repo](https://github.com/mozilla/DeepSpeech/tree/master/native_client) to generate the trie for your language model. You can simply download the pre-built lm.binary, alphabet.txt and trie files using the steps above, or, if you need to generate your own language model, follow the steps below up to the **Compile libdeepspeech.so & generate_trie** section. The generated binaries will be saved to `bazel-bin/native-client/`.

Remember to modify the `alphabet.txt` file if you are training TfDeepSpeech on languages other than English.
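
For illustration only, a minimal sketch of writing such a file (an alphabet.txt lists one output character per line; the character list below is a placeholder, not part of this repository):

```python
# placeholder character set -- replace it with the full character set of your target language
chars = [' ', 'a', 'b', 'c']

# write one character per line to the alphabet file used in the steps above
with open('tfdeepspeech/alphabet.txt', 'w') as f:
    f.write('\n'.join(chars) + '\n')
```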

@@ -90,3 +91,64 @@ bazel-bin/native-clinet/generate_trie ../rafiki/examples/datasets/speech_recogni
The `trie` file is now in the data directory.

*Note: The `generate_trie` binaries are subject to updates by the DeepSpeech team. If you find a mismatch in the trie file version, update the version of the ctc_decoder package by amending the `VERSION` variable in `examples/models/speech_recognition/utils/taskcluster.py`.*
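
For example, the amendment is a one-line change along these lines (the value shown is illustrative; match it to the ctc_decoder version of your trie files):

```python
# examples/models/speech_recognition/utils/taskcluster.py (illustrative snippet)
VERSION = '0.6.1'  # bump to the ds_ctcdecoder version matching your generated trie
```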


### Test with Sample Dataset

Run

```sh
python examples/datasets/audio_files/load_sample_ldc93s1.py
python examples/datasets/audio_files/load_librispeech.py
```
to download the sample and training datasets.
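
Once the scripts finish, a quick sanity check (paths taken from the example further below; adjust if your scripts write elsewhere) that the files are in place:

```python
# verify that the sample (LDC93S1) and training (LibriSpeech dev-clean) data were downloaded
import os

for path in ['data/ldc93s1/ldc93s1/LDC93S1.wav', 'data/libri/dev-clean.zip']:
    print(path, '->', 'OK' if os.path.exists(path) else 'missing')
```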

### Run Test with Sample Dataset

Run the script below to install the model's dependencies in the host server environment (e.g. a Docker container):

```sh
pip install -U pip \
&& pip install -r examples/models/speech_recognition/requirements.txt
```
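
To confirm the pinned versions resolved correctly, a minimal import check (assuming the packages from `requirements.txt` above):

```python
# quick import check for the two version-sensitive dependencies
import tensorflow as tf
import ds_ctcdecoder  # installed by the ds-ctcdecoder package

print('tensorflow', tf.__version__)  # expected 1.12.0 per requirements.txt
```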

To create the model using the Python API, run:

```python
import os
from singa_auto.client import Client
from singa_auto.constants import BudgetOption, ModelDependency

# change localhost address and port number accordingly
# to conform with settings in web/src/HTTPconfig.js, scripts/docker_swarm/.env.sh, scripts/.base_env.sh
client = Client(admin_host='localhost', admin_port=3000)
client.login(email='<USER_EMAIL>', password='<USER_PASSWORD>')

task = 'SPEECH_RECOGNITION'

# if necessary, you can change to another dataset
data_dir = 'data/libri'
train_dataset_path = os.path.join(data_dir, 'dev-clean.zip')

created_model = client.create_model(name='<MODEL_NAME>',
                                    task='SPEECH_RECOGNITION',
                                    model_file_path='examples/models/speech_recognition/TfDeepSpeech.py',
                                    model_class='TfDeepSpeech',
                                    model_preload_file_path='examples/models/speech_recognition/TfDeepSpeech.py',
                                    dependencies={'ds_ctcdecoder': '0.6.1', 'tensorflow': '1.12.0'})

budget = {BudgetOption.TIME_HOURS: 0.5, BudgetOption.GPU_COUNT: 0, BudgetOption.MODEL_TRIAL_COUNT: 1}

# to create an inference job with the speech_recognition model
client.create_inference_job_by_checkpoint(model_name= created_model['name'], budget= budget)

# to obtain the predictor_host
client.get_running_inference_job(app=created_model['name'])

import base64
import requests

# the model's predict() decodes each query from base64 (see TfDeepSpeech.py),
# so send the wav file content base64-encoded rather than the file path
with open('data/ldc93s1/ldc93s1/LDC93S1.wav', 'rb') as f:
    data = base64.b64encode(f.read()).decode('utf-8')
res = requests.post('http://{}'.format('<PREDICTOR_HOST>'), json=data)

# to print out the prediction result
print(res.text)
```
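
The predictor host is not available the instant the inference job is created. Continuing the session above, a minimal polling sketch (assuming the dictionary returned by `get_running_inference_job` exposes a `predictor_host` field once the job is running):

```python
import time

# keep polling until the running inference job reports a predictor host,
# then substitute it for <PREDICTOR_HOST> in the request above
predictor_host = None
while predictor_host is None:
    job = client.get_running_inference_job(app=created_model['name'])
    predictor_host = (job or {}).get('predictor_host')
    if predictor_host is None:
        time.sleep(10)
print(predictor_host)
```
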
2 changes: 1 addition & 1 deletion examples/models/speech_recognition/TfDeepSpeech.py
@@ -1211,7 +1211,7 @@ def predict(self, queries, n_steps=16):

predictions = []
for query in queries:
wav_bytes = tf.read_file(query)
wav_bytes = base64.b64decode(query.encode('utf-8'))
features, features_len = self.audiofile_to_features(wav_bytes)

# Add batch dimension
2 changes: 1 addition & 1 deletion examples/models/speech_recognition/requirements.txt
@@ -1,4 +1,4 @@
tensorflow==1.15.5
tensorflow==1.12.0
sox
scikit-optimize
ds-ctcdecoder==0.6.1
2 changes: 1 addition & 1 deletion singa_auto/client/client.py
@@ -258,7 +258,7 @@ def create_model(self,
:param model_class: The name of the model class inside the Python file. This class should implement :class:`singa_auto.model.BaseModel`
:param dependencies: List of Python dependencies & their versions
:param access_right: Model access right
:param model_preload_file_path: pretrained mdoel file
:param model_preload_file_path: pretrained model file
:param docker_image: A custom Docker image that extends ``singa_auto/singa_auto_worker``, publicly available on Docker Hub.
:returns: Created model as dictionary
