<a href="https://colab.research.google.com/github/toonday/CarND-Advanced-Lane-Lines/blob/master/notebooks/pre-deployment/speech2cmd_covid_faq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **PRE-DEPLOYMENT: speech2cmd-covid_faq**

In [1]:
import os
import pkg_resources
import shlex
import subprocess
import sys

# Pinned dependency set for the speech2cmd service.
# The existing torch packages are removed first so that the pinned CPU builds
# listed further down are the ones that end up installed.
install_commands = [
  "pip uninstall torchvision -y",
  "pip uninstall torch -y",
  "pip uninstall torchtext -y",
  "pip uninstall torchaudio -y",
  #"pip install torchvision==0.8.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html",
  #"pip install torch==1.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html",
  #"pip install torchvision==0.8.1",
  #"pip install torch==1.7.0",
  "pip install torchvision==0.8.1+cpu -f https://download.pytorch.org/whl/torch_stable.html",
  "pip install torch==1.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html",
  "pip install torchtext==0.8.0",
  "pip install torchaudio==0.7.0",
  "pip install sentencepiece==0.1.91",
  "pip install urllib3==1.25.11",
  "pip install onnxruntime==1.5.2",
  "pip install frozendict==1.2",
  "pip install braceexpand==0.1.6",
  "pip install editdistance==0.5.3",
  "pip install inflect==4.1.0",
  "pip install kaldi-io==0.9.4",
  "pip install librosa==0.8.0",
  "pip install marshmallow==3.9.1",
  "pip install packaging==20.4",
  "pip install num2words==0.5.10",
  "pip install ruamel.yaml==0.16.12",
  "pip install soundfile==0.10.3.post1",
  "pip install sox==1.4.1",
  "pip install torch-stft==0.1.4",
  "pip install unidecode==1.1.1",
  "pip install webdataset==0.1.40",
  "pip install kaldi-python-io==1.1.3",
  "pip install scipy==1.5.4",
  "pip install pandas==1.1.4",
  "pip install g2p_en==2.1.0",
  "pip install nemo_toolkit==1.0.0b3",
  #"pip install wget",
  #"pip install gspread oauth2client df2gspread",
]

# The whole command list only runs when nemo-toolkit is not installed yet,
# so re-running this cell after a successful install does nothing.
required_pkgs = {'nemo-toolkit'}
installed_pkgs = {pkg.key for pkg in pkg_resources.working_set}
missing_pkgs = required_pkgs - installed_pkgs

if missing_pkgs:
  print("E no dey")
  failed_commands = []
  for command in install_commands:
    argv = shlex.split(command)
    # Run pip through the kernel's own interpreter so the packages land in the
    # environment this notebook actually imports from.
    if argv and argv[0] == "pip":
      argv = [sys.executable, "-m", "pip"] + argv[1:]
    completed = subprocess.run(
      argv,
      stdout=subprocess.PIPE,
      stderr=subprocess.STDOUT,
      universal_newlines=True,
    )
    if completed.returncode != 0:
      # Keep going so the remaining pins are still attempted, but remember the
      # failure and show pip's own output instead of silently ignoring it.
      failed_commands.append(command)
      print("FAILED ({}): {}".format(completed.returncode, command))
      print(completed.stdout)

  if failed_commands:
    raise RuntimeError(
      "{} install command(s) failed: {}".format(len(failed_commands), failed_commands)
    )

In [None]:
# Restart the runtime here after the install cell above has run, then continue with the cells below.

In [2]:
import os

# Path of the service-account key file inside the Colab runtime; presumably this
# is what the google.cloud storage client created later reads - confirm.
os.environ.update({
  "GOOGLE_APPLICATION_CREDENTIALS": "/content/google-api-key-test-model-deploy.json",
})

In [3]:
%%bash 
# Sanity check: print the credentials path as seen by a shell subprocess of the kernel.
echo $GOOGLE_APPLICATION_CREDENTIALS

/content/google-api-key-test-model-deploy.json


In [68]:
from nemo.utils import logging as nemo_logging
nemo_logging.setLevel(nemo_logging.ERROR)

# Import general, miscellaneous and utility libraries
import os
import shutil
import json
import logging
from google.cloud import storage

import librosa

import torch
import nemo.collections.asr as nemo_asr
from difflib import get_close_matches, SequenceMatcher


# Cloud storage handles and the bucket-side prefixes the service reads from
storage_client = storage.Client()
bucket_name = "test-model-deploy.appspot.com"
storage_bucket = storage_client.bucket(bucket_name)
cstorage_models_dpath = 'models/'
cstorage_config_dpath = 'vai-service-config/voiceai-covid-faq/'

# Local directory layout (every path keeps its trailing slash because file
# names are appended by plain string concatenation)
#service_dpath = '/tmp/asr_service/' # for gcf deployment
service_dpath = '/content/' # for colab testing
config_dpath, models_dpath, audio_dpath = (
  service_dpath + 'config/',
  service_dpath + 'models/',
  service_dpath + 'usr_data/',
)

# Per-request state, filled in by process_client_json
curr_state = lang_code = audio_fname = None

service_option = None

# Configuration file expected for each config key
config_fnames = dict(
  models="map-models.json",
  cmd2digit="map-cmd2digit.json",
  str2cmd="map-str2cmd.json",
  thresh="map-thresh.json",
)
# Loaded configuration; every key starts out empty until the service is initialized
config_vars = {key: {} for key in config_fnames}

# ===================================
# Functions to run model inference
# ===================================
def build_manifest(manifest_path, file_path, default_cmd):
  """Write a one-line JSON manifest describing a single audio file.

  Args:
    manifest_path: path of the manifest file to (over)write.
    file_path: path of the audio file; stored as "audio_filepath".
    default_cmd: value stored in the manifest's "text" field.

  Raises:
    Whatever librosa raises when the audio duration cannot be read. In that
    case the manifest file is left untouched.
  """
  # Measure the audio before opening the manifest for writing: opening with 'w'
  # truncates the file, so doing it first would leave an empty manifest behind
  # whenever the audio file is missing or unreadable.
  duration = round(librosa.core.get_duration(filename=file_path), 2)

  metadata = {
      "audio_filepath": file_path,
      "duration": duration,
      "text": default_cmd
  }

  with open(manifest_path, 'w') as fout:
    json.dump(metadata, fout)
    fout.write('\n')


@torch.no_grad()
def get_pred_str(model, dataloader, logprobs=False):
  """Run the model over every batch of the dataloader and collect its output.

  Args:
    model: object exposing forward(input_signal=..., input_signal_length=...)
      that returns (logits, logits_len, greedy_predictions), plus a
      _wer.ctc_decoder_predictions_tensor method used for decoding.
    dataloader: iterable of 4-item batches
      (audio_signal, audio_signal_len, labels, labels_len).
    logprobs: when true, return the per-sample logits (each cut to its own
      length) instead of decoded hypotheses.

  Returns:
    A list with one entry per sample, in dataloader order.
  """
  collected = []

  for audio_signal, audio_signal_len, _labels, _labels_len in dataloader:
    logits, logits_len, greedy_predictions = model.forward(
        input_signal=audio_signal, input_signal_length=audio_signal_len
    )

    if not logprobs:
      collected.extend(model._wer.ctc_decoder_predictions_tensor(greedy_predictions))
      continue

    # keep the raw model output per file, cut to that file's own length
    collected.extend(
        logits[sample_idx][: logits_len[sample_idx]]
        for sample_idx in range(logits.shape[0])
    )

  return collected


# ===================================
# Functions for the speech command recognition service
# ===================================
def download_avail_models():
  """Download every model blob under the bucket's model prefix into models_dpath.

  Files that already exist locally are not downloaded again. Reads the
  module-level storage_bucket, cstorage_models_dpath and models_dpath.
  """
  os.makedirs(models_dpath, exist_ok=True)

  for blob in storage_bucket.list_blobs(prefix=cstorage_models_dpath):
    # Strip only the leading prefix; str.replace would also remove the same
    # text if it appeared again later in the blob name.
    fname = blob.name
    if fname.startswith(cstorage_models_dpath):
      fname = fname[len(cstorage_models_dpath):]

    # Skip the prefix entry itself and any folder placeholder (name ending in
    # '/'): neither can be written to a local file.
    if not fname or fname.endswith('/'):
      continue

    model_fpath = models_dpath + fname
    # Blobs nested below the prefix need their local parent directory first.
    os.makedirs(os.path.dirname(model_fpath), exist_ok=True)
    if not os.path.exists(model_fpath):
      blob.download_to_filename(model_fpath)


def clear_avail_models():
  """Remove the local models directory and everything in it, if it exists."""
  models_dir_present = os.path.isdir(models_dpath)
  if not models_dir_present:
    return
  shutil.rmtree(models_dpath)


def setup_service_config_vars():
  """Fetch the service configuration files and load them into config_vars.

  Steps:
    1. make sure the local audio and config directories exist;
    2. download every blob under cstorage_config_dpath that is not already
       present locally;
    3. check that every file named in config_fnames is present;
    4. parse the JSON files and check that each model file named in the
       "models" map exists under models_dpath;
    5. only then publish the parsed values into config_vars.

  Raises:
    AssertionError: a configuration file or a referenced model file is
      missing. config_vars is left unchanged in that case.
  """
  os.makedirs(audio_dpath, exist_ok=True)
  os.makedirs(config_dpath, exist_ok=True)

  for blob in storage_bucket.list_blobs(prefix=cstorage_config_dpath):
    # Strip only the leading prefix from the blob name.
    fname = blob.name
    if fname.startswith(cstorage_config_dpath):
      fname = fname[len(cstorage_config_dpath):]
    # Skip the prefix entry itself and folder placeholders.
    if not fname or fname.endswith('/'):
      continue

    fpath = config_dpath + fname
    os.makedirs(os.path.dirname(fpath), exist_ok=True)
    if not os.path.exists(fpath):
      blob.download_to_filename(fpath)

  # Raise explicitly instead of `assert False`: assert statements are removed
  # when Python runs with -O, which would silently skip these checks.
  for fname in config_fnames.values():
    fpath = config_dpath + fname
    if not os.path.exists(fpath):
      raise AssertionError("All service configuration files are needed for service to be functional. Missing file '{}'".format(fpath))

  # Parse into a local dict first so a validation failure below cannot leave
  # config_vars half-populated (process_client_json treats non-empty maps as
  # "service initialized").
  loaded_vars = {}
  for key, fname in config_fnames.items():
    with open(config_dpath + fname) as f:
      loaded_vars[key] = json.load(f)

  # ensure that the fname specified for each model exists
  for key, model_fname in loaded_vars["models"].items():
    fpath = models_dpath + model_fname
    if not os.path.exists(fpath):
      raise AssertionError("Specified model for language code '{}' does not exist. Missing file '{}'".format(key, fpath))

  config_vars.update(loaded_vars)


def clear_service_config_vars():
  """Delete the local config directory and reset every config_vars entry to {}."""
  config_dir_present = os.path.isdir(config_dpath)
  if config_dir_present:
    shutil.rmtree(config_dpath)

  # config_vars keeps its keys (and its identity); only the values are emptied
  for name in list(config_vars):
    config_vars[name] = {}


def speech_to_string():
  """Transcribe the current request's audio file with the current language's model.

  Reads the module-level lang_code and audio_fname (set by process_client_json)
  together with the directory settings, restores the model file mapped to
  lang_code from disk, writes a one-entry manifest for the audio file and
  returns the first prediction produced by get_pred_str.
  """
  # Restore the model that config_vars maps to the requested language
  model_fpath = models_dpath + config_vars["models"][lang_code]
  loaded_asr_model = nemo_asr.models.EncDecCTCModel.restore_from(restore_path=model_fpath)

  # Describe the single audio file in a manifest the dataloader can consume
  manifest_fpath = service_dpath + "manifest.json"
  build_manifest(manifest_fpath, audio_dpath + audio_fname, 'nothing')

  # Use the model's test dataset config when it has one, else the validation one
  if 'test_ds' in loaded_asr_model.cfg.keys():
    dl_config = loaded_asr_model.cfg.test_ds
  else:
    dl_config = loaded_asr_model.cfg.validation_ds
  dl_config.manifest_filepath = manifest_fpath

  dl = loaded_asr_model._setup_dataloader_from_config(dl_config)
  dl.dataset._sample_rate = dl_config.sample_rate

  # Inference runs on CPU in eval mode
  cpu_model = loaded_asr_model.cpu()
  cpu_model.eval()

  return get_pred_str(cpu_model, dl)[0]


def string_to_command(pred_str, curr_state):
  """Map a predicted transcript onto the closest configured command.

  The candidate strings are the keys of config_vars["str2cmd"][curr_state][lang_code]
  and the mapped values are the commands.

  Returns:
    A 5-tuple (pred_cmd, closest_match_str, conf_level, pred_aud_str,
    closest_match_aud_str). When nothing matches, the commands/strings are
    None and conf_level is 0; pred_aud_str is always the space-stripped input.
  """
  class_options = config_vars["str2cmd"][curr_state][lang_code]
  cutoffs = (0.8, 0.6, 0.4, 0.2)

  # Compare with the spaces removed: per the original author's note this seems
  # to work a little better for auditory (phonetic) similarity than comparing
  # the space-separated strings. Keep a map back to the original option text.
  options_aud = {option.replace(" ", ""): option for option in class_options.keys()}
  pred_aud_str = pred_str.replace(" ", "")

  # Try the strictest cutoff first and relax it until something matches
  closest_match = []
  for cutoff in cutoffs:
    closest_match = get_close_matches(pred_aud_str, options_aud, 1, cutoff)
    if closest_match:
      break

  if not closest_match:
    return None, None, 0, pred_aud_str, None

  closest_match_aud_str = closest_match[0]
  closest_match_str = options_aud[closest_match_aud_str]

  # Confidence is the similarity ratio averaged over both argument orders
  forward_ratio = SequenceMatcher(None, pred_aud_str, closest_match_aud_str).ratio()
  backward_ratio = SequenceMatcher(None, closest_match_aud_str, pred_aud_str).ratio()
  conf_level = (forward_ratio + backward_ratio) / 2.0

  return class_options[closest_match_str], closest_match_str, conf_level, pred_aud_str, closest_match_aud_str


def command_to_digit(command, curr_state):
  """Return the digit configured for a command in the given state, or -1 when command is None."""
  if command is None:
    return -1
  return config_vars["cmd2digit"][curr_state][command]


def _error_status(message):
  """Build the error payload returned to the client for a rejected request."""
  return {
    "status": "error",
    "message": message
  }


def process_client_json(json_data):
  """Validate a client request and copy its fields into the module-level request state.

  Args:
    json_data: dict of request fields. 'service_option' is always required;
      'curr_state', 'lang_code' and 'audio_fname' are required for the
      'pred' and 'pred_cache_audio' options.

  Returns:
    None when the request is acceptable, otherwise an error dict with
    "status" and "message" keys describing the first problem found.
  """
  global service_option
  global curr_state
  global lang_code
  global audio_fname

  if 'service_option' not in json_data:
    return _error_status("service_option attribute is expected to be specified to this endpoint")
  service_option = json_data['service_option']

  # check if service_option has acceptable value and return error where appropriate
  valid_options = ['init', 'clear', 'pred', 'pred_cache_audio']
  if service_option not in valid_options:
    return _error_status(
      "expecting service option to be one of {}, however, received incorrect setting '{}' instead".format(valid_options, service_option)
    )

  # no need to do further processing if user just wants to initialize or clear the service
  if service_option in ('init', 'clear'):
    return None

  # check to ensure that service has been initialized
  if 0 in [len(config_vars["models"]), len(config_vars["str2cmd"]), len(config_vars["cmd2digit"])]:
    return _error_status("speech2cmd service has not been initialized")

  if 'curr_state' not in json_data:
    return _error_status("curr_state attribute is expected to be specified to this endpoint")
  curr_state = json_data['curr_state']

  if 'lang_code' not in json_data:
    return _error_status("lang_code attribute is expected to be specified to this endpoint")
  lang_code = json_data['lang_code']

  # check to ensure lang_code specified by client is supported
  if lang_code not in config_vars["models"]:
    return _error_status("lang_code specified '{}' is not supported by available models".format(lang_code))

  # string_to_command indexes config_vars["str2cmd"][curr_state][lang_code];
  # reject unknown combinations here instead of failing later with a KeyError
  if curr_state not in config_vars["str2cmd"] or lang_code not in config_vars["str2cmd"][curr_state]:
    return _error_status(
      "curr_state specified '{}' is not configured for lang_code '{}'".format(curr_state, lang_code)
    )

  # validated like the other required fields rather than raising KeyError
  if 'audio_fname' not in json_data:
    return _error_status("audio_fname attribute is expected to be specified to this endpoint")
  audio_fname = json_data['audio_fname']

  return None


def process_client_request(request):
  """Extract the form fields and uploaded audio from a request, then validate them.

  Args:
    request: object exposing form.to_dict() and files.to_dict(), whose file
      values have a save(path) method (presumably a Flask/werkzeug request -
      confirm against the deployment target).

  Returns:
    None when the request is acceptable, otherwise an error dict with
    "status" and "message" keys. For prediction requests the single uploaded
    file is saved under audio_dpath before validation.
  """
  # get data fields from post command and check if there are any errors
  data = request.form.to_dict()

  # .get(): a missing service_option is reported by process_client_json below
  # instead of raising KeyError here
  if data.get('service_option') in ['pred', 'pred_cache_audio']:
    # get and save wav audio file to local tmp directory from post command
    expected_num_of_files = 1
    uploaded_files = request.files.to_dict()
    if len(uploaded_files) != expected_num_of_files:
      return {
        "status": "error",
        "message": "expecting number of files to be {}, however, received {} instead".format(expected_num_of_files, len(uploaded_files))
      }

    for field_name, audio_fcontent in uploaded_files.items():
      # The field name is client-controlled and becomes part of a local path:
      # keep only its final component so it cannot escape audio_dpath.
      safe_fname = os.path.basename(field_name)
      if safe_fname in ('', '.', '..'):
        return {
          "status": "error",
          "message": "received invalid audio file name '{}'".format(field_name)
        }
      data['audio_fname'] = safe_fname

      os.makedirs(audio_dpath, exist_ok=True)
      audio_fcontent.save(audio_dpath + safe_fname)

  return process_client_json(data)


def handler_main(status_obj):
  """Execute the service option stored in the module-level request state.

  Args:
    status_obj: error dict produced by request validation, or a falsy value
      when validation passed.

  Returns:
    A (json_body, 200, headers) tuple. A truthy status_obj is sent back
    unchanged; otherwise the 'init', 'clear', 'pred' or 'pred_cache_audio'
    option is carried out. Any other service_option value returns None.
  """
  global lang_code
  global curr_state

  return_headers = {
    'Content-Type': 'application/json',
    'Access-Control-Allow-Origin': '*'
  }

  def respond(payload):
    # every outcome, validation errors included, is sent with status code 200
    return json.dumps(payload), 200, return_headers

  if status_obj:
    return respond(status_obj)

  if service_option == "init":
    download_avail_models()
    setup_service_config_vars()
    return respond({
      "status": "success",
      "message": "service successfully initialized"
    })

  if service_option == "clear":
    clear_avail_models()
    clear_service_config_vars()
    return respond({
      "status": "success",
      "message": "service files successfully cleared"
    })

  if service_option not in ("pred", "pred_cache_audio"):
    return None

  pred_str = speech_to_string()
  pred_cmd, match_str, confidence, pred_aud_str, match_aud_str = string_to_command(pred_str, curr_state)
  action_digit = command_to_digit(pred_cmd, curr_state)
  # a match below the language's confidence threshold is reported as "no action"
  if confidence < config_vars["thresh"][lang_code]:
    action_digit = -1

  audio_fpath = audio_dpath + audio_fname
  if service_option == "pred_cache_audio":
    # keep a copy of the processed audio in the bucket
    dest_blob = storage_bucket.blob("data/usr_audio/" + audio_fname)
    dest_blob.upload_from_filename(audio_fpath)

  # comment line below if testing in colab and not gcf
  # remove processed wav file from system
  #os.remove(audio_fpath)

  # reset the per-request state that was consumed above
  lang_code = None
  curr_state = None

  return respond({
    "status": "success",
    "message": "command prediction successfully made",
    "pred_str": pred_str,
    "match_str": match_str,
    "pred_cmd": pred_cmd,
    "confidence": confidence,
    "action_digit": action_digit,
    "pred_aud_str": pred_aud_str,
    "match_aud_str": match_aud_str
  })


def handler_json(json_data):
  """Entry point for requests already available as a dict: validate, then run the service."""
  return handler_main(process_client_json(json_data))


def handler(request):
  """Entry point for request objects: extract and validate the request, then run the service."""
  return handler_main(process_client_request(request))



**RUNNING Speech2Cmd E2E TESTS**

In [69]:
print()
for banner_line in (" =========== ", " ... RUNNING Speech2Cmd E2E TESTS ... ", " =========== "):
  print(banner_line)
print()

# Requests reused by several steps of the scenario
yor_pred_request = {
  "service_option": "pred",
  "curr_state": "ASK_QUESTION",
  "lang_code": "yor",
  "audio_fname": "Recording #10.wav"
}
eng_pred_request = {
  "service_option": "pred",
  "curr_state": "ASK_LANG_SELECT",
  "lang_code": "eng",
  "audio_fname": "Recording #15.wav"
}

# The steps run strictly in this order, each as (request, expected status, expected message):
# clear -> pred fails (not initialized) -> init -> two predictions -> clear -> pred fails again
e2e_steps = [
  ({"service_option": "clear"}, "success", "service files successfully cleared"),
  (yor_pred_request, "error", "speech2cmd service has not been initialized"),
  ({"service_option": "init"}, "success", "service successfully initialized"),
  (yor_pred_request, "success", "command prediction successfully made"),
  (eng_pred_request, "success", "command prediction successfully made"),
  ({"service_option": "clear"}, "success", "service files successfully cleared"),
  (yor_pred_request, "error", "speech2cmd service has not been initialized"),
]

results = []
expected_results = []

test_idx = 0
for step_request, step_status, step_message in e2e_steps:
  test_idx += 1
  print()
  print()
  print("... TEST {} ...".format(test_idx))
  print(" ------------------------------------ ")
  # hand each step its own copy of the request dict
  res = handler_json(dict(step_request))
  results.append(json.loads(res[0]))
  expected_results.append({
    "status": step_status,
    "message": step_message
  })



 ... RUNNING Speech2Cmd E2E TESTS ... 



... TEST 1 ...
 ------------------------------------ 


... TEST 2 ...
 ------------------------------------ 


... TEST 3 ...
 ------------------------------------ 


... TEST 4 ...
 ------------------------------------ 


... TEST 5 ...
 ------------------------------------ 


... TEST 6 ...
 ------------------------------------ 


... TEST 7 ...
 ------------------------------------ 


In [66]:

# Index of the first step whose status or message differs from the expectation
# (None when every step matched).
first_mismatch_idx = next(
  (
    idx
    for idx, expected in enumerate(expected_results)
    if (expected['status'], expected['message']) != (results[idx]['status'], results[idx]['message'])
  ),
  None,
)
test_passed = first_mismatch_idx is None

if not test_passed:
  print("E2E Speech2Cmd test failed - Test failed at idx: {} ".format(first_mismatch_idx))
else:
  print("E2E Speech2Cmd test passed - All tests passed sequentially as expected")

E2E Speech2Cmd test passed - All tests passed sequentially as expected


In [70]:
# Pretty-print the raw response payloads, keeping each payload's own key order
results_dump = json.dumps(results, indent=2)
print(results_dump)

[
  {
    "status": "success",
    "message": "service files successfully cleared"
  },
  {
    "status": "error",
    "message": "speech2cmd service has not been initialized"
  },
  {
    "status": "success",
    "message": "service successfully initialized"
  },
  {
    "status": "success",
    "message": "command prediction successfully made",
    "pred_str": "kinie cove nineteen",
    "match_str": "k IH n iH k oH f iH d n aH iH n t IH n",
    "pred_cmd": "basics",
    "confidence": 0.4,
    "action_digit": 1,
    "pred_aud_str": "kiniecovenineteen",
    "match_aud_str": "kIHniHkoHfiHdnaHiHntIHn"
  },
  {
    "status": "success",
    "message": "command prediction successfully made",
    "pred_str": "peeg in",
    "match_str": "p ih dz ih n",
    "pred_cmd": "pidgin",
    "confidence": 0.42857142857142855,
    "action_digit": 2,
    "pred_aud_str": "peegin",
    "match_aud_str": "pihdzihn"
  },
  {
    "status": "success",
    "message": "service files successfully cleared"
  },
  {

In [23]:
# Download the stt_en_quartznet15x5 model archive, version 1.0.0rc1, from the NGC API; -O sets the local file name.
!wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_quartznet15x5/versions/1.0.0rc1/zip -O stt_en_quartznet15x5_1.0.0rc1.zip

--2021-06-22 07:40:44--  https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_quartznet15x5/versions/1.0.0rc1/zip
Resolving api.ngc.nvidia.com (api.ngc.nvidia.com)... 13.56.52.101, 13.56.70.20
Connecting to api.ngc.nvidia.com (api.ngc.nvidia.com)|13.56.52.101|:443... connected.
HTTP request sent, awaiting response... 302 
Location: https://prod-model-registry-ngc-bucket.s3.us-west-2.amazonaws.com/org/nvidia/team/nemo/models/stt_en_quartznet15x5/versions/1.0.0rc1/files.zip?response-content-disposition=attachment%3B%20filename%3D%22files.zip%22&response-content-type=application%2Fzip&X-Amz-Security-Token=IQoJb3JpZ2luX2VjEAcaCXVzLXdlc3QtMSJIMEYCIQD9oeomL2fuXVWSBdohuCtCEHuLQr2xQvf%2B7vtdEbiIUwIhAPbtKfQs1VKRFhlayMX8ahNLbScq%2B3LiYMt6JuWehpB%2FKoMECND%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEQAxoMNzg5MzYzMTM1MDI3IgwEx8PMWlZEvJniLJQq1wMGo0L88SBXP3PjLeNNpgk7WccuKcnOtcvDr3UlXu%2BWouw0Ev6L7SZsMXLhsy7uYMeuK2jojdPAQmFaAVslGM5%2BolEZZbyiSkeg74xGEwt0E2anEefpAFKbBXVJ9ugSWO0HQ1hhdv7fVJA4Fo%2BkmUIiNQZfAgfPZXf

In [25]:
# Extract the downloaded archive; the recorded run inflated a single file, stt_en_quartznet15x5.nemo.
!unzip stt_en_quartznet15x5_1.0.0rc1.zip

Archive:  stt_en_quartznet15x5_1.0.0rc1.zip
  inflating: stt_en_quartznet15x5.nemo  


In [22]:
# NOTE(review): in the recorded run this request for version 1.0.0b3 returned HTTP 404, so no usable archive was downloaded.
!wget --content-disposition https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_quartznet15x5/versions/1.0.0b3/zip -O stt_en_quartznet15x5_1.0.0b3.zip

--2021-06-22 07:37:39--  https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_quartznet15x5/versions/1.0.0b3/zip
Resolving api.ngc.nvidia.com (api.ngc.nvidia.com)... 13.56.70.20, 13.56.52.101
Connecting to api.ngc.nvidia.com (api.ngc.nvidia.com)|13.56.70.20|:443... connected.
HTTP request sent, awaiting response... 404 
2021-06-22 07:37:39 ERROR 404: (no description).

