# Live Colab Example


## Dependencies and Imports

In [None]:
#@title Install dependencies

!pip install -q omegaconf
!pip install -q torchaudio
!pip install -q soundfile
!pip install -q pydub

import os
from os.path import exists

if not exists('silero-models'):
  !git clone -q --depth 1 https://github.com/snakers4/silero-models

%cd silero-models

# silero imports
import torch
import random
from glob import glob
from omegaconf import OmegaConf
from utils import (init_jit_model, 
                   split_into_batches,
                   read_batch,
                   prepare_model_input)
from colab_utils import (record_audio,
                         audio_bytes_to_np,
                         upload_audio)

# Device passed to model loading and to input preparation in the cells below.
device = torch.device('cpu')   # you can use any pytorch device
# Model registry loaded from models.yml in the current working directory.
models = OmegaConf.load('models.yml')

# imports for uploading/recording
import numpy as np
import ipywidgets as widgets
from scipy.io import wavfile
from IPython.display import Audio, display, clear_output


# wav to text method
def wav_to_text(f='test.wav'):
  """Transcribe a single audio file with the currently loaded model.

  Relies on the notebook-level `model`, `decoder` and `device` variables,
  so a model has to be loaded before this function is called.

  Args:
    f: path of the audio file to transcribe.

  Returns:
    The result of `decoder` applied to the first item of the model output.
  """
  wavs = read_batch([f])
  model_output = model(prepare_model_input(wavs, device=device))
  first_item = model_output[0]
  return decoder(first_item.cpu())

## Transcribe

In [None]:
#@markdown { run: "auto" }

language = "English" #@param ["English", "German", "Spanish"]

print(language)
# Map the form choice to its key under stt_models; any value other than
# German or Spanish falls back to the English model.
language_codes = {'German': 'de', 'Spanish': 'es'}
model_entry = models.stt_models[language_codes.get(language, 'en')]
model, decoder = init_jit_model(model_entry.latest.jit, device=device)

In [None]:
#@markdown Either record audio from microphone or upload audio from file (.mp3 or .wav) { run: "auto" }

record_or_upload = "Record" #@param ["Record", "Upload (.mp3 or .wav)"]
record_seconds =   4#@param {type:"number", min:1, max:10, step:1}
# Sample rate in Hz used for playback and for the WAV file written before transcription.
sample_rate = 16000

def _recognize(audio):
  """Play back a recording, save it as 16-bit PCM and print its transcription.

  The audio is written to 'test.wav' with the notebook-level `sample_rate`
  and that file is then passed to `wav_to_text`.

  Args:
    audio: waveform object exposing `.numpy()`; samples are presumably
      floats in [-1, 1] (not checked here, values outside are clipped).
  """
  wav_path = 'test.wav'
  display(Audio(audio, rate=sample_rate, autoplay=True))
  # Scale to the int16 range and clip before casting: a sample outside
  # [-1, 1] would otherwise wrap around when converted to int16.
  int16_limits = np.iinfo(np.int16)
  pcm = np.clip((32767 * audio).numpy(), int16_limits.min, int16_limits.max)
  wavfile.write(wav_path, sample_rate, pcm.astype(np.int16))
  transcription = wav_to_text(wav_path)
  print('\n\nTRANSCRIPTION:\n')
  print(transcription)

def _record_audio(b):
  """Click callback: record `record_seconds` of audio and transcribe it.

  Args:
    b: argument supplied by the click handler (unused).
  """
  clear_output()
  _recognize(record_audio(record_seconds))

def _upload_audio(b):
  """Clear the cell output, obtain audio via `upload_audio()` and transcribe it.

  Args:
    b: unused; present so the signature mirrors `_record_audio`.
  """
  clear_output()
  _recognize(upload_audio())

# Uploading starts right away; recording waits for a button click.
if record_or_upload != "Record":
  _upload_audio("")  # the argument is ignored by the callback
else:
  record_button = widgets.Button(description="Record Speech")
  record_button.on_click(_record_audio)
  display(record_button)

# PyTorch Example


In [None]:
#@title Install Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio
!pip install -q omegaconf
!pip install -q soundfile

import os
from os.path import exists

if not exists('silero-models'):
  !git clone -q --depth 1 https://github.com/snakers4/silero-models

%cd silero-models

import torch
import random
from glob import glob
from omegaconf import OmegaConf
from utils import (init_jit_model, 
                   split_into_batches,
                   read_batch,
                   prepare_model_input)
from IPython.display import display, Audio

In [None]:
#@title Random English Validation Dataset (optional)

# Download and unpack the archive only when the target directory is missing,
# so re-running the cell does not fetch the data again.
if not exists('scottish_english_female'):
  !wget http://www.openslr.org/resources/83/scottish_english_female.zip
  !unzip -qq scottish_english_female.zip -d scottish_english_female

In [None]:
#@title Random Spanish Validation Dataset (optional)

# Download and unpack the archive only when the target directory is missing,
# so re-running the cell does not fetch the data again.
if not exists('es_pr_female'):
  !wget http://www.openslr.org/resources/74/es_pr_female.zip
  !unzip -qq es_pr_female.zip -d es_pr_female

## Example Cells

In [None]:
models = OmegaConf.load('models.yml')  # all available models are listed in the yml file
# Show, in order: the keys of stt_models, the keys under `en`,
# the keys under `en.latest`, and the value stored at `en.latest.jit`.
print(list(models.stt_models.keys()),
      list(models.stt_models.en.keys()),
      list(models.stt_models.en.latest.keys()),
      models.stt_models.en.latest.jit)
device = torch.device('cpu')   # you can use any pytorch device
# Build the model and its decoder from the `en.latest.jit` entry.
model, decoder = init_jit_model(models.stt_models.en.latest.jit, device=device)

In [None]:
# NOTE(review): these two lines repeat the end of the previous cell; running
# this cell builds the same `en.latest.jit` model a second time.
device = torch.device('cpu')   # you can use any pytorch device
model, decoder = init_jit_model(models.stt_models.en.latest.jit, device=device)

In [None]:
# test_files = glob('path/to/your/file/*.opus')
# NOTE: glob returns an empty list when nothing matches, e.g. when the
# optional dataset cell above was skipped.
test_files = glob('scottish_english_female/*.wav')  # replace with your data
# Group the file paths into batches of 10 for the cells below.
batches = split_into_batches(test_files, batch_size=10)

In [None]:
# transcribe a set of files
# One batch is picked at random. The prepared tensor is named `model_input`
# rather than `input` so the built-in `input()` is not shadowed for the rest
# of the notebook session.
model_input = prepare_model_input(read_batch(random.choice(batches)),
                                  device=device)
output = model(model_input)
for example in output:
    print(decoder(example.cpu()))

In [None]:
# listen to one file
# One batch is picked at random and run through the model; only its first
# item is transcribed and played. The prepared tensor is named `model_input`
# rather than `input` so the built-in `input()` is not shadowed.
batch = read_batch(random.choice(batches))
model_input = prepare_model_input(batch,
                                  device=device)
output = model(model_input)

print(decoder(output[0].cpu()))
display(Audio(batch[0], rate=16000))  # audio was resampled to 16kHz

# ONNX Example

In [None]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio
!pip install -q omegaconf
!pip install -q soundfile
!pip install -q onnx
!pip install -q onnxruntime

import os
from os.path import exists

if not exists('silero-models'):
  !git clone -q --depth 1 https://github.com/snakers4/silero-models

%cd silero-models

import json
import onnx
import torch
import random
import tempfile
import onnxruntime
from glob import glob
from omegaconf import OmegaConf
from utils import (init_jit_model, Decoder, read_batch,
                   split_into_batches, prepare_model_input)

## Example Cells

In [None]:
models = OmegaConf.load('models.yml')  # all available models are listed in the yml file
print(list(models.stt_models.en.latest))  # see which models are available

In [None]:
# Download the label list to a temporary file and build the decoder from it.
# The temporary file and the handle used to read it back get distinct names;
# previously the inner `with open(...) as f` rebound the outer `f`.
with tempfile.NamedTemporaryFile('wb', suffix='.json') as labels_tmp:
    torch.hub.download_url_to_file(models.stt_models.en.latest.labels,
                                   labels_tmp.name,
                                   progress=True)
    with open(labels_tmp.name) as labels_file:
        labels = json.load(labels_file)
decoder = Decoder(labels)

# Download the ONNX model to a temporary file, validate it with the ONNX
# checker and create the runtime session while the file still exists.
with tempfile.NamedTemporaryFile('wb', suffix='.model') as model_tmp:
    torch.hub.download_url_to_file(models.stt_models.en.latest.onnx,
                                   model_tmp.name,
                                   progress=True)
    onnx_model = onnx.load(model_tmp.name)
    onnx.checker.check_model(onnx_model)
    ort_session = onnxruntime.InferenceSession(model_tmp.name)

In [None]:
# Note: for now ONNX supports only batchless models, i.e. single samples,
# as it is mostly intended for porting the network elsewhere.

# test_files = glob('path/to/your/file/*.opus')
test_files = glob('scottish_english_female/*.wav')  # replace with your data
batches = split_into_batches(test_files, batch_size=1)  # batch_size=1 to match the batchless ONNX model

In [None]:
# Pick one batch at random, convert it to a NumPy array and keep only its
# first element, since the ONNX model is fed one sample at a time. The array
# is named `sample` rather than `input` so the built-in `input()` is not
# shadowed.
sample = prepare_model_input(
    read_batch(
        random.choice(batches)
    )
).detach().cpu().numpy()[0]

ort_inputs = {'input': sample}  # keyed by the name the session is fed under
ort_outs = ort_session.run(None, ort_inputs)
decoded = decoder(torch.Tensor(ort_outs[0]))
print(decoded)