# PyTorch Example

This example requires:

- PyTorch 1.6+
- torchaudio 0.7+
- omegaconf (or any similar library for working with YAML files)

In [None]:
import torch
import random
from glob import glob
from omegaconf import OmegaConf
from utils import (init_jit_model, 
                   split_into_batches,
                   read_batch,
                   prepare_model_input)

In [None]:
# Load the model catalogue from the local 'models.yml' file; later cells
# read model entries from it via attribute access (e.g. models.stt_models.en).
models = OmegaConf.load('models.yml')  # all available models are listed in the yml file

In [None]:
# Inspect the catalogue level by level: top-level keys of `stt_models`,
# keys of the English entry, keys of its `latest` entry, and finally the
# value stored under `latest.jit` (passed to init_jit_model further down).
(
    list(models.stt_models.keys()),
    list(models.stt_models.en.keys()),
    list(models.stt_models.en.latest.keys()),
    models.stt_models.en.latest.jit,
)

In [None]:
# Device that the model is initialised on; the same device is passed to
# prepare_model_input when building the model input.
device = torch.device('cpu')   # you can use any pytorch device
# Build the model and its matching decoder from the `jit` entry of the
# latest English STT model in the catalogue.
model, decoder = init_jit_model(models.stt_models.en.latest.jit, device=device)

In [None]:
# Collect the .opus files to transcribe; 'path/to/your/file' is a placeholder
# that must be replaced with a real directory.
test_files = glob('path/to/your/file/*.opus')
# Group the file paths into batches (presumably up to 10 paths per batch;
# see split_into_batches in utils to confirm).
batches = split_into_batches(test_files, batch_size=10)

In [None]:
# Pick one batch at random, read it and turn it into a model input on
# `device`. The variable is called `model_input` (not `input`) so that the
# `input()` builtin is not shadowed for the rest of the session.
model_input = prepare_model_input(read_batch(random.choice(batches)),
                                  device=device)
output = model(model_input)
# Move each item of the model output to the CPU, decode it and print the result.
for example in output:
    print(decoder(example.cpu()))

# ONNX Example

This example requires:

- PyTorch 1.6+
- torchaudio 0.7+
- omegaconf (or any similar library for working with YAML files)
- onnx
- onnxruntime

Differences:

- ONNX Runtime is used for inference;
- I am reusing the PyTorch utils, but you may use your own.

In [None]:
import json
import onnx
import torch
import random
import tempfile
import onnxruntime
from glob import glob
from omegaconf import OmegaConf
from utils import (init_jit_model, Decoder, read_batch,
                   split_into_batches, prepare_model_input)

In [None]:
# Load the model catalogue from the local 'models.yml' file; the cells below
# read the label and ONNX model locations from it.
models = OmegaConf.load('models.yml')  # all available models are listed in the yml file

In [None]:
# List the entries stored under the latest English STT model
# (the cell below reads its `labels` and `onnx` entries).
list(models.stt_models.en.latest)  # see which models are available

In [None]:
import os

# Download both artefacts into a throwaway directory that is removed when the
# block exits. A TemporaryDirectory is used instead of NamedTemporaryFile
# because re-opening a still-open NamedTemporaryFile by name is not portable
# (it fails on Windows), and every file handle gets its own name so nothing
# is shadowed.
with tempfile.TemporaryDirectory() as tmp_dir:
    # Labels: a JSON file that is handed to Decoder.
    labels_path = os.path.join(tmp_dir, 'labels.json')
    torch.hub.download_url_to_file(models.stt_models.en.latest.labels,
                                   labels_path,
                                   progress=True)
    # Explicit UTF-8 so that parsing does not depend on the locale encoding.
    with open(labels_path, encoding='utf-8') as labels_file:
        labels = json.load(labels_file)
    decoder = Decoder(labels)

    # ONNX model: validate the graph first, then create the runtime session
    # while the downloaded file still exists.
    model_path = os.path.join(tmp_dir, 'model.onnx')
    torch.hub.download_url_to_file(models.stt_models.en.latest.onnx,
                                   model_path,
                                   progress=True)
    onnx_model = onnx.load(model_path)
    onnx.checker.check_model(onnx_model)
    ort_session = onnxruntime.InferenceSession(model_path)

In [None]:
# Note: for now ONNX supports only batchless models, i.e. single samples,
# as it is mostly intended for porting the network elsewhere.

# Collect the .opus files to transcribe; 'path/to/your/file' is a placeholder
# that must be replaced with a real directory.
test_files = glob('path/to/your/file/*.opus')
# batch_size=1 because of the batchless restriction noted above.
batches = split_into_batches(test_files, batch_size=1)

In [None]:
# Pick one batch at random, read it and prepare it as a model input.
# NOTE: the name `input` shadows the builtin; it is kept because the next
# cell reads it under this name.
input = prepare_model_input(read_batch(random.sample(batches, k=1)[0]))
# Convert to a NumPy array and keep only the first element, since the ONNX
# model is fed a single sample rather than a batch.
input = input.detach().cpu().numpy()[0]

In [None]:
# Feed the prepared sample to the session under the input name 'input'.
ort_inputs = {'input': input}
# Passing None as the output names asks ONNX Runtime for all model outputs.
ort_outs = ort_session.run(None, ort_inputs)
# Wrap the first output in a torch tensor before handing it to the decoder.
decoded = decoder(torch.Tensor(ort_outs[0]))
print(decoded)