# JIT example

In [None]:
!pip install -q torchaudio
!pip install -q ipython # For jupyter audio display

In [None]:
# dependencies
import glob
import torch
torch.set_num_threads(1)
from IPython.display import Audio

# Load the `silero_vad` entry point of the `snakers4/silero-vad` repository
# through torch.hub; the call returns the model and a bundle of helpers.
model, utils = torch.hub.load(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
)

# `utils` is unpacked positionally into six helpers, so the order of the
# names below has to match the order in which the hub entry point returns them.
(get_speech_ts, save_audio, read_audio,
 state_generator, single_audio_stream, collect_speeches) = utils

## Full audio

In [None]:
# Read the sample recording 'files/en.wav'; the result is what the following
# cells pass to get_speech_ts and collect_speeches.
wav = read_audio('files/en.wav')

In [None]:
# Get speech timestamps from the full audio file (the whole `wav` is passed at once).
# NOTE(review): the meaning of num_steps is defined by get_speech_ts, which is not visible here.
speech_timestamps = get_speech_ts(wav, model, num_steps=4)

In [None]:
# Bare expression: the notebook echoes the detected timestamps as the cell output.
speech_timestamps

In [None]:
# Merge all detected speech chunks into one audio and write it to 'only_speech.wav'.
# 16000 is save_audio's third argument (presumably the sample rate in Hz; confirm in utils).
save_audio('only_speech.wav', collect_speeches(speech_timestamps, wav), 16000)
# Last expression of the cell: the notebook renders an inline player for the saved file.
Audio('only_speech.wav')

## Single audio stream

In [None]:
# In this section `wav` is the file path itself, not loaded audio:
# single_audio_stream in the next cell receives the path string.
wav = 'files/en.wav'

In [None]:
# Iterate over everything single_audio_stream yields for this file and print
# only the non-empty (truthy) results.
for batch in single_audio_stream(model, wav):
    if not batch:
        continue
    print(batch)

## Multiple audio streams

In [None]:
# glob returns matches in arbitrary, platform-dependent order; sort them so the
# streams below receive the files in the same order on every run.
audios_for_stream = sorted(glob.glob('files/*.wav'))
# Bare expression: the notebook shows the number of files found.
len(audios_for_stream) # total 4 audios

In [None]:
# Run the file list through state_generator with two audio streams
# (audios_in_stream=2) and print only the non-empty (truthy) results.
for batch in state_generator(model, audios_for_stream, audios_in_stream=2):
    if not batch:
        continue
    print(batch)

# ONNX example

In [None]:
!pip install -q ipython # For jupyter audio display
!pip install -q onnxruntime

In [None]:
# dependencies
import glob
import torch
from IPython.display import Audio
torch.set_num_threads(1)
import onnxruntime

from utils import (get_speech_ts, save_audio, read_audio, 
                   state_generator, single_audio_stream, collect_speeches)

def init_onnx_model(model_path: str):
    """Open the ONNX model stored at ``model_path``.

    Args:
        model_path: Path handed to ``onnxruntime.InferenceSession``.

    Returns:
        The ``onnxruntime.InferenceSession`` created for that path. No
        arguments other than the path are passed to the constructor.
    """
    session = onnxruntime.InferenceSession(model_path)
    return session

def validate_onnx(model, inputs):
    """Run ``model`` on a torch tensor and return its outputs as torch tensors.

    Args:
        model: Object exposing ``run(output_names, input_feed)``, e.g. the
            session returned by ``init_onnx_model``.
        inputs: Torch tensor; it is moved to the CPU and converted to a NumPy
            array before being fed to the model under the key ``'input'``.

    Returns:
        A list with one ``torch.Tensor`` per element returned by
        ``model.run``, in the same order.
    """
    # The tensor -> NumPy conversion stays inside no_grad, as in the original.
    with torch.no_grad():
        feed = {'input': inputs.cpu().numpy()}
        # None is passed as the output-name list; every returned array is
        # wrapped with the torch.Tensor constructor.
        raw_outputs = model.run(None, feed)
        return [torch.Tensor(array) for array in raw_outputs]

## Full audio

In [None]:
# Create the onnxruntime session for 'files/model.onnx' and read the sample
# recording that the following cells pass to get_speech_ts and collect_speeches.
model = init_onnx_model('files/model.onnx')
wav = read_audio('files/en.wav')

In [None]:
# Get speech timestamps from the full audio file. validate_onnx is handed over as
# run_function (presumably what get_speech_ts uses to invoke the ONNX session; confirm in utils).
speech_timestamps = get_speech_ts(wav, model, num_steps=4, run_function=validate_onnx)

In [None]:
# Bare expression: the notebook echoes the detected timestamps as the cell output.
speech_timestamps

In [None]:
# Merge all detected speech chunks into one audio and write it to 'only_speech.wav'.
# 16000 is save_audio's third argument (presumably the sample rate in Hz; confirm in utils).
save_audio('only_speech.wav', collect_speeches(speech_timestamps, wav), 16000)
# Last expression of the cell: the notebook renders an inline player for the saved file.
Audio('only_speech.wav')

## Single audio stream

In [None]:
# Create a new onnxruntime session for this section. `wav` is the file path
# itself (not loaded audio) and is passed as-is to single_audio_stream below.
model = init_onnx_model('files/model.onnx')
wav = 'files/en.wav'

In [None]:
# Iterate over everything single_audio_stream yields for this file, with
# validate_onnx supplied as run_function, and print only the non-empty results.
for batch in single_audio_stream(model, wav, run_function=validate_onnx):
    if not batch:
        continue
    print(batch)

## Multiple audio streams

In [None]:
# Create a new onnxruntime session for the multi-stream example.
model = init_onnx_model('files/model.onnx')

In [None]:
# glob returns matches in arbitrary, platform-dependent order; sort them so the
# streams below receive the files in the same order on every run.
audios_for_stream = sorted(glob.glob('files/*.wav'))
# Bare expression: the notebook shows the number of files found.
len(audios_for_stream) # total 4 audios

In [None]:
# Run the file list through state_generator with two audio streams
# (audios_in_stream=2), using validate_onnx as run_function, and print only
# the non-empty (truthy) results.
for batch in state_generator(model, audios_for_stream, audios_in_stream=2, run_function=validate_onnx):
    if not batch:
        continue
    print(batch)