# PyTorch Example

## Install Dependencies

In [1]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
#!pip install -q torchaudio soundfile

import glob
import torch

from IPython.display import Audio

# Limit PyTorch to a single CPU thread for this example.
torch.set_num_threads(1)

# Fetch the Silero VAD model together with its helper functions from torch.hub.
# force_reload=True re-downloads the repository instead of reusing the cache.
model, utils = torch.hub.load(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
    force_reload=True,
)

# `utils` is unpacked positionally, so the order of these names matters.
(get_speech_ts, save_audio, read_audio,
 state_generator, single_audio_stream, collect_speeches) = utils

Downloading: "https://github.com/snakers4/silero-vad/archive/master.zip" to /home/keras/.cache/torch/hub/master.zip


In [5]:
# Show the directory torch.hub uses to store downloaded repositories.
torch.hub.get_dir()

'/home/keras/.cache/torch/hub'

## Full audio

In [None]:
# Load the example recording.
wav = read_audio('files/en.wav')

# Compute the speech timestamps over the whole recording and show them.
speech_timestamps = get_speech_ts(wav, model, num_steps=4)
print(speech_timestamps)

In [None]:
# merge all speech chunks to one audio
# Join all detected speech chunks into one clip, write it to disk, then play it.
save_audio(
    'only_speech.wav',
    collect_speeches(speech_timestamps, wav),
    16000,  # presumably the sample rate in Hz -- TODO confirm against save_audio
)
Audio('only_speech.wav')

## Single Audio Stream

In [None]:
# Path of the file to stream. It gets its own name so that `wav`, which holds
# the audio loaded by read_audio in the "Full audio" section, is not rebound to
# a string (that would break re-running the collect_speeches cell afterwards).
wav_path = 'files/en.wav'

# Iterate over what single_audio_stream produces and print only truthy batches.
for batch in single_audio_stream(model, wav_path):
    if batch:
        print(batch)

## Multiple Audio Streams

In [None]:
# glob.glob returns matches in arbitrary order; sort them so the order in which
# files are fed to the streams (and the printed output) is reproducible.
audios_for_stream = sorted(glob.glob('files/*.wav'))
len(audios_for_stream) # total 4 audios

In [None]:
# Run the generator with audios_in_stream=2 (two audio streams, per the
# original note) and print every truthy batch it yields.
for batch in state_generator(model, audios_for_stream, audios_in_stream=2):
    if not batch:
        continue  # nothing to report for this step
    print(batch)

# ONNX Example

## Install Dependencies

In [None]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio soundfile onnxruntime

import glob
import onnxruntime
# torch is used below (torch.hub here, torch.no_grad / torch.Tensor in
# validate_onnx); import it explicitly so this example does not depend on the
# PyTorch example cell having been run first.
import torch

from IPython.display import Audio

# Only the helper utilities are needed for the ONNX example; the model object
# returned by torch.hub is discarded.
_, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                          model='silero_vad',
                          force_reload=True)

# `utils` is unpacked positionally, so the order of these names matters.
(get_speech_ts,
 save_audio,
 read_audio,
 state_generator,
 single_audio_stream,
 collect_speeches) = utils


def init_onnx_model(model_path: str):
    """Create an ONNX Runtime inference session for the model file at `model_path`.

    Args:
        model_path: Path to the ONNX model file.

    Returns:
        The `onnxruntime.InferenceSession` built from that file.
    """
    return onnxruntime.InferenceSession(model_path)

def validate_onnx(model, inputs):
    """Run `model` on `inputs` and return its outputs as a list of torch tensors.

    Args:
        model: Object exposing ``run(output_names, feed)``, e.g. the session
            returned by ``init_onnx_model``.
        inputs: Torch tensor; it is moved to CPU and converted to a NumPy array
            before being fed to the model under the key ``'input'``.

    Returns:
        A list with one ``torch.Tensor`` per output returned by ``model.run``.
    """
    with torch.no_grad():
        # The feed dictionary uses the fixed input name 'input'.
        feed = {'input': inputs.cpu().numpy()}
        # Passing None as the first argument, exactly as the original call did.
        raw_outputs = model.run(None, feed)
        tensors = []
        for array in raw_outputs:
            tensors.append(torch.Tensor(array))
    return tensors

## Full audio

In [None]:
# Load the ONNX model and the example recording.
model = init_onnx_model('files/model.onnx')
wav = read_audio('files/en.wav')

# Compute the speech timestamps over the whole recording; validate_onnx is
# supplied as run_function so the ONNX session is used for inference.
speech_timestamps = get_speech_ts(
    wav,
    model,
    num_steps=4,
    run_function=validate_onnx,
)
print(speech_timestamps)

In [None]:
# merge all speech chunks to one audio
# Join all detected speech chunks into one clip, write it to disk, then play it.
save_audio(
    'only_speech.wav',
    collect_speeches(speech_timestamps, wav),
    16000,  # presumably the sample rate in Hz -- TODO confirm against save_audio
)
Audio('only_speech.wav')

## Single audio stream

In [None]:
# File to stream; note that `wav` is a path string here, not loaded audio.
wav = 'files/en.wav'
# Fresh ONNX session for the single-stream example.
model = init_onnx_model('files/model.onnx')

In [None]:
# Stream the file through the ONNX model (validate_onnx is the run_function)
# and print every truthy batch.
for batch in single_audio_stream(model, wav, run_function=validate_onnx):
    if not batch:
        continue  # nothing to report for this step
    print(batch)

## Multiple audio streams

In [None]:
# Fresh ONNX session for the multi-stream example.
model = init_onnx_model('files/model.onnx')
# glob.glob returns matches in arbitrary order; sort them so the order in which
# files are fed to the streams (and the printed output) is reproducible.
audios_for_stream = sorted(glob.glob('files/*.wav'))
print(len(audios_for_stream)) # total 4 audios

In [None]:
# Run the generator with audios_in_stream=2 (two audio streams, per the
# original note), using validate_onnx as run_function, and print every truthy
# batch it yields.
for batch in state_generator(model, audios_for_stream,
                             audios_in_stream=2,
                             run_function=validate_onnx):
    if not batch:
        continue  # nothing to report for this step
    print(batch)