# PyTorch Examples

## VAD

### Install Dependencies

In [None]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio

SAMPLE_RATE = 16000

import glob
import torch
torch.set_num_threads(1)

from IPython.display import Audio
from pprint import pprint

model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_vad',
                              force_reload=True)

(get_speech_timestamps,
 save_audio,
 read_audio,
 VADIterator,
 collect_chunks) = utils

files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'

### Full Audio

In [None]:
# Load the English example file, requesting the notebook-wide sampling rate.
wav = read_audio(
    f'{files_dir}/en.wav',
    sampling_rate=SAMPLE_RATE,
)

In [None]:
# Bare expression: the value loaded by the previous cell is shown as this cell's output.
wav

**Speech timestamps from full audio**

In [None]:
# Re-read the example file so this cell does not depend on the cells above it.
wav = read_audio(f'{files_dir}/en.wav', sampling_rate=SAMPLE_RATE)

# Run the VAD model over the whole recording in one call and print the result.
speech_timestamps = get_speech_timestamps(
    wav,
    model,
    sampling_rate=SAMPLE_RATE,
)
pprint(speech_timestamps)

In [None]:
# merge all speech chunks to one audio
# Write with the same SAMPLE_RATE the audio was read with, instead of a
# separate literal that could drift if the constant is ever changed.
save_audio('only_speech.wav',
           collect_chunks(speech_timestamps, wav), sampling_rate=SAMPLE_RATE)
# last expression of the cell: renders an inline player for the written file
Audio('only_speech.wav')

### Stream imitation example

In [None]:
## using VADIterator class
# Imitates a stream by feeding the recording to the iterator one window at a time.

vad_iterator = VADIterator(model)
wav = read_audio(f'{files_dir}/en.wav', sampling_rate=SAMPLE_RATE)

window_size_samples = 1536  # number of samples in a single audio chunk
for offset in range(0, len(wav), window_size_samples):
    # NOTE(review): the final slice is shorter than window_size_samples whenever
    # len(wav) is not a multiple of it - confirm the iterator accepts that.
    chunk = wav[offset: offset + window_size_samples]
    speech_dict = vad_iterator(chunk, return_seconds=True)
    if not speech_dict:
        continue
    print(speech_dict, end=' ')
vad_iterator.reset_states()  # reset model states after each audio

In [None]:
## just probabilities
# Calls the model directly on each window and keeps the scalar it returns.

wav = read_audio(f'{files_dir}/en.wav', sampling_rate=SAMPLE_RATE)
window_size_samples = 1536
speech_probs = [
    model(wav[offset: offset + window_size_samples], SAMPLE_RATE).item()
    for offset in range(0, len(wav), window_size_samples)
]

# only the first 100 values are printed to keep the output short
pprint(speech_probs[:100])

## Number detector

### Install Dependencies

In [None]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio soundfile

import glob
import torch
torch.set_num_threads(1)

from IPython.display import Audio
from pprint import pprint

model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_number_detector',
                              force_reload=True)

(get_number_ts,
 save_audio,
 read_audio,
 collect_chunks,
 drop_chunks,
 _) = utils

files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'

### Full audio

In [None]:
# Read the example recording that contains spoken numbers.
wav = read_audio(f'{files_dir}/en_num.wav')

# Ask the number detector for timestamps over the whole file and print them.
number_timestamps = get_number_ts(
    wav,
    model,
)
pprint(number_timestamps)

In [None]:
sample_rate = 16000

# Convert each timestamp from milliseconds to a sample index.
# The dicts are overwritten in place, so running this cell a second time
# applies the conversion again to already-converted values.
for ts in number_timestamps:
    ts.update(
        start=int(ts['start'] * sample_rate / 1000),
        end=int(ts['end'] * sample_rate / 1000),
    )

In [None]:
# Collect the detected number chunks, write them to one file, then play it.
only_numbers_audio = collect_chunks(number_timestamps, wav)
save_audio('only_numbers.wav', only_numbers_audio, sample_rate)
Audio('only_numbers.wav')

In [None]:
# Remove the detected number chunks, write what is left to a file, then play it.
no_numbers_audio = drop_chunks(number_timestamps, wav)
save_audio('no_numbers.wav', no_numbers_audio, sample_rate)
Audio('no_numbers.wav')

## Language detector

### Install Dependencies

In [None]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio soundfile

import glob
import torch
torch.set_num_threads(1)

from IPython.display import Audio
from pprint import pprint

model, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_lang_detector',
                              force_reload=True)

(get_language,
 read_audio,
 _) = utils

files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'

### Full audio

In [None]:
# Read the English example and print the language the detector reports for it.
wav = read_audio(f'{files_dir}/en.wav')
lang = get_language(
    wav,
    model,
)
print(lang)

# ONNX Example

## VAD

**TO BE DONE**

## Number detector

### Install Dependencies

In [None]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio soundfile onnxruntime

import glob
import torch
import onnxruntime
from pprint import pprint

from IPython.display import Audio

_, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_number_detector',
                              force_reload=True)

(get_number_ts,
 save_audio,
 read_audio,
 collect_chunks,
 drop_chunks,
 donwload_onnx_model) = utils

files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'
donwload_onnx_model('number_detector')

def init_onnx_model(model_path: str):
    """Create an onnxruntime inference session for the model file at ``model_path``.

    Returns the ``onnxruntime.InferenceSession`` object.
    """
    session = onnxruntime.InferenceSession(model_path)
    return session

def validate_onnx(model, inputs):
    """Run ``model`` on ``inputs`` and return its outputs as torch tensors.

    Args:
        model: object exposing ``run(output_names, feed_dict)``; in this
            notebook, the session created by ``init_onnx_model``.
        inputs: torch tensor; it is moved to CPU and converted to a numpy
            array, then fed under the input name ``'input'``.

    Returns:
        A list with one ``torch.Tensor`` per output returned by ``model.run``.
    """
    with torch.no_grad():
        feed = {'input': inputs.cpu().numpy()}
        # passing None as the first argument requests every output of the model
        raw_outputs = model.run(None, feed)
        tensors = list(map(torch.Tensor, raw_outputs))
    return tensors

### Full Audio

In [None]:
# Open the ONNX model file and read the example recording with spoken numbers.
model = init_onnx_model('number_detector.onnx')
wav = read_audio(f'{files_dir}/en_num.wav')

# validate_onnx is passed as the run function so the ONNX session is used
# to evaluate the model.
number_timestamps = get_number_ts(
    wav,
    model,
    run_function=validate_onnx,
)
pprint(number_timestamps)

In [None]:
sample_rate = 16000

# Convert each timestamp from milliseconds to a sample index.
# The dicts are overwritten in place, so running this cell a second time
# applies the conversion again to already-converted values.
for ts in number_timestamps:
    ts.update(
        start=int(ts['start'] * sample_rate / 1000),
        end=int(ts['end'] * sample_rate / 1000),
    )

In [None]:
# merge all number chunks to one audio
# Uses sample_rate (set in the conversion cell above) rather than a repeated
# literal, so the rate used for the timestamps and for saving cannot diverge.
save_audio('only_numbers.wav',
           collect_chunks(number_timestamps, wav), sample_rate)
# last expression of the cell: renders an inline player for the written file
Audio('only_numbers.wav')

In [None]:
# drop all number chunks from audio
# Uses sample_rate (set in the conversion cell above) rather than a repeated
# literal, so the rate used for the timestamps and for saving cannot diverge.
save_audio('no_numbers.wav',
           drop_chunks(number_timestamps, wav), sample_rate)
# last expression of the cell: renders an inline player for the written file
Audio('no_numbers.wav')

## Language detector

### Install Dependencies

In [None]:
#@title Install and Import Dependencies

# this assumes that you have a relevant version of PyTorch installed
!pip install -q torchaudio soundfile onnxruntime

import glob
import torch
import onnxruntime
from pprint import pprint

from IPython.display import Audio

_, utils = torch.hub.load(repo_or_dir='snakers4/silero-vad',
                              model='silero_lang_detector',
                              force_reload=True)

(get_language,
 read_audio,
 donwload_onnx_model) = utils

donwload_onnx_model('number_detector')
files_dir = torch.hub.get_dir() + '/snakers4_silero-vad_master/files'

def init_onnx_model(model_path: str):
    """Create an onnxruntime inference session for the model file at ``model_path``.

    Returns the ``onnxruntime.InferenceSession`` object.
    """
    session = onnxruntime.InferenceSession(model_path)
    return session

def validate_onnx(model, inputs):
    """Run ``model`` on ``inputs`` and return its outputs as torch tensors.

    Args:
        model: object exposing ``run(output_names, feed_dict)``; in this
            notebook, the session created by ``init_onnx_model``.
        inputs: torch tensor; it is moved to CPU and converted to a numpy
            array, then fed under the input name ``'input'``.

    Returns:
        A list with one ``torch.Tensor`` per output returned by ``model.run``.
    """
    with torch.no_grad():
        feed = {'input': inputs.cpu().numpy()}
        # passing None as the first argument requests every output of the model
        raw_outputs = model.run(None, feed)
        tensors = list(map(torch.Tensor, raw_outputs))
    return tensors

### Full Audio

In [None]:
# NOTE(review): language detection here opens 'number_detector.onnx' -
# presumably the same model file serves both tasks; confirm this is intended.
model = init_onnx_model('number_detector.onnx')
wav = read_audio(f'{files_dir}/en.wav')

# validate_onnx is passed as the run function so the ONNX session is used
# to evaluate the model.
lang = get_language(
    wav,
    model,
    run_function=validate_onnx,
)
print(lang)