In [1]:
import torch
import pandas as pd
from utils import vad
from pathlib import Path
from time import process_time

In [2]:
# Path.glob() returns a one-shot generator: the first benchmark run would
# exhaust it and every later test_vad(audiofiles, ...) call would see no files.
# Materialise it as a sorted list so it can be re-iterated and the processing
# order is deterministic across runs.
audiofiles = sorted(Path("../data").glob("*.wav"))

In [3]:
# Load the Silero VAD model and its helper utilities through torch.hub
# (served from the local hub cache when one is available).
vad_model, vad_utils = torch.hub.load(
    repo_or_dir='snakers4/silero-vad',
    model='silero_vad',
)

# The utilities come back as a 5-tuple; unpack them into module-level names.
get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks = vad_utils

Using cache found in /home/pers/.cache/torch/hub/snakers4_silero-vad_master


In [4]:
def test_vad(files, min_silence):
    dictlist = []
    for f in files:
        try:
            print(f"Processing {f.name}")
            mydict = {}
            t1_start = process_time()
            wav = read_audio(f, sampling_rate=16000)
            vad_ts = get_speech_timestamps(wav, vad_model, sampling_rate=16000, speech_pad_ms=1000, min_silence_duration_ms = min_silence, return_seconds=True)
            orig_len = len(vad_ts)
            vad_ts = vad.resample(vad_ts)
            t1_stop = process_time()
            reduced_len = len(vad_ts)
            df = pd.DataFrame(vad_ts)
            df.loc[:, "duration"] = df.end - df.start
            mydict["file"] = f.name
            mydict["Minimal silence"] = min_silence
            mydict["Original samle length"] = orig_len
            mydict["Reduced sample length"] = reduced_len
            mydict["smallest segment"] = df.duration.min()
            mydict["largest segment"] = df.duration.max()
            mydict["processing_time"] = t1_stop-t1_start
            dictlist.append(mydict)
        except TypeError:
            pass
    return pd.DataFrame(dictlist)

In [6]:
# Benchmark run with min_silence = 10 (forwarded as min_silence_duration_ms).
ten_ms = test_vad(files=audiofiles, min_silence=10)
print(ten_ms)

Processing Stortinget-20160602-155510.wav
Processing Stortinget-20160606-095525.wav
Error: max_len (30) is smaller than one of the segments ({'start': 19543.9, 'end': 19586.6})!
Added {'start': 19543.9, 'end': 19565.25}
Added {'start': 19565.25, 'end': 19586.6}
Processing Stortinget-20160603-085513.wav
Error: max_len (30) is smaller than one of the segments ({'start': 1310.2, 'end': 1340.8})!
Added {'start': 1310.2, 'end': 1325.5}
Added {'start': 1325.5, 'end': 1340.8}
Processing Stortinget-20160602-095510.wav
Error: max_len (30) is smaller than one of the segments ({'start': 1728.8, 'end': 1764.8})!
Added {'start': 1728.8, 'end': 1746.8}
Added {'start': 1746.8, 'end': 1764.8}
Error: max_len (30) is smaller than one of the segments ({'start': 1894.8, 'end': 1929.1})!
Added {'start': 1894.8, 'end': 1911.9499999999998}
Added {'start': 1911.9499999999998, 'end': 1929.1}
Error: max_len (30) is smaller than one of the segments ({'start': 11195.6, 'end': 11225.7})!
Added {'start': 11195.6, '

In [9]:
# Total CPU time divided by the total number of segments found before resampling.
print(
    ten_ms["processing_time"].sum()
    / ten_ms["Original samle length"].sum()
)

0.36075400427883564

In [5]:
# Benchmark run with min_silence = 20 (forwarded as min_silence_duration_ms).
twenty_ms = test_vad(files=audiofiles, min_silence=20)
print(twenty_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    twenty_ms["processing_time"].sum()
    / twenty_ms["Original samle length"].sum()
)

Processing Stortinget-20160602-155510.wav
Processing Stortinget-20160606-095525.wav
Error: max_len (30) is smaller than one of the segments ({'start': 19543.9, 'end': 19586.6})!
Added {'start': 19543.9, 'end': 19565.25}
Added {'start': 19565.25, 'end': 19586.6}
Processing Stortinget-20160603-085513.wav
Error: max_len (30) is smaller than one of the segments ({'start': 1310.2, 'end': 1340.8})!
Added {'start': 1310.2, 'end': 1325.5}
Added {'start': 1325.5, 'end': 1340.8}
Processing Stortinget-20160602-095510.wav
Error: max_len (30) is smaller than one of the segments ({'start': 1728.8, 'end': 1764.8})!
Added {'start': 1728.8, 'end': 1746.8}
Added {'start': 1746.8, 'end': 1764.8}
Error: max_len (30) is smaller than one of the segments ({'start': 1894.8, 'end': 1929.1})!
Added {'start': 1894.8, 'end': 1911.9499999999998}
Added {'start': 1911.9499999999998, 'end': 1929.1}
Error: max_len (30) is smaller than one of the segments ({'start': 11195.6, 'end': 11225.7})!
Added {'start': 11195.6, '

In [None]:
# Benchmark run with min_silence = 30 (forwarded as min_silence_duration_ms).
thirty_ms = test_vad(files=audiofiles, min_silence=30)
print(thirty_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    thirty_ms["processing_time"].sum()
    / thirty_ms["Original samle length"].sum()
)

In [None]:
# Benchmark run with min_silence = 40 (forwarded as min_silence_duration_ms).
fourty_ms = test_vad(files=audiofiles, min_silence=40)
print(fourty_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    fourty_ms["processing_time"].sum()
    / fourty_ms["Original samle length"].sum()
)

In [None]:
# Benchmark run with min_silence = 50 (forwarded as min_silence_duration_ms).
fifty_ms = test_vad(files=audiofiles, min_silence=50)
print(fifty_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    fifty_ms["processing_time"].sum()
    / fifty_ms["Original samle length"].sum()
)

In [None]:
# Benchmark run with min_silence = 60 (forwarded as min_silence_duration_ms).
sixty_ms = test_vad(files=audiofiles, min_silence=60)
print(sixty_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    sixty_ms["processing_time"].sum()
    / sixty_ms["Original samle length"].sum()
)

In [None]:
# Benchmark run with min_silence = 70 (forwarded as min_silence_duration_ms).
seventy_ms = test_vad(files=audiofiles, min_silence=70)
print(seventy_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    seventy_ms["processing_time"].sum()
    / seventy_ms["Original samle length"].sum()
)

In [None]:
# Benchmark run with min_silence = 80 (forwarded as min_silence_duration_ms).
eighty_ms = test_vad(files=audiofiles, min_silence=80)
print(eighty_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    eighty_ms["processing_time"].sum()
    / eighty_ms["Original samle length"].sum()
)

In [None]:
# Benchmark run with min_silence = 90 (forwarded as min_silence_duration_ms).
ninety_ms = test_vad(files=audiofiles, min_silence=90)
print(ninety_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    ninety_ms["processing_time"].sum()
    / ninety_ms["Original samle length"].sum()
)

In [None]:
# Benchmark run with min_silence = 100 (forwarded as min_silence_duration_ms).
hundred_ms = test_vad(files=audiofiles, min_silence=100)
print(hundred_ms)
# Total CPU time divided by the total number of segments found before resampling.
print(
    hundred_ms["processing_time"].sum()
    / hundred_ms["Original samle length"].sum()
)