In [1]:
# %matplotlib
# !git reset master --hard && git pull

In [2]:
# !wget http://www.openslr.org/resources/12/dev-clean.tar.gz
# !tar -zxf dev-clean.tar.gz

In [3]:
# !wget https://raw.githubusercontent.com/pyannote/pyannote-audio/develop/tutorials/data_preparation/download_ami.sh
# !mkdir ami
# !bash download_ami.sh ami

In [4]:
# !wget https://raw.githubusercontent.com/pyannote/pyannote-audio/develop/tutorials/data_preparation/AMI/MixHeadset.development.rttm
# !wget https://raw.githubusercontent.com/pyannote/pyannote-audio/develop/tutorials/data_preparation/AMI/MixHeadset.test.rttm
# !wget https://raw.githubusercontent.com/pyannote/pyannote-audio/develop/tutorials/data_preparation/AMI/MixHeadset.train.rttm

In [5]:
import malaya_speech.train as train
import numpy as np
import malaya_speech
from tqdm import tqdm
import random

# Turn division-by-zero and invalid floating-point operations into exceptions
# (NumPy raises FloatingPointError) instead of the default warnings, so bad
# samples fail loudly during augmentation.
np.seterr(divide='raise', invalid='raise')

{'divide': 'warn', 'over': 'warn', 'under': 'ignore', 'invalid': 'warn'}

In [6]:
import librosa

def random_stretch(samples, low = 0.5, high = 1.3):
    """
    Time-stretch a waveform by a rate drawn uniformly from [low, high).

    Parameters
    ----------
    samples : array-like
        1-D waveform. It is copied and cast to float before stretching, so the
        caller's array is never modified.
    low, high : float
        Bounds of the uniform distribution the stretch rate is drawn from.
        Per the librosa documentation, a rate above 1 speeds the signal up
        (shorter output) and a rate below 1 slows it down (longer output).

    Returns
    -------
    np.ndarray
        The stretched waveform; its length generally differs from the input.
    """
    rate = np.random.uniform(low = low, high = high)
    # `rate` must be passed by keyword: it is keyword-only in librosa >= 0.10,
    # and older releases accept the keyword form as well.
    return librosa.effects.time_stretch(
        np.array(samples, dtype = 'float'), rate = rate
    )

def random_pitch(samples, low = 0.5, high = 1.0):
    """
    Resample a waveform at a random stride and return it at the input length.

    A factor is drawn uniformly from [low, high); the signal is then read by
    linear interpolation every ``1 / factor`` samples. The result is written
    into a copy of `samples` (same length and dtype); positions past the end
    of the resampled signal are left at zero.

    Parameters
    ----------
    samples : np.ndarray
        1-D waveform. Not modified.
    low, high : float
        Bounds of the uniform distribution the factor is drawn from.

    Returns
    -------
    np.ndarray
        Array with the shape and dtype of `samples`.
    """
    output = samples.copy()
    n_samples = len(output)

    # Stride between consecutive read positions in the original signal.
    step = 1.0 / np.random.uniform(low = low, high = high)
    resampled = np.interp(
        np.arange(0, n_samples, step),
        np.arange(0, n_samples),
        output,
    )

    # Clear the copy in place, then overwrite as much of it as the resampled
    # signal can fill.
    keep = min(output.shape[0], resampled.shape[0])
    output *= 0
    output[:keep] = resampled[:keep]
    return output

In [7]:
import os
from glob import glob

# Map every AMI recording name (file name with '.wav' removed) to its path,
# and list the RTTM annotation files sitting next to the notebook.
ami = {
    os.path.basename(path).replace('.wav', ''): path
    for path in glob('ami/amicorpus/*/*/*.wav')
}
rttm = glob('*.rttm')
len(ami), len(rttm)

(171, 3)

In [8]:
import pandas as pd
import random
from tqdm import tqdm

# Threshold compared against `frame.duration` in `generator`: a grouped VAD
# segment shorter than this is kept whole as one example.
# NOTE(review): presumably seconds — confirm against malaya_speech's frame type.
maxlen = 0.3
# Frame lengths handed to `malaya_speech.utils.generator.frames` when each
# grouped segment is re-sliced (presumably milliseconds — TODO confirm).
selected_frames = [30, 90]
# Waveform augmentations; `generator` runs each of them 0-3 times per example.
functions = [random_pitch, random_stretch]

In [9]:
# FLAC utterances from both LibriSpeech subsets, in one list.
files = (
    glob('../speech-bahasa/LibriSpeech/test-clean/*/*/*.flac')
    + glob('LibriSpeech/dev-clean/*/*/*.flac')
)
len(files)

5323

In [10]:
# Load every noise clip once; each is passed through `int_to_float` and kept
# in memory for `generator` to sample from.
noises = [
    malaya_speech.astype.int_to_float(malaya_speech.utils.read.wav(path)[0])
    for path in tqdm(glob('../noise/noise/*.wav'))
]
len(noises)

100%|██████████| 182/182 [00:39<00:00,  4.57it/s]


182

In [12]:
def generator(sr = 16000):
    """
    Yield VAD training examples (waveform + label) for every file in `files`.

    For each FLAC file the audio is cut into frames, each frame is labelled by
    a WebRTC VAD, and the labelled frames are grouped with
    `malaya_speech.vad.group_vad`. Every group contributes:

    - the whole group, when its `duration` is below `maxlen`;
    - the group re-sliced at each frame length in `selected_frames`.

    Each of those base examples is then augmented: 0-3 copies mixed with a
    random clip from `noises`, and 0-3 rounds of every function in
    `functions`. Augmented copies inherit the label of their base example.

    Parameters
    ----------
    sr : int
        Kept for backward compatibility. The value is replaced by the sample
        rate returned by the FLAC reader as soon as the first file is read.

    Yields
    ------
    dict
        ``{'waveforms': <waveform>.tolist(), 'targets': [int(label)]}``
    """
    for file in tqdm(files):
        audio, sr = malaya_speech.utils.read.flac(file)
        # Amplitude floor for the VAD: 30th percentile of the absolute signal.
        vad = malaya_speech.vad.webrtc(
            minimum_amplitude = int(np.quantile(np.abs(audio), 0.3))
        )
        vad_frames = malaya_speech.utils.generator.frames(audio, 30, sr, False)
        vad_frames = [(frame, vad(frame)) for frame in vad_frames]
        grouped = malaya_speech.vad.group_vad(vad_frames)

        waveforms, labels = [], []
        for frame, label in grouped:
            if frame.duration < maxlen:
                waveforms.append(malaya_speech.astype.int_to_float(frame.array))
                labels.append(label)
            for s in selected_frames:
                try:
                    sliced = malaya_speech.utils.generator.frames(frame.array, s, sr, False)
                    sliced = [malaya_speech.astype.int_to_float(f.array) for f in sliced]
                except Exception:
                    # Best effort: a group that cannot be re-sliced at this
                    # frame length is skipped. Only `Exception` is caught so
                    # that Ctrl-C / SystemExit still stop the run.
                    continue
                # Extend both lists only after slicing succeeded, so waveforms
                # and labels can never get out of step.
                waveforms.extend(sliced)
                labels.extend([label] * len(sliced))

        # `range` is evaluated once, so only the base examples collected above
        # are augmented; the augmented copies appended below are not revisited.
        for i in range(len(waveforms)):
            for _ in range(random.randint(0, 3)):
                # A truthy label (presumably speech — confirm) gets a smaller
                # noise factor than a falsy one.
                if labels[i]:
                    factor = random.uniform(0.1, 0.5)
                else:
                    factor = random.uniform(0.4, 0.9)

                noise = random.choice(noises)
                waveforms.append(
                    malaya_speech.augmentation.waveform.add_noise(
                        waveforms[i], noise, factor = factor
                    )
                )
                labels.append(labels[i])

            for _ in range(random.randint(0, 3)):
                for augment in functions:
                    waveforms.append(augment(waveforms[i]))
                    labels.append(labels[i])

        for waveform, label in zip(waveforms, labels):
            yield {
                'waveforms': waveform.tolist(),
                'targets': [int(label)],
            }

# Rebind the name to the generator object itself: from here on `generator` is
# the iterator that gets consumed, not the function defined above.
generator = generator()

In [13]:
import os
import tensorflow as tf

# Output directory for the generated shards. Create it first, then clear any
# files already in it.
DATA_DIR = os.path.expanduser('vad2/data')
tf.gfile.MakeDirs(DATA_DIR)
for stale in glob(os.path.join(DATA_DIR, '*')):
    # Same scope as the former `rm vad2/data/*`: plain files and symlinks
    # only; sub-directories and dot-files are left alone. Unlike the shell
    # call, a failed deletion raises instead of being silently ignored.
    if os.path.isfile(stale) or os.path.islink(stale):
        os.remove(stale)

In [14]:
# Shard counts per split: 1000 for 'train', 1 for 'dev'.
shards = [
    {'split': 'train', 'shards': 1000},
    {'split': 'dev', 'shards': 1},
]
train.prepare_dataset(generator, DATA_DIR, shards, prefix = 'vad')





  0%|          | 0/5323 [00:00<?, ?it/s]


INFO:tensorflow:Generating case 0.


  1%|          | 52/5323 [01:15<2:11:24,  1.50s/it]

INFO:tensorflow:Generating case 100000.


  2%|▏         | 97/5323 [02:36<2:44:43,  1.89s/it]

INFO:tensorflow:Generating case 200000.


  3%|▎         | 137/5323 [03:52<2:59:33,  2.08s/it]

INFO:tensorflow:Generating case 300000.


  4%|▎         | 193/5323 [05:13<1:42:28,  1.20s/it]

INFO:tensorflow:Generating case 400000.


  5%|▍         | 263/5323 [06:29<2:11:27,  1.56s/it]

INFO:tensorflow:Generating case 500000.


  6%|▋         | 334/5323 [07:50<1:14:58,  1.11it/s]

INFO:tensorflow:Generating case 600000.


  7%|▋         | 382/5323 [09:09<2:44:34,  2.00s/it]

INFO:tensorflow:Generating case 700000.


  8%|▊         | 449/5323 [10:27<59:08,  1.37it/s]  

INFO:tensorflow:Generating case 800000.


 10%|▉         | 516/5323 [11:48<2:59:38,  2.24s/it]

INFO:tensorflow:Generating case 900000.


 10%|█         | 557/5323 [13:10<1:16:53,  1.03it/s]

INFO:tensorflow:Generating case 1000000.


 11%|█▏        | 608/5323 [14:32<2:35:37,  1.98s/it]

INFO:tensorflow:Generating case 1100000.


 13%|█▎        | 686/5323 [15:54<1:20:01,  1.04s/it]

INFO:tensorflow:Generating case 1200000.


 14%|█▍        | 769/5323 [17:17<1:12:22,  1.05it/s]

INFO:tensorflow:Generating case 1300000.


 16%|█▌        | 832/5323 [18:40<1:00:54,  1.23it/s]

INFO:tensorflow:Generating case 1400000.


 16%|█▋        | 872/5323 [20:01<2:51:57,  2.32s/it]

INFO:tensorflow:Generating case 1500000.


 17%|█▋        | 922/5323 [21:23<2:17:08,  1.87s/it]

INFO:tensorflow:Generating case 1600000.


 19%|█▉        | 1003/5323 [22:45<39:04,  1.84it/s] 

INFO:tensorflow:Generating case 1700000.


 21%|██        | 1100/5323 [24:08<49:42,  1.42it/s]  

INFO:tensorflow:Generating case 1800000.


 22%|██▏       | 1179/5323 [25:25<1:53:48,  1.65s/it]

INFO:tensorflow:Generating case 1900000.


 23%|██▎       | 1231/5323 [26:46<2:38:55,  2.33s/it]

INFO:tensorflow:Generating case 2000000.


 24%|██▍       | 1280/5323 [28:03<40:50,  1.65it/s]  

INFO:tensorflow:Generating case 2100000.


 25%|██▌       | 1357/5323 [29:22<1:15:14,  1.14s/it]

INFO:tensorflow:Generating case 2200000.


 26%|██▋       | 1398/5323 [30:42<2:20:21,  2.15s/it]

INFO:tensorflow:Generating case 2300000.


 27%|██▋       | 1453/5323 [32:02<1:55:27,  1.79s/it]

INFO:tensorflow:Generating case 2400000.


 28%|██▊       | 1510/5323 [33:19<1:27:04,  1.37s/it]

INFO:tensorflow:Generating case 2500000.


 30%|██▉       | 1572/5323 [34:35<1:17:32,  1.24s/it]

INFO:tensorflow:Generating case 2600000.


 31%|███       | 1626/5323 [35:56<1:34:54,  1.54s/it]

INFO:tensorflow:Generating case 2700000.


 32%|███▏      | 1684/5323 [37:19<1:12:40,  1.20s/it]

INFO:tensorflow:Generating case 2800000.


 33%|███▎      | 1749/5323 [38:38<1:22:46,  1.39s/it]

INFO:tensorflow:Generating case 2900000.


 34%|███▍      | 1829/5323 [39:58<47:40,  1.22it/s]  

INFO:tensorflow:Generating case 3000000.


 36%|███▌      | 1914/5323 [41:17<1:21:00,  1.43s/it]

INFO:tensorflow:Generating case 3100000.


 37%|███▋      | 1977/5323 [42:32<1:24:00,  1.51s/it]

INFO:tensorflow:Generating case 3200000.


 38%|███▊      | 2049/5323 [43:53<1:14:14,  1.36s/it]

INFO:tensorflow:Generating case 3300000.


 39%|███▉      | 2097/5323 [45:14<1:18:19,  1.46s/it]

INFO:tensorflow:Generating case 3400000.


 40%|████      | 2139/5323 [46:32<1:54:43,  2.16s/it]

INFO:tensorflow:Generating case 3500000.


 41%|████      | 2177/5323 [47:50<1:40:36,  1.92s/it]

INFO:tensorflow:Generating case 3600000.


 42%|████▏     | 2212/5323 [49:07<2:12:13,  2.55s/it]

INFO:tensorflow:Generating case 3700000.


 43%|████▎     | 2273/5323 [50:30<48:06,  1.06it/s]  

INFO:tensorflow:Generating case 3800000.


 44%|████▍     | 2334/5323 [51:49<53:45,  1.08s/it]  

INFO:tensorflow:Generating case 3900000.


 46%|████▌     | 2425/5323 [53:12<1:48:35,  2.25s/it]

INFO:tensorflow:Generating case 4000000.


 46%|████▋     | 2474/5323 [54:31<43:51,  1.08it/s]  

INFO:tensorflow:Generating case 4100000.


 48%|████▊     | 2537/5323 [55:51<41:24,  1.12it/s]  

INFO:tensorflow:Generating case 4200000.


 49%|████▉     | 2595/5323 [57:11<57:41,  1.27s/it]  

INFO:tensorflow:Generating case 4300000.


 49%|████▉     | 2620/5323 [58:30<1:57:45,  2.61s/it]

INFO:tensorflow:Generating case 4400000.


 51%|█████     | 2707/5323 [59:49<50:54,  1.17s/it]  

INFO:tensorflow:Generating case 4500000.


 52%|█████▏    | 2781/5323 [1:01:07<37:44,  1.12it/s]  

INFO:tensorflow:Generating case 4600000.


 53%|█████▎    | 2831/5323 [1:02:26<1:42:47,  2.48s/it]

INFO:tensorflow:Generating case 4700000.


 54%|█████▍    | 2867/5323 [1:03:47<1:38:17,  2.40s/it]

INFO:tensorflow:Generating case 4800000.


 55%|█████▍    | 2927/5323 [1:05:07<28:10,  1.42it/s]  

INFO:tensorflow:Generating case 4900000.


 56%|█████▌    | 2986/5323 [1:06:27<1:15:29,  1.94s/it]

INFO:tensorflow:Generating case 5000000.


 57%|█████▋    | 3030/5323 [1:08:10<1:02:18,  1.63s/it]

INFO:tensorflow:Generating case 5100000.


 58%|█████▊    | 3083/5323 [1:09:41<46:37,  1.25s/it]  

INFO:tensorflow:Generating case 5200000.


 59%|█████▉    | 3150/5323 [1:11:02<1:00:28,  1.67s/it]

INFO:tensorflow:Generating case 5300000.


 60%|██████    | 3203/5323 [1:12:21<32:19,  1.09it/s]  

INFO:tensorflow:Generating case 5400000.


 62%|██████▏   | 3274/5323 [1:13:38<35:04,  1.03s/it]  

INFO:tensorflow:Generating case 5500000.


 62%|██████▏   | 3323/5323 [1:14:56<44:41,  1.34s/it]  

INFO:tensorflow:Generating case 5600000.


 63%|██████▎   | 3380/5323 [1:16:12<44:59,  1.39s/it]  

INFO:tensorflow:Generating case 5700000.


 65%|██████▍   | 3448/5323 [1:17:32<18:18,  1.71it/s]  

INFO:tensorflow:Generating case 5800000.


 66%|██████▌   | 3490/5323 [1:18:51<1:06:42,  2.18s/it]

INFO:tensorflow:Generating case 5900000.


 67%|██████▋   | 3558/5323 [1:20:08<19:26,  1.51it/s]  

INFO:tensorflow:Generating case 6000000.


 68%|██████▊   | 3618/5323 [1:21:22<50:29,  1.78s/it]  

INFO:tensorflow:Generating case 6100000.


 69%|██████▉   | 3675/5323 [1:22:44<21:28,  1.28it/s]  

INFO:tensorflow:Generating case 6200000.


 71%|███████   | 3757/5323 [1:24:01<19:46,  1.32it/s]

INFO:tensorflow:Generating case 6300000.


 72%|███████▏  | 3837/5323 [1:25:19<30:52,  1.25s/it]

INFO:tensorflow:Generating case 6400000.


 74%|███████▎  | 3921/5323 [1:26:37<16:42,  1.40it/s]

INFO:tensorflow:Generating case 6500000.


 75%|███████▍  | 3990/5323 [1:27:54<31:35,  1.42s/it]

INFO:tensorflow:Generating case 6600000.


 76%|███████▌  | 4042/5323 [1:29:12<26:21,  1.23s/it]

INFO:tensorflow:Generating case 6700000.


 77%|███████▋  | 4083/5323 [1:30:29<42:32,  2.06s/it]

INFO:tensorflow:Generating case 6800000.


 78%|███████▊  | 4129/5323 [1:31:46<23:21,  1.17s/it]

INFO:tensorflow:Generating case 6900000.


 79%|███████▉  | 4201/5323 [1:33:05<19:54,  1.06s/it]

INFO:tensorflow:Generating case 7000000.


 80%|████████  | 4275/5323 [1:34:21<29:01,  1.66s/it]

INFO:tensorflow:Generating case 7100000.


 82%|████████▏ | 4343/5323 [1:35:40<19:41,  1.21s/it]

INFO:tensorflow:Generating case 7200000.


 83%|████████▎ | 4409/5323 [1:36:58<24:08,  1.58s/it]

INFO:tensorflow:Generating case 7300000.


 84%|████████▍ | 4477/5323 [1:38:17<23:05,  1.64s/it]

INFO:tensorflow:Generating case 7400000.


 85%|████████▌ | 4543/5323 [1:39:33<18:52,  1.45s/it]

INFO:tensorflow:Generating case 7500000.


 87%|████████▋ | 4617/5323 [1:40:52<11:22,  1.03it/s]

INFO:tensorflow:Generating case 7600000.


 88%|████████▊ | 4667/5323 [1:42:10<08:28,  1.29it/s]

INFO:tensorflow:Generating case 7700000.


 89%|████████▉ | 4736/5323 [1:43:30<15:36,  1.60s/it]

INFO:tensorflow:Generating case 7800000.


 90%|████████▉ | 4770/5323 [1:44:47<23:27,  2.54s/it]

INFO:tensorflow:Generating case 7900000.


 91%|█████████ | 4831/5323 [1:46:05<09:56,  1.21s/it]

INFO:tensorflow:Generating case 8000000.


 92%|█████████▏| 4891/5323 [1:47:23<17:14,  2.40s/it]

INFO:tensorflow:Generating case 8100000.


 93%|█████████▎| 4947/5323 [1:48:40<10:26,  1.67s/it]

INFO:tensorflow:Generating case 8200000.


 94%|█████████▍| 4991/5323 [1:49:58<09:31,  1.72s/it]

INFO:tensorflow:Generating case 8300000.


 95%|█████████▌| 5062/5323 [1:51:17<10:24,  2.39s/it]

INFO:tensorflow:Generating case 8400000.


 97%|█████████▋| 5138/5323 [1:52:34<02:41,  1.15it/s]

INFO:tensorflow:Generating case 8500000.


 98%|█████████▊| 5196/5323 [1:53:53<02:38,  1.25s/it]

INFO:tensorflow:Generating case 8600000.


 99%|█████████▊| 5247/5323 [1:55:08<01:36,  1.27s/it]

INFO:tensorflow:Generating case 8700000.


100%|█████████▉| 5316/5323 [1:56:26<00:10,  1.43s/it]

INFO:tensorflow:Generating case 8800000.


100%|██████████| 5323/5323 [1:56:37<00:00,  1.31s/it]


INFO:tensorflow:Generated 8810401 Examples
INFO:tensorflow:Shuffling data...
Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`






INFO:tensorflow:Data shuffled.
