In [1]:
import malaya_speech
import torch
from malaya_speech.utils import torch_featurization
import numpy as np
import json
from datasets import Audio

`pyaudio` is not available, `malaya_speech.streaming.stream` is not able to use.


In [2]:
# Load one sample clip. The second element of the returned pair is discarded,
# and `y` is not referenced again in the visible cells.
y, _ = malaya_speech.load('speech/singlish/singlish0.wav')

In [3]:
class MalayaDataset(torch.utils.data.Dataset):
    """Map-style dataset that yields decoded audio arrays listed in a JSON manifest.

    The manifest is a JSON object whose ``'X'`` entry is a list with one item
    per utterance (presumably audio file paths -- TODO confirm against the
    manifest). Each item is encoded and decoded through a ``datasets.Audio``
    feature built with ``sampling_rate=SR``, and the ``'array'`` field of the
    decoded result is returned.
    """

    # Sampling rate handed to ``datasets.Audio``.
    SR = 16000

    def __init__(self, file):
        """Load the manifest and create the audio feature.

        Args:
            file: path to the JSON manifest; it must contain an ``'X'`` list.
        """
        # JSON text is UTF-8 by specification, so do not rely on the
        # platform's locale-dependent default encoding.
        with open(file, encoding='utf-8') as fopen:
            self.data = json.load(fopen)
        self.audio = Audio(sampling_rate=self.SR)

    def __getitem__(self, idx):
        """Return the decoded ``'array'`` for the ``idx``-th entry of ``'X'``.

        Raises:
            IndexError: if ``idx`` is outside the ``'X'`` list.
        """
        x = self.data['X'][idx]
        # Only the audio is needed here, so the manifest's 'Y' entries are
        # not read and a manifest without 'Y' is accepted.
        r = self.audio.decode_example(self.audio.encode_example(x))
        return r['array']

    def __len__(self):
        """Return the number of entries in the manifest's ``'X'`` list."""
        return len(self.data['X'])

In [4]:
# Build the training dataset from its JSON manifest.
# NOTE(review): this is an absolute, machine-specific path.
train_dataset = MalayaDataset(
    file='/home/husein/speech-bahasa/malay-asr-train-shuffled.json',
)

In [5]:
# One utterance per batch (the DataLoader default, spelled out here because
# generate_statistics indexes each batch with [0]), decoded by 4 workers.
dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=1,
    num_workers=4,
)

In [6]:
from tqdm import tqdm

def generate_statistics(samples):
    """Compute running per-feature mean and standard deviation of log-mel features.

    Each item of ``samples`` is indexed with ``[0]`` and squeezed, then passed
    through ``torch_featurization.melspectrogram`` and
    ``torch_featurization.piecewise_linear_log``. Statistics are reduced over
    axis 0 of the result (assumed to be the frame axis -- TODO confirm) and
    merged into running estimates of E[x] and E[x^2], weighted by frame count.

    Args:
        samples: iterable of batches; ``sample[0]`` must be a tensor.

    Returns:
        ``(mean, stddev)``. Both are tensors when at least one frame was seen;
        when no frames were seen the result is ``(0, 0.0)``.
    """
    E_x = 0
    E_x_2 = 0
    N = 0

    # Iterate the input directly: the index from enumerate() was never used,
    # and wrapping hides the input's length from tqdm.
    for sample in tqdm(samples):
        mel_spec = torch_featurization.melspectrogram(sample[0].squeeze())
        scaled_mel_spec = torch_featurization.piecewise_linear_log(mel_spec)
        M = scaled_mel_spec.size(0)
        if M == 0:
            # Nothing to accumulate; also avoids 0 / 0 when this is the
            # first sample (N == 0).
            continue

        frame_sum = scaled_mel_spec.sum(0)
        sq_sum = scaled_mel_spec.pow(2).sum(0)

        # Weighted merge of the running averages with this sample's sums.
        E_x = E_x * (N / (N + M)) + frame_sum / (N + M)
        E_x_2 = E_x_2 * (N / (N + M)) + sq_sum / (N + M)
        N += M

    if N == 0:
        # No frames at all: the accumulators are still plain Python zeros.
        return E_x, (E_x_2 - E_x**2) ** 0.5

    # Rounding can push E[x^2] - E[x]^2 slightly below zero for near-constant
    # features; clamp so the square root never yields NaN.
    variance = (E_x_2 - E_x**2).clamp(min=0)
    return E_x, variance ** 0.5

In [7]:
# Full pass over the training DataLoader; the recorded progress output for this
# cell shows the run taking more than three hours.
mean, stddev = generate_statistics(iter(dataloader))

79562it [15:30, 17.56it/s] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

470801it [1:38:27, 111.28it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

873675it [3:00:37, 101.93it/s]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limi

In [8]:
# Persist the global statistics: the mean and the reciprocal of the standard
# deviation, each as a plain list.
# NOTE(review): a zero entry in `stddev` would become inf and be written as
# `Infinity`, which strict JSON parsers reject -- confirm none occur.
json_str = json.dumps(
    dict(mean=mean.tolist(), invstddev=(1 / stddev).tolist()),
    indent=2,
)
with open('malay-stats.json', mode="w") as f:
    f.write(json_str)