In [1]:
import os
# Set the credentials path before any google.cloud client is created below.
# The path is relative, so the notebook must be run from the directory that
# contains 'mesolitica-tpu.json'.
# NOTE(review): presumably a service-account key picked up by storage.Client() — confirm.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'mesolitica-tpu.json'

In [2]:
from glob import glob
import tensorflow as tf
from tqdm import tqdm
import malaya_speech
from malaya_speech.utils import subword
import numpy as np
import mp
from google.cloud import storage

  'Cannot import beam_search_ops from Tensorflow Addons, `deep_model` for stemmer will not available to use, make sure Tensorflow Addons version >= 0.12.0'


In [3]:
# Collect every .wav path under part4-same-room/wav and display how many were found.
singlish = glob('part4-same-room/wav/*.wav')
len(singlish)

1148098

In [4]:
# Load the subword vocabulary; it is passed to subword.encode() when the
# TFRecord targets are built further down.
subwords = subword.load('transducer-singlish.subword')

In [5]:
import unicodedata
import re
import itertools

# Characters that survive cleaning; anything else is turned into a space.
vocabs = [
    " ", "a", "e", "n", "i", "t", "o", "u", "s", "k", "r", "l", "h",
    "d", "m", "g", "y", "b", "p", "w", "c", "f", "j", "v", "z", "0",
    "1", "x", "2", "q", "5", "3", "4", "6", "9", "8", "7",
]

def preprocessing_text(string):
    """Normalise a transcript to the character set listed in `vocabs`.

    Steps, in order: lowercase and NFC-normalise, delete apostrophes, replace
    every character not in `vocabs` with a space, collapse runs of spaces and
    trim the ends, then shorten any run of one repeated character to at most
    two occurrences.

    Args:
        string: raw transcript text.

    Returns:
        The cleaned string (possibly empty).
    """
    lowered = unicodedata.normalize('NFC', string.lower())
    no_apostrophes = lowered.replace('\'', '')

    kept = []
    for ch in no_apostrophes:
        kept.append(ch if ch in vocabs else ' ')
    collapsed = re.sub(r'[ ]+', ' ', ''.join(kept)).strip()

    # Cap every run of identical characters at length two ("yesss" -> "yess").
    pieces = []
    for _, run in itertools.groupby(collapsed):
        pieces.append(''.join(run)[:2])
    return ''.join(pieces)

In [6]:
def get_after_mandarin(word):
    """Return the part of a '<mandarin>'-tagged token that follows its first ':'.

    Tokens that do not contain '<mandarin>' are returned unchanged. For tagged
    tokens, the text after the first '>' is taken, then the segment after the
    first ':' in it, and anything from a following '</' onward is cut off.

    Raises:
        IndexError: if the token contains '<mandarin>' but the text after the
            first '>' has no ':'.
    """
    if '<mandarin>' not in word:
        return word
    after_tag = word.split('>')[1]
    after_colon = after_tag.split(':')[1]
    return after_colon.split('</')[0]
    
def get_before_mandarin(word):
    """Strip a closing '</mandarin>' tag from a token.

    If the token contains '</mandarin>', everything from the first '</' onward
    is dropped; otherwise the token is returned unchanged.
    """
    return word.split('</')[0] if '</mandarin>' in word else word

def replace_paralinguistic(string, replaces = ['(ppb)', '(ppc)', '(ppl)', '(ppo)', '<UNK>', '<MANDARIN>']):
    """Remove paralinguistic markers and tag-like tokens from a transcript.

    Every marker in `replaces` is substituted with a space, the text is split
    on whitespace, '<mandarin>' tags are unwrapped with get_after_mandarin /
    get_before_mandarin, and any remaining token that starts with one of
    '<', '[', '(' or ends with one of '>', ']', ')' is dropped.

    Tokens that become empty after unwrapping (for example a bare
    '</mandarin>') are skipped. Previously such a token raised IndexError on
    `w[0]`, which made callers discard the whole utterance.

    Args:
        string: raw transcript text.
        replaces: marker substrings to blank out. The default list is only
            read, never mutated.

    Returns:
        The remaining tokens joined by single spaces.

    Raises:
        IndexError: propagated from get_after_mandarin when a '<mandarin>'
            token has no ':' after the tag.
    """
    for r in replaces:
        string = string.replace(r, ' ')
    words = [get_before_mandarin(get_after_mandarin(w)) for w in string.split()]
    kept = [
        w for w in words
        # `w and ...` guards the w[0] / w[-1] lookups against empty tokens.
        if w and w[0] not in '<[(' and w[-1] not in '>])'
    ]
    return ' '.join(kept)

In [7]:
def loop(files):
    """Read and clean the transcript that belongs to each WAV path.

    For every path, '/wav' is replaced with '/text' and '.txt' is appended to
    locate the transcript file. Transcripts shorter than two characters, or
    that start with '<' and end with '>', are skipped. The rest are passed
    through replace_paralinguistic and preprocessing_text, and kept only if
    the cleaned text is non-empty.

    Processing stays best-effort: a failure on one file never aborts the
    chunk. Failures are counted and reported once at the end instead of being
    swallowed silently.

    Args:
        files: tuple `(paths, index)`, where `paths` is a list of WAV paths and
            `index` identifies the chunk (used only in the summary message).

    Returns:
        List of `(wav_path, cleaned_text)` tuples.
    """
    files, index = files
    results = []
    skipped = 0
    first_error = None
    for i in tqdm(files):
        try:
            p = i.replace('/wav','/text')
            with open(f'{p}.txt') as fopen:
                text = fopen.read()
            if len(text) < 2:
                continue
            if text[0] == '<' and text[-1] == '>':
                continue
            text = replace_paralinguistic(text)
            text = preprocessing_text(text)
            if len(text):
                results.append((i, text))
        except Exception as e:
            # Keep going, but remember that (and why) something was dropped.
            skipped += 1
            if first_error is None:
                first_error = f'{i}: {e!r}'
    if skipped:
        # One line per chunk keeps the output readable on large runs.
        print(
            f'chunk {index}: skipped {skipped}/{len(files)} files because of errors; '
            f'first error: {first_error}'
        )
    return results

In [8]:
# Smoke-test the transcript loop on the first 10 files (second tuple element is the chunk index).
loop((singlish[:10], 0))

100%|██████████| 10/10 [00:00<00:00, 1033.64it/s]


[('part4-same-room/wav/sur_0096_1192_phns_cs-chn-0-223.wav',
  'i can tell you you dont say first you let me guess so when youre upset right i do one of three things first'),
 ('part4-same-room/wav/sur_0525_2050_phns_cs-chn-0-1037.wav',
  'mei you ta shi zhe yang ben lai shi yi zheng pan de ta hui sa de')]

In [9]:
# Run the transcript loop over every WAV path using the local `mp` helper with 12 cores.
# NOTE(review): presumably mp.multiprocessing splits `singlish` into chunks, calls
# loop((chunk, index)) per worker and concatenates the returned lists — confirm in mp.py.
singlishs = mp.multiprocessing(singlish, loop, cores = 12)

100%|██████████| 95674/95674 [05:08<00:00, 310.35it/s] 
100%|██████████| 10/10 [00:00<00:00, 131.13it/s]1it/s]
100%|██████████| 95674/95674 [05:09<00:00, 309.06it/s] 
100%|██████████| 95674/95674 [05:09<00:00, 308.99it/s]
100%|██████████| 95674/95674 [05:10<00:00, 307.75it/s]]
100%|██████████| 95674/95674 [05:10<00:00, 307.67it/s] 
100%|██████████| 95674/95674 [05:11<00:00, 306.96it/s] 
100%|██████████| 95674/95674 [05:11<00:00, 307.51it/s] 
100%|██████████| 95674/95674 [05:11<00:00, 306.75it/s] 
100%|██████████| 95674/95674 [05:12<00:00, 306.59it/s] 
100%|██████████| 95674/95674 [05:12<00:00, 305.68it/s] 
100%|██████████| 95674/95674 [05:13<00:00, 305.56it/s] 
100%|██████████| 95674/95674 [05:13<00:00, 305.44it/s] 


In [12]:
# Show how many items came back from the multiprocessing run above.
len(singlishs)

377265

In [13]:
import six

def to_example(dictionary):
    """Helper: build tf.Example from (string -> int/float/str list) dictionary.

    The feature type of each field is chosen from the type of its first
    element:

    * Python or NumPy integers -> Int64List
    * Python or NumPy floats   -> FloatList
    * str (encoded as UTF-8) or bytes -> BytesList

    Args:
        dictionary: mapping from feature name to a non-empty sequence of
            values that all share one of the supported types.

    Returns:
        A tf.train.Example holding one feature per dictionary key.

    Raises:
        ValueError: if a field is empty or its first element has an
            unsupported type.
    """
    features = {}
    for (k, v) in six.iteritems(dictionary):
        # In PY3 `map` returns a lazy iterator that is not subscriptable and is
        # always truthy. Materialise it first so that the emptiness check and
        # the `v[0]` lookups below operate on a real list.
        if six.PY3 and isinstance(v, map):
            v = list(v)
        # Prefer len() so that NumPy arrays (whose truth value is ambiguous)
        # can be checked; fall back to truthiness for objects without len().
        try:
            is_empty = len(v) == 0
        except TypeError:
            is_empty = not v
        if is_empty:
            raise ValueError('Empty generated field: %s' % str((k, v)))
        first = v[0]
        if isinstance(first, six.integer_types) or np.issubdtype(
            type(first), np.integer
        ):
            features[k] = tf.train.Feature(
                int64_list=tf.train.Int64List(value=v)
            )
        # Accept NumPy floats (e.g. np.float32) the same way the integer
        # branch accepts NumPy integers.
        elif isinstance(first, float) or np.issubdtype(
            type(first), np.floating
        ):
            features[k] = tf.train.Feature(
                float_list=tf.train.FloatList(value=v)
            )
        elif isinstance(first, six.string_types):
            if not six.PY2:  # Convert in python 3.
                v = [bytes(x, 'utf-8') for x in v]
            features[k] = tf.train.Feature(
                bytes_list=tf.train.BytesList(value=v)
            )
        elif isinstance(first, bytes):
            features[k] = tf.train.Feature(
                bytes_list=tf.train.BytesList(value=v)
            )
        else:
            raise ValueError(
                'Value for %s is not a recognized type; v: %s type: %s'
                % (k, str(first), str(type(first)))
            )
    return tf.train.Example(features=tf.train.Features(feature=features))

In [14]:
# Sample rate used by loop() to convert a sample count into seconds.
sr = 16000
# Clips longer than this many seconds are skipped by loop().
maxlen = 18
# Transcripts with fewer characters than this are skipped by loop().
minlen_text = 1
# Batch counter; loop() embeds it in each TFRecord file name.
global_count = 0

In [15]:
def loop(files):
    """Write one TFRecord shard for a chunk of utterances and upload it.

    Each item is a `(wav_path, text)` pair. Items whose text is shorter than
    `minlen_text` characters, or whose audio is longer than `maxlen` seconds
    (sample count divided by `sr`), are skipped. The rest are serialised with
    to_example() using the keys 'waveforms', 'targets' and 'targets_length'.

    The shard is written locally as '{index}-{global_count}.tfrecord',
    uploaded to 'imda/part4-same-room/' in the 'mesolitica-tpu-general'
    bucket, and the local copy is removed once the upload has succeeded.

    Args:
        files: tuple `(items, index)`, where `items` is a list of
            `(wav_path, text)` pairs and `index` identifies the chunk.

    Returns:
        None.
    """
    client = storage.Client()
    bucket = client.bucket('mesolitica-tpu-general')
    files, index = files
    output_file = f'{index}-{global_count}.tfrecord'
    # The context manager closes the writer even if iteration fails part-way.
    with tf.io.TFRecordWriter(output_file) as writer:
        for s in tqdm(files):
            try:
                if len(s[1]) < minlen_text:
                    continue
                # NOTE(review): the second return value is ignored, so the duration
                # check assumes the audio is at `sr` Hz — confirm against malaya_speech.load.
                y, _ = malaya_speech.load(s[0])
                if (len(y) / sr) > maxlen:
                    continue
                t = subword.encode(subwords, s[1], add_blank=False)
                example = to_example({'waveforms': y.tolist(),
                                      'targets': t,
                                      'targets_length': [len(t)]})
                writer.write(example.SerializeToString())
            except Exception as e:
                # Best-effort: report the offending item together with the error
                # so failures can be traced back to a file, then carry on.
                print(f'{s!r}: {e}')
    blob = bucket.blob(f'imda/part4-same-room/{output_file}')
    blob.upload_from_filename(output_file)
    # Remove the local shard without spawning a shell.
    os.remove(output_file)

In [17]:
# Smoke-test the TFRecord loop on 10 items. With index 0 and global_count 0 this
# writes and uploads '0-0.tfrecord'.
loop((singlishs[:10], 0))

100%|██████████| 10/10 [00:00<00:00, 32.13it/s]


In [18]:
# Write the TFRecord shards in batches of `batch_size` utterances. Each batch is
# handed to mp.multiprocessing with 6 cores, and `global_count` is bumped after
# every batch because loop() embeds it in the shard file name.
# NOTE(review): this assumes worker processes observe the updated `global_count`
# (e.g. workers are started fresh on every mp.multiprocessing call) — confirm in mp.py.
batch_size = 25000
for i in range(0, len(singlishs), batch_size):
    batch = singlishs[i: i + batch_size]
    mp.multiprocessing(batch, loop, cores = 6, returned = False)
    global_count += 1

  3%|▎         | 115/4166 [00:09<06:44, 10.00it/s]

zero-size array to reduction operation maximum which has no identity


 74%|███████▍  | 3073/4166 [02:15<00:39, 27.51it/s]

zero-size array to reduction operation maximum which has no identity


 75%|███████▍  | 3115/4166 [02:15<00:51, 20.25it/s]

zero-size array to reduction operation maximum which has no identity


 83%|████████▎ | 3478/4166 [02:33<00:27, 25.28it/s]

zero-size array to reduction operation maximum which has no identity


 88%|████████▊ | 3649/4166 [02:40<00:19, 26.98it/s]

zero-size array to reduction operation maximum which has no identity

 87%|████████▋ | 3638/4166 [02:40<00:19, 27.62it/s]




100%|██████████| 4166/4166 [02:56<00:00, 23.55it/s]
100%|██████████| 4166/4166 [02:57<00:00, 23.46it/s]
100%|██████████| 4166/4166 [02:57<00:00, 23.46it/s]
100%|██████████| 4166/4166 [02:58<00:00, 23.40it/s]
100%|██████████| 4166/4166 [02:59<00:00, 23.25it/s]
100%|██████████| 4166/4166 [02:59<00:00, 23.23it/s]
100%|██████████| 4/4 [00:00<00:00, 37.49it/s]
 46%|████▌     | 1911/4166 [01:37<02:14, 16.71it/s]

zero-size array to reduction operation maximum which has no identity


 72%|███████▏  | 2988/4166 [02:34<00:55, 21.28it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [03:35<00:00, 19.33it/s]
100%|██████████| 4166/4166 [03:35<00:00, 19.32it/s]
100%|██████████| 4166/4166 [03:35<00:00, 19.31it/s]
100%|██████████| 4166/4166 [03:36<00:00, 19.27it/s]
100%|██████████| 4166/4166 [03:36<00:00, 19.23it/s]
100%|██████████| 4166/4166 [03:37<00:00, 19.19it/s]
100%|██████████| 4/4 [00:00<00:00, 47.33it/s]
  6%|▋         | 261/4166 [00:12<04:02, 16.14it/s]

zero-size array to reduction operation maximum which has no identity

  6%|▌         | 249/4166 [00:12<03:26, 18.95it/s]




  8%|▊         | 322/4166 [00:15<03:04, 20.89it/s]

zero-size array to reduction operation maximum which has no identity


 10%|█         | 421/4166 [00:21<03:03, 20.42it/s]

zero-size array to reduction operation maximum which has no identity

 10%|█         | 422/4166 [00:21<04:40, 13.34it/s]




 10%|█         | 424/4166 [00:21<03:18, 18.87it/s]

zero-size array to reduction operation maximum which has no identity


 40%|████      | 1685/4166 [01:29<01:10, 35.12it/s]

zero-size array to reduction operation maximum which has no identity


 83%|████████▎ | 3460/4166 [02:42<00:27, 25.80it/s]

zero-size array to reduction operation maximum which has no identity


 87%|████████▋ | 3624/4166 [02:50<00:17, 30.44it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [03:05<00:00, 22.48it/s]
 98%|█████████▊| 4072/4166 [03:07<00:03, 27.24it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [03:08<00:00, 22.13it/s]
100%|██████████| 4166/4166 [03:08<00:00, 22.09it/s]
100%|██████████| 4166/4166 [03:10<00:00, 21.92it/s]
100%|██████████| 4166/4166 [03:10<00:00, 21.89it/s]
100%|██████████| 4166/4166 [03:11<00:00, 21.79it/s]
100%|██████████| 4/4 [00:00<00:00, 26.46it/s]
 31%|███▏      | 1303/4166 [00:57<01:51, 25.67it/s]

zero-size array to reduction operation maximum which has no identity


 57%|█████▋    | 2369/4166 [01:39<01:23, 21.48it/s]

zero-size array to reduction operation maximum which has no identity


 91%|█████████ | 3772/4166 [02:35<00:16, 24.27it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:47<00:00, 24.93it/s]
100%|██████████| 4166/4166 [02:48<00:00, 24.71it/s]
100%|██████████| 4166/4166 [02:48<00:00, 24.67it/s]
100%|██████████| 4166/4166 [02:49<00:00, 24.63it/s]
100%|██████████| 4166/4166 [02:49<00:00, 24.54it/s]
100%|██████████| 4166/4166 [02:49<00:00, 24.54it/s]
100%|██████████| 4/4 [00:00<00:00, 85.10it/s]
  7%|▋         | 309/4166 [00:12<02:09, 29.88it/s]

zero-size array to reduction operation maximum which has no identity

  8%|▊         | 315/4166 [00:12<02:49, 22.72it/s]




  9%|▉         | 394/4166 [00:16<02:43, 23.12it/s]

zero-size array to reduction operation maximum which has no identity

 10%|▉         | 408/4166 [00:16<01:52, 33.30it/s]




 15%|█▍        | 614/4166 [00:24<02:57, 19.98it/s]

zero-size array to reduction operation maximum which has no identity


 46%|████▌     | 1923/4166 [01:15<01:21, 27.68it/s]

zero-size array to reduction operation maximum which has no identity


 61%|██████    | 2541/4166 [01:40<00:55, 29.21it/s]

zero-size array to reduction operation maximum which has no identity


 71%|███████   | 2947/4166 [02:00<00:54, 22.34it/s]

zero-size array to reduction operation maximum which has no identity


 89%|████████▉ | 3705/4166 [02:31<00:18, 24.40it/s]

zero-size array to reduction operation maximum which has no identity


 95%|█████████▌| 3965/4166 [02:36<00:08, 24.25it/s]

zero-size array to reduction operation maximum which has no identity

 94%|█████████▍| 3933/4166 [02:36<00:09, 25.77it/s]




100%|██████████| 4166/4166 [02:42<00:00, 25.70it/s]
100%|██████████| 4166/4166 [02:45<00:00, 25.16it/s]
100%|██████████| 4166/4166 [02:46<00:00, 25.02it/s]
100%|██████████| 4166/4166 [02:47<00:00, 24.85it/s]
100%|██████████| 4166/4166 [02:48<00:00, 24.77it/s]
100%|██████████| 4166/4166 [02:51<00:00, 24.25it/s]
100%|██████████| 4/4 [00:00<00:00, 53.11it/s]
  5%|▍         | 194/4166 [00:07<02:21, 28.16it/s]

zero-size array to reduction operation maximum which has no identity


  7%|▋         | 300/4166 [00:12<04:01, 16.01it/s]

zero-size array to reduction operation maximum which has no identity

  8%|▊         | 314/4166 [00:12<02:33, 25.07it/s]




  9%|▊         | 362/4166 [00:15<03:03, 20.77it/s]

zero-size array to reduction operation maximum which has no identity


 10%|█         | 418/4166 [00:18<02:45, 22.71it/s]

zero-size array to reduction operation maximum which has no identity


 20%|█▉        | 816/4166 [00:31<01:45, 31.73it/s]

zero-size array to reduction operation maximum which has no identity


 20%|█▉        | 831/4166 [00:33<02:06, 26.41it/s]

zero-size array to reduction operation maximum which has no identity

 20%|█▉        | 825/4166 [00:33<01:37, 34.17it/s]




 36%|███▌      | 1494/4166 [01:00<02:06, 21.19it/s]

zero-size array to reduction operation maximum which has no identity

 36%|███▋      | 1511/4166 [01:00<01:36, 27.49it/s]




 39%|███▊      | 1609/4166 [01:04<01:09, 36.73it/s]

zero-size array to reduction operation maximum which has no identity


 59%|█████▉    | 2469/4166 [01:40<00:52, 32.33it/s]

zero-size array to reduction operation maximum which has no identity


 71%|███████   | 2944/4166 [01:58<00:51, 23.65it/s]

zero-size array to reduction operation maximum which has no identity

 73%|███████▎  | 3031/4166 [01:58<00:47, 23.72it/s]




 86%|████████▋ | 3595/4166 [02:24<00:18, 31.53it/s]

zero-size array to reduction operation maximum which has no identity

 86%|████████▋ | 3597/4166 [02:24<00:23, 24.71it/s]




 90%|████████▉ | 3741/4166 [02:29<00:17, 24.73it/s]

zero-size array to reduction operation maximum which has no identity

 90%|████████▉ | 3744/4166 [02:30<00:14, 29.15it/s]




100%|██████████| 4166/4166 [02:44<00:00, 25.32it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.26it/s]
100%|██████████| 4166/4166 [02:46<00:00, 25.06it/s]
100%|██████████| 4166/4166 [02:46<00:00, 25.04it/s]
100%|██████████| 4166/4166 [02:46<00:00, 25.02it/s]
100%|██████████| 4166/4166 [02:46<00:00, 25.02it/s]
100%|██████████| 4/4 [00:00<00:00, 43.93it/s]
  6%|▋         | 268/4166 [00:09<02:01, 32.16it/s]

zero-size array to reduction operation maximum which has no identity

  5%|▌         | 223/4166 [00:09<02:12, 29.73it/s]




 13%|█▎        | 538/4166 [00:20<02:43, 22.26it/s]

zero-size array to reduction operation maximum which has no identity


 27%|██▋       | 1117/4166 [00:44<02:00, 25.21it/s]

zero-size array to reduction operation maximum which has no identity

 27%|██▋       | 1135/4166 [00:44<02:19, 21.69it/s]




 31%|███       | 1279/4166 [00:51<01:36, 29.87it/s]

zero-size array to reduction operation maximum which has no identity

 32%|███▏      | 1350/4166 [00:51<01:59, 23.47it/s]




 35%|███▌      | 1478/4166 [00:57<01:31, 29.52it/s]

zero-size array to reduction operation maximum which has no identity


 55%|█████▍    | 2283/4166 [01:32<00:52, 35.78it/s]

zero-size array to reduction operation maximum which has no identity


 86%|████████▋ | 3603/4166 [02:25<00:26, 21.29it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:41<00:00, 25.81it/s]
100%|██████████| 4166/4166 [02:41<00:00, 25.82it/s]
100%|██████████| 4166/4166 [02:42<00:00, 25.63it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.50it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.50it/s]
100%|██████████| 4166/4166 [02:46<00:00, 25.09it/s]
100%|██████████| 4/4 [00:00<00:00, 75.77it/s]
  1%|▏         | 53/4166 [00:02<02:49, 24.27it/s]

zero-size array to reduction operation maximum which has no identity

  2%|▏         | 63/4166 [00:02<02:24, 28.41it/s]




 20%|██        | 854/4166 [00:34<02:32, 21.71it/s]

zero-size array to reduction operation maximum which has no identity

 22%|██▏       | 902/4166 [00:34<01:52, 28.98it/s]




 33%|███▎      | 1394/4166 [01:00<02:47, 16.55it/s]

zero-size array to reduction operation maximum which has no identity


 46%|████▌     | 1922/4166 [01:27<01:36, 23.19it/s]

zero-size array to reduction operation maximum which has no identity


 67%|██████▋   | 2798/4166 [02:02<01:00, 22.69it/s]

zero-size array to reduction operation maximum which has no identity

 67%|██████▋   | 2792/4166 [02:02<00:56, 24.35it/s]




 76%|███████▌  | 3159/4166 [02:16<00:32, 30.66it/s]

zero-size array to reduction operation maximum which has no identity


 80%|████████  | 3351/4166 [02:25<00:32, 24.86it/s]

zero-size array to reduction operation maximum which has no identity


 83%|████████▎ | 3457/4166 [02:27<00:39, 17.90it/s]

zero-size array to reduction operation maximum which has no identity


 84%|████████▍ | 3504/4166 [02:30<00:20, 32.15it/s]

zero-size array to reduction operation maximum which has no identity


 91%|█████████ | 3795/4166 [02:41<00:17, 21.34it/s]

zero-size array to reduction operation maximum which has no identity

 91%|█████████ | 3799/4166 [02:41<00:09, 39.31it/s]




100%|██████████| 4166/4166 [02:54<00:00, 23.82it/s]
100%|██████████| 4166/4166 [02:55<00:00, 23.79it/s]
100%|██████████| 4166/4166 [02:55<00:00, 23.75it/s]
100%|██████████| 4166/4166 [02:55<00:00, 23.74it/s]
100%|██████████| 4166/4166 [02:55<00:00, 23.69it/s]
100%|██████████| 4166/4166 [02:56<00:00, 23.58it/s]
100%|██████████| 4/4 [00:00<00:00, 49.18it/s]
 55%|█████▌    | 2305/4166 [01:31<01:13, 25.48it/s]

zero-size array to reduction operation maximum which has no identity


 85%|████████▍ | 3541/4166 [02:18<00:37, 16.86it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:41<00:00, 25.87it/s]
100%|██████████| 4166/4166 [02:42<00:00, 25.62it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.40it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.36it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.26it/s]
100%|██████████| 4166/4166 [02:45<00:00, 25.17it/s]
100%|██████████| 4/4 [00:00<00:00, 43.32it/s]
 23%|██▎       | 957/4166 [00:36<02:03, 26.02it/s]

zero-size array to reduction operation maximum which has no identity


 42%|████▏     | 1770/4166 [01:09<01:34, 25.45it/s]

zero-size array to reduction operation maximum which has no identity


 65%|██████▌   | 2724/4166 [01:48<01:15, 19.19it/s]

zero-size array to reduction operation maximum which has no identity


 72%|███████▏  | 3010/4166 [01:59<00:42, 27.09it/s]

zero-size array to reduction operation maximum which has no identity

 74%|███████▎  | 3067/4166 [01:59<00:56, 19.49it/s]




100%|██████████| 4166/4166 [02:40<00:00, 25.99it/s]
100%|██████████| 4166/4166 [02:40<00:00, 25.89it/s]
100%|██████████| 4166/4166 [02:42<00:00, 25.69it/s]
100%|██████████| 4166/4166 [02:42<00:00, 25.63it/s]
100%|██████████| 4166/4166 [02:42<00:00, 25.63it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.47it/s]
100%|██████████| 4/4 [00:00<00:00, 66.18it/s]
  4%|▎         | 151/4166 [00:05<02:19, 28.74it/s]

zero-size array to reduction operation maximum which has no identity


  5%|▌         | 209/4166 [00:08<02:26, 27.09it/s]

zero-size array to reduction operation maximum which has no identity


 15%|█▌        | 634/4166 [00:26<02:51, 20.55it/s]

zero-size array to reduction operation maximum which has no identity


 41%|████      | 1713/4166 [01:07<01:37, 25.29it/s]

zero-size array to reduction operation maximum which has no identity


 66%|██████▌   | 2759/4166 [01:48<00:55, 25.19it/s]

zero-size array to reduction operation maximum which has no identity


 69%|██████▉   | 2886/4166 [01:50<00:56, 22.47it/s]

zero-size array to reduction operation maximum which has no identity


 71%|███████   | 2965/4166 [01:54<00:37, 32.21it/s]

zero-size array to reduction operation maximum which has no identity


 84%|████████▍ | 3517/4166 [02:18<00:24, 26.15it/s]

zero-size array to reduction operation maximum which has no identity


 95%|█████████▍| 3954/4166 [02:33<00:10, 20.94it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:41<00:00, 25.86it/s]
100%|██████████| 4166/4166 [02:42<00:00, 25.70it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.40it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.34it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.33it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.32it/s]
100%|██████████| 4/4 [00:00<00:00, 97.45it/s]
 24%|██▍       | 996/4166 [00:40<01:32, 34.23it/s]]

zero-size array to reduction operation maximum which has no identity

 25%|██▍       | 1031/4166 [00:40<01:50, 28.39it/s]




 36%|███▌      | 1502/4166 [00:58<01:42, 26.00it/s]

zero-size array to reduction operation maximum which has no identity


 36%|███▌      | 1508/4166 [00:59<01:36, 27.58it/s]

zero-size array to reduction operation maximum which has no identity


 43%|████▎     | 1773/4166 [01:11<01:13, 32.77it/s]

zero-size array to reduction operation maximum which has no identity


 52%|█████▏    | 2148/4166 [01:25<01:16, 26.48it/s]

zero-size array to reduction operation maximum which has no identity


 53%|█████▎    | 2228/4166 [01:28<01:20, 24.09it/s]

zero-size array to reduction operation maximum which has no identity


 63%|██████▎   | 2604/4166 [01:41<00:50, 30.95it/s]

zero-size array to reduction operation maximum which has no identity


 95%|█████████▌| 3973/4166 [02:34<00:08, 23.07it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:39<00:00, 26.05it/s]
 98%|█████████▊| 4083/4166 [02:40<00:02, 30.81it/s]

zero-size array to reduction operation maximum which has no identity

 96%|█████████▌| 3990/4166 [02:40<00:05, 29.96it/s]




100%|██████████| 4166/4166 [02:41<00:00, 25.75it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.56it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.44it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.31it/s]
100%|██████████| 4166/4166 [02:45<00:00, 25.23it/s]
100%|██████████| 4/4 [00:00<00:00, 64.07it/s]
  4%|▍         | 164/4166 [00:06<02:30, 26.64it/s]

zero-size array to reduction operation maximum which has no identity


 26%|██▋       | 1096/4166 [00:45<01:28, 34.88it/s]

zero-size array to reduction operation maximum which has no identity


 48%|████▊     | 2009/4166 [01:26<02:08, 16.83it/s]

zero-size array to reduction operation maximum which has no identity


 51%|█████     | 2111/4166 [01:34<01:52, 18.21it/s]

zero-size array to reduction operation maximum which has no identity


 69%|██████▉   | 2880/4166 [02:06<00:44, 28.69it/s]

zero-size array to reduction operation maximum which has no identity


 72%|███████▏  | 3004/4166 [02:11<00:49, 23.37it/s]

zero-size array to reduction operation maximum which has no identity


 96%|█████████▌| 4009/4166 [02:51<00:06, 22.79it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:57<00:00, 23.50it/s]
100%|██████████| 4166/4166 [02:58<00:00, 23.35it/s]
100%|██████████| 4166/4166 [02:58<00:00, 23.31it/s]
100%|██████████| 4166/4166 [02:58<00:00, 23.30it/s]
100%|██████████| 4166/4166 [02:58<00:00, 23.28it/s]
100%|██████████| 4166/4166 [02:59<00:00, 23.18it/s]
100%|██████████| 4/4 [00:00<00:00, 89.66it/s]
 14%|█▎        | 567/4166 [00:23<02:40, 22.46it/s]

zero-size array to reduction operation maximum which has no identity

 14%|█▍        | 580/4166 [00:23<03:03, 19.49it/s]




 14%|█▍        | 595/4166 [00:23<02:17, 25.90it/s]

zero-size array to reduction operation maximum which has no identity


 48%|████▊     | 2018/4166 [01:17<01:06, 32.13it/s]

zero-size array to reduction operation maximum which has no identity

 47%|████▋     | 1957/4166 [01:17<01:37, 22.66it/s]




 93%|█████████▎| 3893/4166 [02:31<00:09, 27.51it/s]

zero-size array to reduction operation maximum which has no identity

 95%|█████████▍| 3948/4166 [02:31<00:08, 25.38it/s]




 94%|█████████▍| 3916/4166 [02:32<00:10, 23.45it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:41<00:00, 25.87it/s]
100%|██████████| 4166/4166 [02:41<00:00, 25.76it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.53it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.54it/s]
100%|██████████| 4166/4166 [02:43<00:00, 25.50it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.38it/s]
100%|██████████| 4/4 [00:00<00:00, 46.97it/s]
 26%|██▌       | 1072/4166 [00:43<01:53, 27.18it/s]

zero-size array to reduction operation maximum which has no identity

 25%|██▍       | 1030/4166 [00:43<02:15, 23.07it/s]




 27%|██▋       | 1119/4166 [00:45<02:01, 25.16it/s]

zero-size array to reduction operation maximum which has no identity


 28%|██▊       | 1181/4166 [00:48<02:11, 22.63it/s]

zero-size array to reduction operation maximum which has no identity


 63%|██████▎   | 2644/4166 [01:45<01:11, 21.37it/s]

zero-size array to reduction operation maximum which has no identity


 74%|███████▍  | 3076/4166 [02:02<00:44, 24.46it/s]

zero-size array to reduction operation maximum which has no identity


 79%|███████▊  | 3279/4166 [02:09<00:46, 19.00it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 4166/4166 [02:43<00:00, 25.42it/s]
100%|██████████| 4166/4166 [02:44<00:00, 25.40it/s]
100%|██████████| 4166/4166 [02:45<00:00, 25.25it/s]
100%|██████████| 4166/4166 [02:45<00:00, 25.23it/s]
100%|██████████| 4166/4166 [02:45<00:00, 25.22it/s]
100%|██████████| 4166/4166 [02:46<00:00, 25.00it/s]
100%|██████████| 4/4 [00:00<00:00, 31.29it/s]
  2%|▏         | 6/377 [00:00<00:15, 23.80it/s]

zero-size array to reduction operation maximum which has no identity


100%|██████████| 377/377 [00:14<00:00, 26.38it/s]
100%|██████████| 377/377 [00:14<00:00, 26.17it/s]
100%|██████████| 377/377 [00:14<00:00, 25.46it/s]
100%|██████████| 377/377 [00:15<00:00, 24.86it/s]
100%|██████████| 377/377 [00:15<00:00, 24.65it/s]
100%|██████████| 377/377 [00:15<00:00, 24.60it/s]
100%|██████████| 3/3 [00:00<00:00, 61.43it/s]


In [19]:
from malaya_speech.utils import tf_featurization

# Build the speech featurizer from malaya_speech's transducer featurizer config;
# preprocess_inputs() below calls featurizer.vectorize() on each waveform.
config = malaya_speech.config.transducer_featurizer_config
featurizer = tf_featurization.STTFeaturizer(**config)

In [20]:
# Width of the last axis that featurizer output is reshaped to.
n_mels = 80

def preprocess_inputs(example):
    """Add an 'inputs' entry computed from example['waveforms'].

    The waveform is passed through `featurizer.vectorize` and the result is
    reshaped to `(-1, n_mels)`. The dictionary is modified in place and also
    returned.
    """
    vectorized = featurizer.vectorize(example['waveforms'])
    example['inputs'] = tf.reshape(vectorized, (-1, n_mels))
    return example

def parse(serialized_example):
    """Decode one serialized tf.Example into a feature dictionary.

    'waveforms' (float32), 'targets' (int64) and 'targets_length' (int64) are
    parsed as variable-length features and replaced by their `.values`.
    preprocess_inputs() then adds 'inputs', and any key other than those four
    is removed before the dictionary is returned.
    """
    schema = {
        'waveforms': tf.compat.v1.VarLenFeature(tf.float32),
        'targets': tf.compat.v1.VarLenFeature(tf.int64),
        'targets_length': tf.compat.v1.VarLenFeature(tf.int64),
    }
    parsed = tf.compat.v1.parse_single_example(
        serialized_example, features = schema
    )

    # Swap each sparse tensor for its values.
    for name in list(parsed):
        parsed[name] = parsed[name].values

    parsed = preprocess_inputs(parsed)

    wanted = ['waveforms', 'inputs', 'targets', 'targets_length']
    for name in list(parsed.keys()):
        if name not in wanted:
            parsed.pop(name, None)

    return parsed

def get_dataset(files, batch_size = 2, shuffle_size = 32, thread_count = 24):
    """Return a zero-argument factory that builds the TFRecord input pipeline.

    The pipeline reads `files` as TFRecords, shuffles with a buffer of
    `shuffle_size`, maps `parse` with `thread_count` parallel calls, and
    repeats indefinitely.

    Note: `batch_size` is accepted but not used by this pipeline.
    """
    def get():
        return (
            tf.data.TFRecordDataset(files)
            .shuffle(shuffle_size)
            .map(parse, num_parallel_calls = thread_count)
            .repeat()
        )

    return get

In [21]:
# List the uploaded TFRecord shards in the bucket, build the dataset from them,
# and wrap it in a NumPy iterator for inspection.
files = tf.io.gfile.glob('gs://mesolitica-tpu-general/imda/part4-same-room/*.tfrecord')
d = get_dataset(files)()
d = d.as_numpy_iterator()

In [23]:
# Pull one parsed example to check that the written records can be read back.
next(d)

{'targets': array([ 69, 108, 396, 795,  65,   6,  20, 489, 375, 795, 464,   9, 223,
         37, 140, 880]),
 'targets_length': array([16]),
 'waveforms': array([-0.01147776, -0.01396093, -0.01743737, ...,  0.00623551,
         0.00469043,  0.00469043], dtype=float32),
 'inputs': array([[-1.1467438 , -0.9835571 , -0.7878138 , ..., -2.358215  ,
         -1.7385253 , -1.9502212 ],
        [-1.3655019 , -1.3327678 , -1.2868116 , ..., -1.8851404 ,
         -1.9457333 , -2.0116887 ],
        [-1.4144297 , -1.1720926 , -0.9228275 , ..., -1.595703  ,
         -1.831306  , -1.9690977 ],
        ...,
        [-1.1878959 , -1.076973  , -0.92887765, ..., -1.5371672 ,
         -1.7004726 , -2.0543156 ],
        [-1.1862378 , -1.1690313 , -1.1501343 , ..., -1.3770837 ,
         -1.6801057 , -1.781006  ],
        [-1.2420168 , -1.1447793 , -1.0114012 , ..., -1.7152041 ,
         -1.9377683 , -1.8452353 ]], dtype=float32)}