In [1]:
import os
import shutil

# Expose only GPU index 2 to CUDA. This assignment runs before TensorFlow is
# imported in a later cell.
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

In [2]:
# Output folder: the extraction loop at the end of the notebook saves one
# duration array per utterance into it.
directory = 'tacotron2-female-singlish-alignment'

In [3]:
# Start every run from an empty output directory. Pure-Python calls are used
# instead of `rm -rf` / `mkdir` shell strings so the path is never interpolated
# into a shell command and a failure raises instead of only returning a
# non-zero exit status.
if os.path.isdir(directory):
    shutil.rmtree(directory)
os.makedirs(directory, exist_ok = True)

0

In [4]:
import tensorflow as tf
import numpy as np
from glob import glob
import tensorflow as tf
import malaya_speech
import malaya_speech.train
from malaya_speech.train.model import tacotron2_nvidia as tacotron2
import malaya_speech.config
import numpy as np
import json
import malaya_speech.train as train
import re






The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.





In [5]:
def norm_mean_std(x, mean, std):
    """Standardise ``x`` as ``(x - mean) / std`` while keeping zeros at zero.

    Positions along the first axis where ``x`` equals 0.0 are recorded before
    the normalisation and reset to 0.0 afterwards. The input array is not
    modified; a new array is returned.
    """
    zero_positions = np.nonzero(x == 0.0)[0]
    normalised = (x - mean) / std
    normalised[zero_positions] = 0.0
    return normalised

def average_by_duration(x, durs):
    """Average a frame-level contour over each symbol's duration.

    Args:
        x: 1-D array of frame-level values; zeros are treated as "no value"
            and are excluded from the averages.
        durs: 1-D integer array holding the number of frames assigned to each
            symbol. Consecutive slices of ``x`` are taken in this order.

    Returns:
        float32 array with one entry per element of ``durs``: the mean of the
        non-zero frames in that symbol's slice, or 0.0 when the slice is empty
        or contains only zeros.
    """
    boundaries = np.cumsum(np.pad(durs, (1, 0)))

    x_char = np.zeros((durs.shape[0],), dtype = np.float32)
    # Iterate over the segments themselves. The previous index source was
    # range(durs.sum()), which cut the loop short whenever the total number of
    # frames was smaller than the number of symbols.
    for idx, (start, end) in enumerate(zip(boundaries[:-1], boundaries[1:])):
        segment = x[start:end]
        values = segment[segment != 0.0]
        x_char[idx] = np.mean(values) if len(values) > 0 else 0.0

    return x_char

In [6]:
# Normalisation statistics for the f0 and energy contours. `generate` passes
# element 0 as the mean and element 1 as the std to `norm_mean_std`.
f0_stat = np.load('../speech-bahasa/female-singlish-stats/stats_f0.npy')
energy_stat = np.load('../speech-bahasa/female-singlish-stats/stats_energy.npy')

In [7]:
# Mel files to process. `generate` derives the matching text, f0 and energy
# paths from each of these by replacing 'mels' in the path.
files = glob('../speech-bahasa/output-female-singlish/mels/*.npy')

In [8]:
# Passed to `tacotron2.generate_guided_attention` in `parse`.
reduction_factor = 1
# `generate` skips utterances whose mel has more than `maxlen` or fewer than
# `minlen` frames.
maxlen = 1008
minlen = 32
# Block size used by `generate` when padding the text and frame sequences.
pad_to = 8
# `log(data_min)` is the value `generate` pads mel frames with.
data_min = 1e-2

# Symbol vocabulary. A character's id is its position in
# MALAYA_SPEECH_SYMBOLS, so the concatenation order below must not change.
# NOTE(review): presumably it has to match the vocabulary the restored
# checkpoint was trained with - confirm.
_pad = 'pad'
_start = 'start'
_eos = 'eos'
_punctuation = "!'(),.:;? "
_special = '-'
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
# Characters that `generate` drops from transcripts, even though some of them
# also appear in `_punctuation`.
_rejected = '\'():;"'

MALAYA_SPEECH_SYMBOLS = (
    [_pad, _start, _eos] + list(_special) + list(_punctuation) + list(_letters)
)

def generate(files):
    """Yield one padded, un-batched feature dict per usable utterance.

    ``files`` holds byte-string paths to mel ``.npy`` files. The text, f0 and
    energy files are located by replacing ``'mels'`` in each path. Utterances
    whose mel length lies outside ``[minlen, maxlen]`` are skipped.

    Each yielded dict contains the padded mel, the token ids (start id 1,
    end id 2, then zero padding), the stop-token target (0 on real frames,
    1 on padded frames), the normalised f0 and energy contours, the padded
    length of each sequence wrapped in a one-element list, and the source
    path under ``'f'``.
    """

    def frame_padding(n_frames):
        # Trailing pad length shared by the mel, stop-token, f0 and energy tracks.
        return pad_to - ((n_frames + 1) % pad_to) + 1

    def load_contour(path, stat):
        # Load a contour, zero-pad its tail, then normalise with (mean, std).
        contour = np.load(path)
        contour = np.pad(contour, (0, frame_padding(len(contour))), 'constant')
        return norm_mean_std(contour, stat[0], stat[1])

    for encoded_path in files:
        f = encoded_path.decode()
        mel = np.load(f)
        n_frames = len(mel)
        if not (minlen <= n_frames <= maxlen):
            continue

        # Transcript -> cleaned string -> symbol ids.
        transcript = np.load(f.replace('mels', 'text_ids'), allow_pickle = True)[0]
        kept_chars = [
            ch
            for ch in transcript
            if ch in MALAYA_SPEECH_SYMBOLS and ch not in _rejected
        ]
        cleaned = re.sub(r'[ ]+', ' ', ''.join(kept_chars)).strip()
        token_ids = np.array([MALAYA_SPEECH_SYMBOLS.index(ch) for ch in cleaned])

        # Wrap with the start/eos ids, then pad the tail with the pad id.
        text_tail = pad_to - ((len(token_ids) + 2) % pad_to)
        text_input = np.pad(token_ids, (1, 1), 'constant', constant_values = (1, 2))
        text_input = np.pad(text_input, (0, text_tail), 'constant', constant_values = 0)

        # Pad the mel with log(data_min); padded frames are marked 1 in the
        # stop-token target.
        mel_tail = frame_padding(n_frames)
        mel = np.pad(
            mel,
            ((0, mel_tail), (0, 0)),
            'constant',
            constant_values = np.log(data_min),
        )
        stop_token_target = np.pad(
            np.zeros([n_frames], dtype = np.float32),
            (0, mel_tail),
            'constant',
            constant_values = 1,
        )

        f0 = load_contour(f.replace('mels', 'f0s'), f0_stat)
        energy = load_contour(f.replace('mels', 'energies'), energy_stat)

        yield {
            'mel': mel,
            'text_ids': text_input,
            'len_mel': [len(mel)],
            'len_text_ids': [len(text_input)],
            'stop_token_target': stop_token_target,
            'f0': f0,
            'len_f0': [len(f0)],
            'energy': energy,
            'len_energy': [len(energy)],
            'f': [f]
        }

def parse(example):
    """Attach the guided-attention matrix to ``example`` under key ``'g'``.

    The matrix is produced by ``tacotron2.generate_guided_attention`` from the
    example's mel length and text length. The same dict is updated in place
    and returned.
    """
    example['g'] = tacotron2.generate_guided_attention(
        example['len_mel'][0],
        example['len_text_ids'][0],
        reduction_factor = reduction_factor,
    )
    return example
    
    
def get_dataset(files, batch_size = 2, shuffle_size = 2, thread_count = 24):
    """Return a zero-argument factory that builds the padded, batched dataset.

    Args:
        files: mel file paths forwarded to ``generate``.
        batch_size: number of utterances per padded batch. Previously this
            argument was ignored and ``shuffle_size`` was used as the batch
            size; both default to 2, so default calls behave as before.
        shuffle_size: kept for interface compatibility only. The pipeline does
            no shuffling, so the value is unused.
        thread_count: ``num_parallel_calls`` for the ``parse`` map step.

    Returns:
        A function ``get()`` that creates the ``tf.data.Dataset``. All tensors
        are created inside ``get`` so they belong to whichever graph is the
        default when it is called.
    """

    def get():
        dtypes = {
            'mel': tf.float32,
            'text_ids': tf.int32,
            'len_mel': tf.int32,
            'len_text_ids': tf.int32,
            'stop_token_target': tf.float32,
            'f0': tf.float32,
            'len_f0': tf.int32,
            'energy': tf.float32,
            'len_energy': tf.int32,
            'f': tf.string,
        }
        shapes = {
            'mel': tf.TensorShape([None, 80]),
            'text_ids': tf.TensorShape([None]),
            'len_mel': tf.TensorShape([1]),
            'len_text_ids': tf.TensorShape([1]),
            'stop_token_target': tf.TensorShape([None]),
            'f0': tf.TensorShape([None]),
            'len_f0': tf.TensorShape([1]),
            'energy': tf.TensorShape([None]),
            'len_energy': tf.TensorShape([1]),
            'f': tf.TensorShape([1]),
        }

        dataset = tf.data.Dataset.from_generator(
            generate, dtypes, output_shapes = shapes, args = (files,)
        )
        dataset = dataset.map(parse, num_parallel_calls = thread_count)

        # `parse` adds the guided-attention matrix 'g'; it is padded with -1.0
        # while every other feature is padded with 0 (or '' for strings).
        padded_shapes = dict(shapes, g = tf.TensorShape([None, None]))
        padding_values = {
            key: tf.constant('' if dtype == tf.string else 0, dtype = dtype)
            for key, dtype in dtypes.items()
        }
        padding_values['g'] = tf.constant(-1.0, dtype = tf.float32)

        return dataset.padded_batch(
            batch_size,
            padded_shapes = padded_shapes,
            padding_values = padding_values,
        )

    return get

In [9]:
# Build the dataset and keep the dict of tensors returned by a one-shot
# iterator's `get_next()`.
features = get_dataset(files)().make_one_shot_iterator().get_next()

Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.


In [10]:
# Convenience handles on the batched tensors returned by the iterator.
input_ids = features['text_ids']
# The length features are batched as [batch, 1]; `[:, 0]` drops the last axis.
input_lengths = features['len_text_ids'][:, 0]
# NOTE(review): not referenced again in the visible cells - confirm whether it
# is still needed.
speaker_ids = tf.constant([0], dtype = tf.int32)
mel_outputs = features['mel']
mel_lengths = features['len_mel'][:, 0]
guided = features['g']
stop_token_target = features['stop_token_target']
# Dynamic batch size, read from the first axis of the guided-attention tensor.
batch_size = tf.shape(guided)[0]

In [11]:
# Build the Tacotron 2 graph on the dataset tensors: text ids and lengths,
# mel tensors and lengths, and the number of symbols in the vocabulary.
# NOTE(review): the role of each positional argument is inferred from the
# variable names - confirm against `tacotron2_nvidia.Model`.
model = tacotron2.Model(
    [input_ids, input_lengths],
    [mel_outputs, mel_lengths],
    len(MALAYA_SPEECH_SYMBOLS),
)



Instructions for updating:
Use `tf.keras.layers.Conv1D` instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.batch_normalization` documentation).
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.Bidirectional(keras.layers.RNN(cell))`, which is equivalent to this API
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for u

In [12]:
# `outputs` is unpacked as a 6-tuple; only the first three entries are named.
# The extraction loop later slices `alignment_histories` per utterance as
# [batch index, :text length, :mel length].
r = model.decoder_logits['outputs']
decoder_output, post_mel_outputs, alignment_histories, _, _, _ = r
stop_token_predictions = model.decoder_logits['stop_token_prediction']

In [13]:
# Open an interactive session and run the global variable initialiser; the
# following cell restores values from a checkpoint into this session.
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [14]:
# Restore the graph's variables from the `model.ckpt-44000` checkpoint of the
# `tacotron2-case-female-singlish` run.
saver = tf.train.Saver()
saver.restore(sess, 'tacotron2-case-female-singlish/model.ckpt-44000')

INFO:tensorflow:Restoring parameters from tacotron2-case-female-singlish/model.ckpt-44000


In [15]:
import matplotlib.pyplot as plt

In [16]:
def decode(x):
    """Turn a sequence of symbol ids back into text via MALAYA_SPEECH_SYMBOLS."""
    return ''.join(map(MALAYA_SPEECH_SYMBOLS.__getitem__, x))

In [17]:
def get_duration_from_alignment(alignment):
    """Count, per input position, the output frames that attend to it most.

    Args:
        alignment: 2-D array indexed as ``[input_position, output_frame]``.

    Returns:
        1-D integer array with one entry per input position. Each output frame
        is credited to the input position holding its largest weight (the
        first one on ties), so the result sums to the number of frames.
    """
    alignment = np.asarray(alignment)
    n_inputs, n_frames = alignment.shape
    if n_frames == 0:
        return np.zeros(n_inputs, dtype = np.int_)

    # np.argmax returns the first maximal index per column, matching the
    # previous `list(column).index(column.max())` without a Python-level loop.
    winners = np.argmax(alignment, axis = 0)
    return np.bincount(winners, minlength = n_inputs).astype(np.int_, copy = False)

In [18]:
# Run the restored model over the whole dataset and save, for every utterance,
# the per-symbol durations derived from its attention alignment. Files are
# written to `directory` under the same base name as the source mel file.
count = 0
while True:
    try:
        # Only the alignments and the input features are used below, so the
        # other model outputs are not fetched.
        alignments, batch = sess.run([alignment_histories, features])
    except tf.errors.OutOfRangeError:
        # The one-shot iterator is exhausted: every batch has been processed.
        # Any other exception is a real failure and is left to propagate with
        # its traceback instead of being printed and treated as end-of-data.
        print('finished after', count, 'batches')
        break

    for i in range(len(batch['f'])):
        source = batch['f'][i, 0].decode()
        file = os.path.join(directory, os.path.basename(source))
        len_mel = batch['len_mel'][i, 0]
        len_text_ids = batch['len_text_ids'][i, 0]
        d = get_duration_from_alignment(alignments[i, :len_text_ids, :len_mel])
        # Every frame must be assigned to exactly one symbol.
        assert d.sum() == len_mel, f'{source}: durations sum to {d.sum()}, expected {len_mel}'
        np.save(file, d)
    print('done', count)
    count += 1

done 0
done 1
done 2
done 3
done 4
done 5
done 6
done 7
done 8
done 9
done 10
done 11
done 12
done 13
done 14
done 15
done 16
done 17
done 18
done 19
done 20
done 21
done 22
done 23
done 24
done 25
done 26
done 27
done 28
done 29
done 30
done 31
done 32
done 33
done 34
done 35
done 36
done 37
done 38
done 39
done 40
done 41
done 42
done 43
done 44
done 45
done 46
done 47
done 48
done 49
done 50
done 51
done 52
done 53
done 54
done 55
done 56
done 57
done 58
done 59
done 60
done 61
done 62
done 63
done 64
done 65
done 66
done 67
done 68
done 69
done 70
done 71
done 72
done 73
done 74
done 75
done 76
done 77
done 78
done 79
done 80
done 81
done 82
done 83
done 84
done 85
done 86
done 87
done 88
done 89
done 90
done 91
done 92
done 93
done 94
done 95
done 96
done 97
done 98
done 99
done 100
done 101
done 102
done 103
done 104
done 105
done 106
done 107
done 108
done 109
done 110
done 111
done 112
done 113
done 114
done 115
done 116
done 117
done 118
done 119
done 120
done 121
done 122
don

done 923
done 924
done 925
done 926
done 927
done 928
done 929
done 930
done 931
done 932
done 933
done 934
done 935
done 936
done 937
done 938
done 939
done 940
done 941
done 942
done 943
done 944
done 945
done 946
done 947
done 948
done 949
done 950
done 951
done 952
done 953
done 954
done 955
done 956
done 957
done 958
done 959
done 960
done 961
done 962
done 963
done 964
done 965
done 966
done 967
done 968
done 969
done 970
done 971
done 972
done 973
done 974
done 975
done 976
done 977
done 978
done 979
done 980
done 981
done 982
done 983
done 984
done 985
done 986
done 987
done 988
done 989
done 990
done 991
done 992
done 993
done 994
done 995
done 996
done 997
done 998
done 999
done 1000
done 1001
done 1002
done 1003
done 1004
done 1005
done 1006
done 1007
done 1008
done 1009
done 1010
done 1011
done 1012
done 1013
done 1014
done 1015
done 1016
done 1017
done 1018
done 1019
done 1020
done 1021
done 1022
done 1023
done 1024
done 1025
done 1026
done 1027
done 1028
done 1029
done 10

done 1750
done 1751
done 1752
done 1753
done 1754
done 1755
done 1756
done 1757
done 1758
done 1759
done 1760
done 1761
done 1762
done 1763
done 1764
done 1765
done 1766
done 1767
done 1768
done 1769
done 1770
done 1771
done 1772
done 1773
done 1774
done 1775
done 1776
done 1777
done 1778
done 1779
done 1780
done 1781
done 1782
done 1783
done 1784
done 1785
done 1786
done 1787
done 1788
done 1789
done 1790
done 1791
done 1792
done 1793
done 1794
done 1795
done 1796
done 1797
done 1798
done 1799
done 1800
done 1801
done 1802
done 1803
done 1804
done 1805
done 1806
done 1807
done 1808
done 1809
done 1810
done 1811
done 1812
done 1813
done 1814
done 1815
done 1816
done 1817
done 1818
done 1819
done 1820
done 1821
done 1822
done 1823
done 1824
done 1825
done 1826
done 1827
done 1828
done 1829
done 1830
done 1831
done 1832
done 1833
done 1834
done 1835
done 1836
done 1837
done 1838
done 1839
done 1840
done 1841
done 1842
done 1843
done 1844
done 1845
done 1846
done 1847
done 1848
done 1849


done 2570
done 2571
done 2572
done 2573
done 2574
done 2575
done 2576
done 2577
done 2578
done 2579
done 2580
done 2581
done 2582
done 2583
done 2584
done 2585
done 2586
done 2587
done 2588
done 2589
done 2590
done 2591
done 2592
done 2593
done 2594
done 2595
done 2596
done 2597
done 2598
done 2599
done 2600
done 2601
done 2602
done 2603
done 2604
done 2605
done 2606
done 2607
done 2608
done 2609
done 2610
done 2611
done 2612
done 2613
done 2614
done 2615
done 2616
done 2617
done 2618
done 2619
done 2620
done 2621
done 2622
done 2623
done 2624
done 2625
done 2626
done 2627
done 2628
done 2629
done 2630
done 2631
done 2632
done 2633
done 2634
done 2635
done 2636
done 2637
done 2638
done 2639
done 2640
done 2641
done 2642
done 2643
done 2644
done 2645
done 2646
done 2647
done 2648
done 2649
done 2650
done 2651
done 2652
done 2653
done 2654
done 2655
done 2656
done 2657
done 2658
done 2659
done 2660
done 2661
done 2662
done 2663
done 2664
done 2665
done 2666
done 2667
done 2668
done 2669
