In [1]:
# Requires the npy-append-array package; uncomment to install it into this kernel's environment.
# %pip install npy-append-array

In [2]:
import os

# Restrict CUDA to a single device; must be set before TensorFlow / torch are imported.
GPU_ID = '1'
os.environ['CUDA_VISIBLE_DEVICES'] = GPU_ID

In [3]:
import sys

# Put the parent of the current working directory first on sys.path so that
# imports in later cells resolve against it (presumably the local checkout of
# the package -- confirm against the repository layout).
# The previous expression passed `__name__` (the string '__main__', not a file
# path) through abspath/dirname twice, which only happened to evaluate to this
# same directory; spell the intent out directly instead.
SOURCE_DIR = os.path.dirname(os.getcwd())
sys.path.insert(0, SOURCE_DIR)

In [6]:
import malaya_speech
import malaya_speech.config
import tensorflow as tf
from npy_append_array import NpyAppendArray
import joblib
import numpy as np
import torch
from malaya_speech.train.model import hubert

# tf.compat.v1.enable_eager_execution()

In [7]:
# Build the featurizer settings from the library defaults, overriding the
# feature type, the number of feature bins and the stride.
# Copy first: assigning into `malaya_speech.config.transducer_featurizer_config`
# directly would mutate the shared module-level object, so anything else that
# reads it in this kernel (and any re-run of this cell) would silently see the
# overrides.
config = dict(malaya_speech.config.transducer_featurizer_config)
config.update(
    feature_type = 'mfcc',
    num_feature_bins = 30,
    stride_ms = 20,
)
featurizer = malaya_speech.utils.tf_featurization.STTFeaturizer(**config)

In [8]:
# Load the two example recordings, both requested at the same `sr`.
SAMPLE_RATE = 16000
y, sr = malaya_speech.load('../speech/example-speaker/husein-zolkepli.wav', sr = SAMPLE_RATE)
y1, sr = malaya_speech.load('../speech/example-speaker/shafiqah-idayu.wav', sr = SAMPLE_RATE)
# Show the number of samples in each waveform.
len(y), len(y1)

(90090, 56298)

In [9]:
# Graph input: a 1-D float32 tensor of unspecified length that later cells feed
# with a waveform.
i = tf.placeholder(dtype = tf.float32, shape = [None])
# Symbolic features computed by the featurizer from that input.
v = featurizer.vectorize(i)
v

<tf.Tensor 'truediv_1:0' shape=(?, 30) dtype=float32>

In [10]:
# Apply the library's `deltas` helper to the base features `v`
# (presumably first-order frame differences -- confirm in tf_featurization).
deltas = malaya_speech.utils.tf_featurization.deltas(v)
# Display the resulting tensor to check its static shape.
deltas

<tf.Tensor 'strided_slice_3:0' shape=(?, 30) dtype=float32>

In [11]:
# Apply the same `deltas` helper to `deltas` itself, i.e. deltas of deltas.
ddeltas = malaya_speech.utils.tf_featurization.deltas(deltas)
# Display the resulting tensor to check its static shape.
ddeltas

<tf.Tensor 'strided_slice_4:0' shape=(?, 30) dtype=float32>

In [12]:
# Stack base features, deltas and delta-deltas side by side along axis 1.
feature_streams = [v, deltas, ddeltas]
concated = tf.concat(values = feature_streams, axis = 1)
concated

<tf.Tensor 'concat_1:0' shape=(?, 90) dtype=float32>

In [13]:
# Single TF1 session reused by every later `sess.run(...)` cell.
# NOTE(review): it is never closed in the visible part of the notebook.
sess = tf.Session()

In [14]:
# Start from a clean feature file. The original `!rm out.npy` printed an error
# whenever the file did not exist yet (e.g. on a fresh checkout) and only works
# where a POSIX shell is available; remove it from Python, and only if present.
# Presumably NpyAppendArray would otherwise append to a stale file left by a
# previous run -- confirm against the npy-append-array documentation.
feat_path = 'out.npy'
if os.path.exists(feat_path):
    os.remove(feat_path)

# Appendable .npy file that collects the feature frames of every utterance.
feat_f = NpyAppendArray(feat_path)

# Companion text file: one line per utterance holding its number of frames.
# Opened in 'w' mode, so any previous content is truncated. It stays open
# across the following cells and is closed explicitly later.
leng_path = 'out.len'
leng_f = open(leng_path, 'w')

rm: out.npy: No such file or directory


In [15]:
# 281
v1 = sess.run(concated, feed_dict = {i: y})
feat_f.append(v1)
leng_f.write(f"{len(v1)}\n")
v1.shape

(281, 90)

In [16]:
# 175
v1 = sess.run(concated, feed_dict = {i: y1})
feat_f.append(v1)
leng_f.write(f"{len(v1)}\n")
v1.shape

(175, 90)

In [17]:
# Flush and close the per-utterance length file so it can be re-read below.
# NOTE(review): `feat_f` (the NpyAppendArray) is not closed anywhere in the
# visible notebook -- confirm the installed npy-append-array version leaves
# the .npy file readable without an explicit close.
leng_f.close()

In [18]:
# Memory-map the concatenated feature matrix instead of reading it into RAM.
feat = np.load('out.npy', mmap_mode="r")

# One integer per line: the number of frames contributed by each utterance.
with open(leng_path, "r") as f:
    lengs = list(map(int, map(str.rstrip, f)))

# Start row of each utterance inside `feat`: 0, then the running sum of the
# preceding lengths.
offsets = [0] + np.cumsum(lengs[:-1]).tolist()
feat.shape

(456, 90)

In [19]:
# Show the start row and frame count of each utterance inside `feat`.
offsets, lengs

([0, 281], [281, 175])

In [20]:
# Seed NumPy's global RNG before fitting (presumably so the k-means
# initialisation is repeatable -- confirm get_km_model does not set its own
# random_state).
np.random.seed(0)
# Library helper that builds the clustering estimator; the cell output shows a
# MiniBatchKMeans with n_clusters=100.
km_model = hubert.kmeans.get_km_model()
# Fit on every frame of the memory-mapped feature matrix; the fitted estimator
# is the cell's displayed value.
km_model.fit(feat)

Init 1/20 with method: k-means++
Inertia for init 1/20: 180.425690
Init 2/20 with method: k-means++
Inertia for init 2/20: 201.364258
Init 3/20 with method: k-means++
Inertia for init 3/20: 196.456314
Init 4/20 with method: k-means++
Inertia for init 4/20: 194.285400
Init 5/20 with method: k-means++
Inertia for init 5/20: 183.141525
Init 6/20 with method: k-means++
Inertia for init 6/20: 193.811905
Init 7/20 with method: k-means++
Inertia for init 7/20: 188.061935
Init 8/20 with method: k-means++
Inertia for init 8/20: 208.115768
Init 9/20 with method: k-means++
Inertia for init 9/20: 207.368896
Init 10/20 with method: k-means++
Inertia for init 10/20: 185.258530
Init 11/20 with method: k-means++
Inertia for init 11/20: 186.779404
Init 12/20 with method: k-means++
Inertia for init 12/20: 207.999359
Init 13/20 with method: k-means++
Inertia for init 13/20: 193.591293
Init 14/20 with method: k-means++
Inertia for init 14/20: 197.138016
Init 15/20 with method: k-means++
Inertia for init 1

Minibatch iteration 97/100: mean batch inertia: 0.522357, ewa inertia: 0.522357 
Minibatch iteration 98/100: mean batch inertia: 0.526860, ewa inertia: 0.526860 
Minibatch iteration 99/100: mean batch inertia: 0.518044, ewa inertia: 0.518044 
Minibatch iteration 100/100: mean batch inertia: 0.522727, ewa inertia: 0.522727 


MiniBatchKMeans(batch_size=10000, compute_labels=False, max_no_improvement=100,
                n_clusters=100, n_init=20, reassignment_ratio=0.0, verbose=1)

In [21]:
# Persist the fitted k-means model; the cell displays the list of written files.
joblib.dump(value = km_model, filename = 'kmean.km')

['kmean.km']

In [22]:
# Sanity check: slicing `feat` by (offset, length) recovers each utterance's
# own block of frames.
for offset, leng in zip(offsets, lengs):
    utterance_frames = feat[offset: offset + leng]
    print(utterance_frames.shape)

(281, 90)
(175, 90)


In [23]:
# Wrap the saved k-means model in the library's ApplyKmeans_TF helper.
# The resulting callable is applied below both to a NumPy slice of `feat` and
# to the TF tensor `concated`.
kmean = hubert.kmeans.ApplyKmeans_TF('kmean.km')

In [24]:
# Assign cluster ids to the frames of the last utterance, taking the features
# from the memory-mapped array.
# The slice bounds are read from `offsets` / `lengs` explicitly instead of
# depending on whatever values a previously executed `for` loop happened to
# leave in its loop variables, so this cell gives the same result regardless
# of which cells ran before it.
last_offset, last_leng = offsets[-1], lengs[-1]
kmean(feat[last_offset: last_offset + last_leng])

array([40, 40, 40, 40, 49, 49, 49, 49,  1, 87, 87, 49, 49, 52, 52, 29, 29,
       31, 30, 30, 30, 63, 63, 17, 17, 31, 31, 54, 54, 62, 35, 35, 35,  0,
       93, 38, 38, 59, 83, 83, 24, 20, 20, 20, 20, 20, 45, 45, 45, 45, 45,
       45,  5,  5,  5,  5,  5,  5,  5, 70, 70, 70, 79, 73, 73, 73, 65, 93,
       71, 30, 36,  6,  6,  6, 18, 89, 89,  1,  1, 23, 23, 50, 50, 71, 71,
       24, 24, 24, 24, 20, 20, 20, 75, 26,  6, 66, 77, 67, 67, 24, 20, 74,
       74,  3,  3,  3,  3, 80, 80, 80, 80, 80, 55, 55,  1,  1,  1,  1, 32,
        1, 49, 87, 87, 49, 49, 87,  1, 87,  1,  1, 87, 87, 87, 87, 87,  1,
        1,  1, 49,  1,  1,  1, 49, 49, 87,  1,  1, 49, 87, 87,  1,  1, 87,
        1, 49, 87, 87, 87, 87,  1,  1, 87, 87, 87, 49, 87, 87, 87, 87,  1,
       87, 49, 49, 23, 94])

In [25]:
# Apply the same k-means wrapper to the symbolic feature tensor, which adds the
# cluster assignment to the TF graph (the displayed result is an int64 tensor).
kmean_tf = kmean(concated)
kmean_tf

Instructions for updating:
keep_dims is deprecated, use keepdims instead


<tf.Tensor 'ArgMin:0' shape=(?,) dtype=int64>

In [26]:
# Run waveform -> features -> cluster ids in one graph call for `y1`.
sess.run(fetches = kmean_tf, feed_dict = {i: y1})

array([40, 40, 40, 40, 49, 49, 49, 49,  1, 87, 87, 49, 49, 52, 52, 29, 29,
       31, 30, 30, 30, 63, 63, 17, 17, 31, 31, 54, 54, 62, 35, 35, 35,  0,
       93, 38, 38, 59, 83, 83, 24, 20, 20, 20, 20, 20, 45, 45, 45, 45, 45,
       45,  5,  5,  5,  5,  5,  5,  5, 70, 70, 70, 79, 73, 73, 73, 65, 93,
       71, 30, 36,  6,  6,  6, 18, 89, 89,  1,  1, 23, 23, 50, 50, 71, 71,
       24, 24, 24, 24, 20, 20, 20, 75, 26,  6, 66, 77, 67, 67, 24, 20, 74,
       74,  3,  3,  3,  3, 80, 80, 80, 80, 80, 55, 55,  1,  1,  1,  1, 32,
        1, 49, 87, 87, 49, 49, 87,  1, 87,  1,  1, 87, 87, 87, 87, 87,  1,
        1,  1, 49,  1,  1,  1, 49, 49, 87,  1,  1, 49, 87, 87,  1,  1, 87,
        1, 49, 87, 87, 87, 87,  1,  1, 87, 87, 87, 49, 87, 87, 87, 87,  1,
       87, 49, 49, 23, 94])