In [196]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [197]:
%cd /content/drive/My Drive

/content/drive/My Drive


In [198]:
import pickle
import random

In [199]:
!pip install hmmlearn



In [200]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [201]:
import tensorflow as tf
from tensorflow import keras

In [202]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length)
        #hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [203]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [204]:
def clustering(X, n_clusters=5):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [205]:
class_names = ["bat_den", "tat_den"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("./data/", cname))
print("Load complete !")

Load bat_den dataset
Load tat_den dataset
Load complete !


In [206]:

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

vectors (18611, 36)
centers (5, 36)
centers (5, 36)


In [207]:
for key, val in dataset.items():
    print(key,'\n', np.array(val).shape)

bat_den 
 (75,)
tat_den 
 (75,)


In [208]:
models = {}
original_dataset = {}

In [209]:
cname = "bat_den"
original_dataset[cname] = dataset[cname].copy()
class_vectors = dataset[cname]

dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])
random.shuffle(dataset[cname])

hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=19, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
hmm.transmat_=np.array([ 
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.]
    ])
if cname[:4] != 'test':
  X = np.concatenate(dataset[cname])
  lengths = list([len(x) for x in dataset[cname]])
  print("training class", cname)
  print(X.shape, lengths, len(lengths))
  hmm.fit(X, lengths=lengths)
  models[cname] = hmm

training class bat_den
(10146, 1) [116, 124, 84, 100, 225, 177, 193, 129, 103, 140, 202, 131, 94, 165, 171, 103, 161, 129, 129, 145, 94, 108, 139, 103, 147, 145, 84, 181, 75, 122, 193, 110, 165, 161, 75, 241, 129, 161, 137, 94, 94, 84, 137, 145, 145, 97, 171, 84, 140, 100, 124, 161, 161, 161, 137, 161, 131, 84, 112, 209, 193, 84, 84, 103, 194, 137, 177, 84, 145, 177, 103, 139, 84, 225, 94] 75


         1      -13355.3960             +nan
         2       -5717.5754       +7637.8206
         3       -4345.1989       +1372.3765
         4       -3986.1240        +359.0749
         5       -3781.6535        +204.4706
         6       -3604.6434        +177.0101
         7       -3425.4749        +179.1685
         8       -3381.5490         +43.9259
         9       -3361.0065         +20.5425
        10       -3349.4582         +11.5483
        11       -3343.4489          +6.0093
        12       -3339.2790          +4.1698
        13       -3333.8142          +5.4649
        14       -3327.5622          +6.2519
        15       -3324.3708          +3.1915
        16       -3320.9974          +3.3734
        17       -3317.4140          +3.5833
        18       -3313.9683          +3.4457
        19       -3310.7167          +3.2516
        20       -3307.6538          +3.0629
        21       -3304.8710          +2.7829
        22       -3302.5388          +2.3322
        23

In [210]:
cname = "tat_den"
original_dataset[cname] = dataset[cname].copy()
class_vectors = dataset[cname]

dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])
random.shuffle(dataset[cname])

hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=19, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
hmm.transmat_=np.array([ 
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.]
    ])

if cname[:4] != 'test':
  X = np.concatenate(dataset[cname])
  lengths = list([len(x) for x in dataset[cname]])
  print("training class", cname)
  print(X.shape, lengths, len(lengths))
  hmm.fit(X, lengths=lengths)
  models[cname] = hmm

training class tat_den
(8465, 1) [75, 81, 103, 94, 116, 84, 84, 84, 129, 129, 121, 112, 129, 129, 75, 84, 112, 56, 126, 113, 113, 129, 139, 129, 186, 103, 145, 97, 97, 121, 113, 145, 84, 113, 121, 75, 150, 145, 97, 163, 84, 134, 84, 112, 84, 75, 158, 97, 97, 94, 84, 112, 145, 97, 113, 155, 129, 142, 129, 147, 94, 97, 84, 134, 97, 137, 129, 113, 126, 81, 139, 97, 131, 112, 121] 75


         1      -11283.4706             +nan
         2       -4878.4887       +6404.9819
         3       -3984.5334        +893.9553
         4       -3813.2249        +171.3085
         5       -3704.5455        +108.6794
         6       -3619.3969         +85.1486
         7       -3577.0562         +42.3408
         8       -3555.2470         +21.8092
         9       -3538.1145         +17.1324
        10       -3521.8436         +16.2710
        11       -3508.6309         +13.2126
        12       -3501.1767          +7.4543
        13       -3497.0627          +4.1140
        14       -3494.7357          +2.3270
        15       -3493.4469          +1.2888
        16       -3492.6559          +0.7911
        17       -3492.1052          +0.5507
        18       -3491.6889          +0.4162
        19       -3491.3529          +0.3360
        20       -3491.0689          +0.2840
        21       -3490.8289          +0.2400
        22       -3490.6352          +0.1937
        23

In [211]:
pickle.dump(models, open('model.pkl','wb'))

In [212]:
class_names = ["test_bat_den", "test_tat_den"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("./data/", cname))
print("Load complete !")

dataset["test_bat_den"] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_bat_den']])
dataset["test_tat_den"] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_tat_den']])

print("Accuracy:")
mapping = ["bat_den", "tat_den"]
class_names = ["test_bat_den","test_tat_den"]
count = 0
correct = 0
for true_cname in class_names:
    score = []
    for i in dataset[true_cname]:
        score = [model.score(i, [len(i)]) for cname, model in models.items() if cname[:4] != 'test']
        res = mapping[score.index(max(score))] 
        if res == true_cname[5:]:
            correct += 1
        count += 1 
print(100*correct/count, "%")

Load test_bat_den dataset
Load test_tat_den dataset
Load complete !
Accuracy:
76.08695652173913 %


In [213]:
models = pickle.load(open('model.pkl', 'rb'))
file_name = '/content/drive/My Drive/data/test_tat_den/1.wav'
 # file_name = request.get_json()['file_name']
sound_mfcc = get_mfcc(file_name)
kmeans = clustering(sound_mfcc)
sound_mfcc = kmeans.predict(sound_mfcc).reshape(-1,1)
score = {cname : model.score(sound_mfcc, [len(sound_mfcc)]) for cname, model in models.items()}
predict = max(score.keys(), key=(lambda k: score[k]))
    # return jsonify(score)
print(score)
print(predict)

centers (5, 36)
{'bat_den': -899.839957659007, 'tat_den': -866.0467019201021}
tat_den
