In [64]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [65]:
%cd /content/drive/My Drive

/content/drive/My Drive


In [66]:
import pickle
import random

In [67]:
!pip install hmmlearn



In [68]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [69]:
import tensorflow as tf
from tensorflow import keras

In [70]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length)
        #hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [71]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [72]:
def clustering(X, n_clusters=5):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [95]:
class_names = ["bat_den", "tat_den", "test_bat_den", "test_tat_den"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("./data/", cname))
print("Load complete !")

Load bat_den dataset
Load tat_den dataset
Load test_bat_den dataset
Load test_tat_den dataset
Load complete !


In [74]:

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

vectors (23951, 36)
centers (5, 36)
centers (5, 36)


In [75]:
for key, val in dataset.items():
    print(key,'\n', np.array(val).shape)

bat_den 
 (75,)
tat_den 
 (75,)
test_bat_den 
 (22,)
test_tat_den 
 (22,)


In [76]:
models = {}
original_dataset = {}

In [77]:
cname = "bat_den"
original_dataset[cname] = dataset[cname].copy()
class_vectors = dataset[cname]

dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])
random.shuffle(dataset[cname])

hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=19, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
hmm.transmat_=np.array([ 
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.]
    ])

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
models[cname] = hmm

training class bat_den
(10146, 1) [129, 140, 209, 94, 116, 161, 94, 177, 108, 161, 194, 131, 100, 140, 103, 94, 225, 165, 202, 137, 193, 145, 124, 177, 161, 137, 94, 139, 84, 161, 161, 75, 124, 165, 193, 171, 171, 129, 84, 122, 84, 129, 100, 137, 94, 84, 129, 84, 145, 110, 103, 103, 161, 103, 161, 75, 145, 103, 145, 147, 137, 181, 193, 225, 84, 97, 177, 84, 84, 84, 139, 145, 241, 112, 131] 75


         1      -14610.8433             +nan
         2       -5568.2128       +9042.6305
         3       -4244.9355       +1323.2773
         4       -4022.2945        +222.6410
         5       -3928.0877         +94.2068
         6       -3850.1228         +77.9649
         7       -3770.2294         +79.8934
         8       -3742.5329         +27.6965
         9       -3730.6014         +11.9315
        10       -3717.0269         +13.5745
        11       -3700.7611         +16.2658
        12       -3681.1809         +19.5802
        13       -3646.1382         +35.0427
        14       -3547.8836         +98.2546
        15       -3388.4413        +159.4422
        16       -3293.9617         +94.4797
        17       -3241.4507         +52.5109
        18       -3202.5265         +38.9242
        19       -3183.0257         +19.5008
        20       -3175.4048          +7.6209
        21       -3171.2564          +4.1484
        22       -3168.8321          +2.4243
        23

In [78]:
cname = "tat_den"
original_dataset[cname] = dataset[cname].copy()
class_vectors = dataset[cname]

dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in original_dataset[cname]])
random.shuffle(dataset[cname])

hmm = hmmlearn.hmm.MultinomialHMM(
    n_components=19, random_state=0, n_iter=1000, verbose=True,
    params='te',
    init_params='e'
)
hmm.startprob_=np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
hmm.transmat_=np.array([ 
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.]
    ])
X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
models[cname] = hmm

training class tat_den
(8465, 1) [129, 129, 97, 97, 139, 163, 97, 129, 113, 97, 84, 129, 94, 84, 84, 112, 75, 145, 129, 94, 75, 142, 126, 112, 155, 97, 186, 145, 145, 84, 97, 147, 129, 81, 75, 112, 116, 84, 84, 129, 56, 75, 139, 103, 97, 158, 113, 145, 113, 113, 97, 94, 134, 121, 113, 134, 113, 103, 112, 121, 129, 112, 129, 131, 121, 84, 150, 126, 84, 97, 137, 84, 81, 84, 121] 75


         1      -12262.0523             +nan
         2       -5162.7810       +7099.2712
         3       -4136.7501       +1026.0310
         4       -3625.4705        +511.2796
         5       -3333.4156        +292.0549
         6       -3212.1708        +121.2449
         7       -3143.4109         +68.7599
         8       -3121.4543         +21.9566
         9       -3116.7283          +4.7260
        10       -3110.1387          +6.5896
        11       -3104.3762          +5.7625
        12       -3101.4313          +2.9448
        13       -3098.9911          +2.4402
        14       -3096.7319          +2.2593
        15       -3094.7006          +2.0312
        16       -3093.0067          +1.6940
        17       -3091.6617          +1.3450
        18       -3090.5720          +1.0897
        19       -3089.6277          +0.9443
        20       -3088.7421          +0.8856
        21       -3087.8394          +0.9027
        22       -3086.7922          +1.0472
        23

In [79]:
pickle.dump(models, open('model.pkl','wb'))

In [None]:
dataset["test_bat_den"] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_bat_den']])
dataset["test_tat_den"] = list([kmeans.predict(v).reshape(-1,1) for v in dataset['test_tat_den']])

print("Accuracy:")
mapping = ["bat_den", "tat_den"]
class_names = ["test_bat_den","test_tat_den"]
count = 0
correct = 0
for true_cname in class_names:
    score = []
    for i in dataset[true_cname]:
        score = [model.score(i, [len(i)]) for cname, model in models.items() if cname[:4] != 'test']
        res = mapping[score.index(max(score))] 
        if res == true_cname[5:]:
            correct += 1
        count += 1 
print(100*correct/count, "%")

In [98]:
models = pickle.load(open('model.pkl', 'rb'))
file_name = '/content/drive/My Drive/data/test_tat_den/24.wav'
 # file_name = request.get_json()['file_name']
sound_mfcc = get_mfcc(file_name)
kmeans = clustering(sound_mfcc)
sound_mfcc = kmeans.predict(sound_mfcc).reshape(-1,1)
score = {cname : model.score(sound_mfcc, [len(sound_mfcc)]) for cname, model in models.items()}
predict = max(score.keys(), key=(lambda k: score[k]))
    # return jsonify(score)
print(score)
print(predict)

centers (5, 36)
{'bat_den': -387.7714268408532, 'tat_den': -237.29413272001696}
tat_den
