In [4]:
!pip install python_speech_features fastdtw


Collecting fastdtw
  Downloading fastdtw-0.3.4.tar.gz (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.4/133.4 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fastdtw
  Building wheel for fastdtw (setup.py) ... [?25l[?25hdone
  Created wheel for fastdtw: filename=fastdtw-0.3.4-cp310-cp310-linux_x86_64.whl size=512710 sha256=9813598ebc43b3d2d1a563067875a08a30e5004573720d439795c2b77a81ce47
  Stored in directory: /root/.cache/pip/wheels/73/c8/f7/c25448dab74c3acf4848bc25d513c736bb93910277e1528ef4
Successfully built fastdtw
Installing collected packages: fastdtw
Successfully installed fastdtw-0.3.4


In [5]:
import numpy as np
from fastdtw import fastdtw
import math
from scipy.spatial.distance import euclidean, sqeuclidean, cosine, correlation, chebyshev, cityblock, minkowski

template = np.array([[1,1,3,6,12], [1,1,2,5,11], [3,3,3,3,3]])
input_arr = np.array([[1,2,11], [1,3,12], [3,3,3]])


def compute_cost_matrix(input_array, template):
    distance_matrix = np.zeros((len(template), len(input_array)))
    for i in range(len(distance_matrix)):
        for j in range(len(distance_matrix[0])):
            distance_matrix[i][j] = eucledian(input_array[j], template[i])

    return distance_matrix

def eucledian(a, b):
    total = 0
    for i in range(len(a)):
        total += (a[i]-b[i])**2
    return math.sqrt(total)

def compute_accumulated_cost_matrix(C):
    N = C.shape[0]
    M = C.shape[1]
    D = np.zeros((N, M))
    D[0, 0] = C[0, 0]
    for n in range(1, N):
        D[n, 0] = D[n-1, 0] + C[n, 0]
    for m in range(1, M):
        D[0, m] = D[0, m-1] + C[0, m]
    for n in range(1, N):
        for m in range(1, M):
            D[n, m] = C[n, m] + min(D[n-1, m], D[n, m-1], D[n-1, m-1])
    return D
# test fungsionalitas fungsi
C = compute_cost_matrix(input_array=input_arr.T, template=template.T)
D =  compute_accumulated_cost_matrix(C)
print('Accumulated cost matrix D =', D, sep='\n')
print('DTW distance DTW(X, Y) =', D[-1, -1])

print("DTW calculation using library:", fastdtw(input_arr.T, template.T, dist=euclidean))


Accumulated cost matrix D =
[[ 0.          2.23606798 17.10213672]
 [ 0.          2.23606798 17.10213672]
 [ 2.23606798  1.41421356 14.22046204]
 [ 8.63919221  5.88634952 10.01653883]
 [23.50526096 18.69259799  7.30056308]]
DTW distance DTW(X, Y) = 7.3005630797457695
DTW calculation using library: (7.3005630797457695, [(0, 0), (0, 1), (1, 2), (1, 3), (2, 4)])


In [46]:
from python_speech_features import mfcc
from python_speech_features import logfbank
import scipy.io.wavfile as wav
import os

folder_path = './dictionary/'
template = []
if os.path.exists(folder_path) and os.path.isdir(folder_path):
    # List all files in the folder
    files = os.listdir(folder_path)

    # Iterate through the files
    for file_name in files:
        # You can process each file here, for example, print the file name
        print(file_name)
        (rate,sig) = wav.read(folder_path + file_name)
        print("signal length and channel : ", sig.shape )
        mfcc_feat = mfcc(sig,rate,nfilt = 39,numcep = 39,nfft =2048)

        print("feature extraction length and channel : ", mfcc_feat.shape)
        template.append([file_name[:-4],mfcc_feat])

tugas.wav
signal length and channel :  (61056, 2)
feature extraction length and channel :  (253, 39)
merupakan.wav
signal length and channel :  (70272, 2)
feature extraction length and channel :  (292, 39)
suara2.wav
signal length and channel :  (70272, 2)
feature extraction length and channel :  (292, 39)
kuliah.wav
signal length and channel :  (81792, 2)
feature extraction length and channel :  (340, 39)
kecil.wav
signal length and channel :  (56448, 2)
feature extraction length and channel :  (234, 39)
ini.wav
signal length and channel :  (61056, 2)
feature extraction length and channel :  (253, 39)
halo.wav
signal length and channel :  (56448, 2)
feature extraction length and channel :  (234, 39)
pemrosesan.wav
signal length and channel :  (72576, 2)
feature extraction length and channel :  (301, 39)


## import suara rafli sebagai dataset



In [47]:
folder_path = './test/rafli/'
test_rafli = []
if os.path.exists(folder_path) and os.path.isdir(folder_path):
    # List all files in the folder
    files = os.listdir(folder_path)

    # Iterate through the files
    for file_name in files:
        # You can process each file here, for example, print the file name
        print(file_name)
        (rate,sig) = wav.read(folder_path + file_name)
        print("signal length and channel : ", sig.shape )
        mfcc_feat = mfcc(sig,rate,nfilt = 39,numcep = 39,nfft =2048)

        print("feature extraction length and channel : ", mfcc_feat.shape)
        test_rafli.append([file_name[:-4],mfcc_feat])


tugas.wav
signal length and channel :  (67584,)
feature extraction length and channel :  (140, 39)
merupakan.wav
signal length and channel :  (81920,)
feature extraction length and channel :  (170, 39)
kuliah.wav
signal length and channel :  (73728,)
feature extraction length and channel :  (153, 39)
kecil.wav
signal length and channel :  (73728,)
feature extraction length and channel :  (153, 39)
ini.wav
signal length and channel :  (73728,)
feature extraction length and channel :  (153, 39)
halo.wav
signal length and channel :  (71680,)
feature extraction length and channel :  (148, 39)
suara.wav
signal length and channel :  (60416,)
feature extraction length and channel :  (125, 39)
pemrosesan.wav
signal length and channel :  (82944,)
feature extraction length and channel :  (172, 39)


## import suara ronggur sebagai dataset

In [48]:
folder_path = './test/ronggur/'
test_ronggur = []
if os.path.exists(folder_path) and os.path.isdir(folder_path):
    # List all files in the folder
    files = os.listdir(folder_path)

    # Iterate through the files
    for file_name in files:
        # You can process each file here, for example, print the file name
        print(file_name)
        (rate,sig) = wav.read(folder_path + file_name)
        print("signal length and channel : ", sig.shape )
        mfcc_feat = mfcc(sig,rate,nfilt = 39,numcep = 39,nfft =2048)

        print("feature extraction length and channel : ", mfcc_feat.shape)
        test_ronggur.append([file_name[:-4],mfcc_feat])


tugas.wav
signal length and channel :  (73728, 2)
feature extraction length and channel :  (306, 39)
merupakan.wav
signal length and channel :  (94208, 2)
feature extraction length and channel :  (392, 39)
kuliah.wav
signal length and channel :  (82944, 2)
feature extraction length and channel :  (345, 39)
kecil.wav
signal length and channel :  (93184, 2)
feature extraction length and channel :  (387, 39)
ini.wav
signal length and channel :  (90112, 2)
feature extraction length and channel :  (374, 39)
halo.wav
signal length and channel :  (140288, 2)
feature extraction length and channel :  (584, 39)
suara.wav
signal length and channel :  (87040, 2)
feature extraction length and channel :  (362, 39)
pemrosesan.wav
signal length and channel :  (93184, 2)
feature extraction length and channel :  (387, 39)


## import dictionary sebagai dataset

In [60]:
folder_path = './dictionary/'
test_template = []
if os.path.exists(folder_path) and os.path.isdir(folder_path):
    # List all files in the folder
    files = os.listdir(folder_path)

    # Iterate through the files
    for file_name in files:
        # You can process each file here, for example, print the file name
        print(file_name)
        (rate,sig) = wav.read(folder_path + file_name)
        print("signal length and channel : ", sig.shape )
        mfcc_feat = mfcc(sig,rate,nfilt = 39,numcep = 39,nfft =2048)

        print("feature extraction length and channel : ", mfcc_feat.shape)
        test_template.append([file_name[:-4],mfcc_feat])


tugas.wav
signal length and channel :  (61056, 2)
feature extraction length and channel :  (253, 39)
merupakan.wav
signal length and channel :  (70272, 2)
feature extraction length and channel :  (292, 39)
suara2.wav
signal length and channel :  (70272, 2)
feature extraction length and channel :  (292, 39)
kuliah.wav
signal length and channel :  (81792, 2)
feature extraction length and channel :  (340, 39)
kecil.wav
signal length and channel :  (56448, 2)
feature extraction length and channel :  (234, 39)
ini.wav
signal length and channel :  (61056, 2)
feature extraction length and channel :  (253, 39)
halo.wav
signal length and channel :  (56448, 2)
feature extraction length and channel :  (234, 39)
pemrosesan.wav
signal length and channel :  (72576, 2)
feature extraction length and channel :  (301, 39)


## def prosedur untuk testing dan calc accuracy

In [59]:
def testSpeech(testset, template):
    truepos = 0
    for i in testset:
      bestGuessClass = template[0][0]
      bestScore = 99999999.0
      for j in template:
        C = compute_cost_matrix(input_array=i[1], template=j[1])
        D =  compute_accumulated_cost_matrix(C)
        # print(D[-1, -1])
        if (bestScore > D[-1, -1]).any():
          bestScore = D[-1, -1]
          bestGuessClass = j[0]
      print("groundtruth :", i[0])
      print("Predicted : ", bestGuessClass)
      if(i[0] == bestGuessClass):
        truepos += 1
      print("Score : ", bestScore)
      print("------------------")
    acc = truepos/len(test_rafli)
    print("accuracy : ", acc)
    return acc

## Test pake suara rafli

In [62]:
print(testSpeech(test_rafli, template))

groundtruth : tugas
Predicted :  halo
Score :  26707.485299908898
------------------
groundtruth : merupakan
Predicted :  halo
Score :  27221.776847280486
------------------
groundtruth : kuliah
Predicted :  halo
Score :  26456.12387813376
------------------
groundtruth : kecil
Predicted :  ini
Score :  26602.40984703177
------------------
groundtruth : ini
Predicted :  halo
Score :  28081.678683637438
------------------
groundtruth : halo
Predicted :  halo
Score :  28165.0635320707
------------------
groundtruth : suara
Predicted :  halo
Score :  25544.781633270613
------------------
groundtruth : pemrosesan
Predicted :  halo
Score :  26707.89421025205
------------------
accuracy :  0.125
0.125


## Test pake suara ronggur

In [63]:
print(testSpeech(test_ronggur, template))

groundtruth : tugas
Predicted :  kecil
Score :  26741.52633176501
------------------
groundtruth : merupakan
Predicted :  merupakan
Score :  33083.98182293037
------------------
groundtruth : kuliah
Predicted :  kecil
Score :  28341.532111976652
------------------
groundtruth : kecil
Predicted :  kecil
Score :  30824.46344478791
------------------
groundtruth : ini
Predicted :  kecil
Score :  31647.313078436764
------------------
groundtruth : halo
Predicted :  ini
Score :  42543.77701857394
------------------
groundtruth : suara
Predicted :  kecil
Score :  30473.62253096967
------------------
groundtruth : pemrosesan
Predicted :  kecil
Score :  32220.77128617396
------------------
accuracy :  0.25
0.25


## Test pake templatenya (harusnya acc 100% dan score 0)

In [61]:
print(testSpeech(test_template, template))

groundtruth : tugas
Predicted :  tugas
Score :  0.0
------------------
groundtruth : merupakan
Predicted :  merupakan
Score :  0.0
------------------
groundtruth : suara2
Predicted :  suara2
Score :  0.0
------------------
groundtruth : kuliah
Predicted :  kuliah
Score :  0.0
------------------
groundtruth : kecil
Predicted :  kecil
Score :  0.0
------------------
groundtruth : ini
Predicted :  ini
Score :  0.0
------------------
groundtruth : halo
Predicted :  halo
Score :  0.0
------------------
groundtruth : pemrosesan
Predicted :  pemrosesan
Score :  0.0
------------------
accuracy :  1.0
1.0
