In [90]:
import numpy as np
import math
import operator
import os
import librosa

In [91]:
def dtwDist(x, y, dist, warp=1, w=np.inf, s=1.0):
    """Return the dynamic-time-warping cost between two sequences.

    Parameters
    ----------
    x, y : sequences supporting ``len()`` and integer indexing
        The two sequences to align; both must be non-empty.
    dist : callable
        ``dist(x[i], y[j])`` gives the local cost of pairing two elements.
    warp : int, optional
        Number of off-diagonal shifts considered at each cell.
    w : int or np.inf, optional
        Window size: only cells with ``|i - j| <= w`` take part in the
        alignment.  Must be at least ``abs(len(x) - len(y))``.  ``np.inf``
        (the default) disables the window.
    s : float, optional
        Positive weight multiplied onto the off-diagonal predecessors, so
        values above 1 favour the diagonal move.

    Returns
    -------
    The accumulated cost stored in the last cell of the cost matrix, i.e.
    for a path ending at the last element of both sequences.

    Raises
    ------
    AssertionError
        If either sequence is empty, the window is too narrow for the
        length difference, or ``s`` is not positive.
    """
    assert len(x)
    assert len(y)
    assert np.isinf(w) or (w >= abs(len(x) - len(y)))
    assert s > 0
    r, c = len(x), len(y)
    if not np.isinf(w):
        # np.full, not a bare `full`: only `numpy as np` is imported, so the
        # unqualified name raised NameError for every finite window.
        D0 = np.full((r + 1, c + 1), np.inf)
        for i in range(1, r + 1):
            # Open up only the cells inside the window; the rest stay inf.
            D0[i, max(1, i - w):min(c + 1, i + w + 1)] = 0
        D0[0, 0] = 0
    else:
        D0 = np.zeros((r + 1, c + 1))
        D0[0, 1:] = np.inf
        D0[1:, 0] = np.inf
    # D1 is a view on D0 without the padding row/column, so writes to D1
    # are visible through D0 and vice versa.
    D1 = D0[1:, 1:]
    # Fill in the local pairwise costs (restricted to the window, if any).
    for i in range(r):
        for j in range(c):
            if (np.isinf(w) or (max(0, i - w) <= j <= min(c, i + w))):
                D1[i, j] = dist(x[i], y[j])
    # Accumulate: each cell adds the cheapest of its predecessors.
    jrange = range(c)
    for i in range(r):
        if not np.isinf(w):
            jrange = range(max(0, i - w), min(c, i + w + 1))
        for j in jrange:
            # D0[i, j] is the diagonal predecessor of D1[i, j] (unweighted).
            min_list = [D0[i, j]]
            for k in range(1, warp + 1):
                i_k = min(i + k, r)
                j_k = min(j + k, c)
                min_list += [D0[i_k, j] * s, D0[i, j_k] * s]
            D1[i, j] += min(min_list)
    return D1[-1, -1]
    

In [92]:
def getNeighbours(mfcc_train_data,mfcc_test_instance,k,mfcc_train_labels):
    """Return the labels of the k training samples closest to a test sample.

    Closeness is the DTW cost (``dtwDist``) between the transposed test
    matrix and each transposed training matrix, using the L1 norm as the
    local distance.
    # assumes each MFCC matrix is (n_coeffs, n_frames), so ``.T`` yields one
    # row per frame - TODO confirm against the feature-extraction step.

    Parameters
    ----------
    mfcc_train_data : indexable collection of arrays
        Training feature matrices.
    mfcc_test_instance : array
        Feature matrix of the sample being classified.
    k : int
        Number of neighbours to return.
    mfcc_train_labels : indexable collection
        Label of each training matrix, aligned by position.

    Returns
    -------
    list
        The k labels, ordered from smallest to largest DTW cost.
    """
    def l1_distance(frame_a, frame_b):
        # Sum of absolute differences between two frames.
        return np.linalg.norm(frame_a - frame_b, ord=1)

    scored = []
    for idx in range(len(mfcc_train_data)):
        cost = dtwDist(mfcc_test_instance.T, mfcc_train_data[idx].T, dist=l1_distance)
        scored.append((mfcc_train_labels[idx], cost))

    # Stable sort on the cost only, so equal costs keep training order.
    ranked = sorted(scored, key=operator.itemgetter(1))
    return [ranked[pos][0] for pos in range(k)]
        

In [93]:
def ClassResponse(neighbours):
    """Return the majority label among the given neighbour labels.

    Parameters
    ----------
    neighbours : sequence of hashable labels
        One label per neighbour, as returned by ``getNeighbours``.

    Returns
    -------
    The label with the most votes.  On a tie, the label that occurs first
    in ``neighbours`` wins (the vote table keeps insertion order and the
    sort is stable).

    Raises
    ------
    IndexError
        If ``neighbours`` is empty.
    """
    classvotes = {}
    for label in neighbours:
        # Vote on the label itself.  Indexing it with [-1] only worked for
        # one-character strings: '10' was counted as '0' and integer labels
        # raised TypeError.
        classvotes[label] = classvotes.get(label, 0) + 1
    sortedVotes = sorted(classvotes.items(), key=operator.itemgetter(1), reverse=True)
    return sortedVotes[0][0]

In [94]:
def getAccuracy(test_results,predictions):
    """Return the percentage of predictions that match the true labels.

    Parameters
    ----------
    test_results : indexable sequence
        Ground-truth labels.
    predictions : indexable sequence
        Predicted labels, compared position by position with
        ``test_results``.

    Returns
    -------
    float
        Percentage of matching positions, rounded to three decimals.
    """
    total = len(test_results)
    hits = sum(1 for idx in range(total) if test_results[idx] == predictions[idx])
    return round((hits / float(total)) * 100.0, 3)


In [95]:
def knn(mfcc_train_data,mfcc_test_data,k,test_results,mfcc_train_labels):
    """Classify every test sample with k-nearest-neighbours and print accuracy.

    For each test matrix, the k nearest training labels are fetched with
    ``getNeighbours`` and reduced to a single prediction by
    ``ClassResponse``.  The predictions are then scored against
    ``test_results`` with ``getAccuracy`` and the result is printed.

    Parameters
    ----------
    mfcc_train_data : indexable collection of arrays
        Training feature matrices.
    mfcc_test_data : indexable collection of arrays
        Feature matrices to classify.
    k : int
        Number of neighbours consulted per test sample.
    test_results : indexable sequence
        Ground-truth labels of the test samples.
    mfcc_train_labels : indexable collection
        Labels of the training samples.

    Returns
    -------
    None
        The accuracy is only printed.
    """
    predictions = [
        ClassResponse(
            getNeighbours(mfcc_train_data, mfcc_test_data[idx], k, mfcc_train_labels)
        )
        for idx in range(len(mfcc_test_data))
    ]

    accuracy = getAccuracy(test_results, predictions)
    print('Accuracy = ', accuracy, '%')

In [96]:
# Directory containing the recordings, relative to the notebook's working directory.
dirname = "free-spoken-digit-dataset-master/recordings"
# os.listdir already returns a list of entry names, so no comprehension is needed.
files = os.listdir(dirname)

In [97]:
# Build one 13-coefficient MFCC matrix and one label per recording.
mfcc_list = []
y_labels = []
for fname in files:
    y, sr = librosa.load(os.path.join(dirname, fname))
    # Pass the signal and sample rate by keyword: newer librosa releases
    # reject positional arguments here, and keywords work on old ones too.
    mfcc1 = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    mfcc_list.append(mfcc1)
    # The label is the first character of the file name
    # (presumably the spoken digit - TODO confirm against the dataset naming).
    y_labels.append(fname[0])
    

In [98]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the recordings for evaluation.  No random_state is passed,
# so the split (and therefore the reported accuracy) can differ between runs.
mfcc_train, mfcc_test, y_train, y_test = train_test_split(
    mfcc_list,
    y_labels,
    test_size=0.2,
)

In [99]:
# Classify the held-out recordings using the 50 nearest training samples.
knn(
    mfcc_train_data=mfcc_train,
    mfcc_test_data=mfcc_test,
    k=50,
    test_results=y_test,
    mfcc_train_labels=y_train,
)

Accuracy =  95.5 %
