#### Given mfcc features of 1,2,3 , model using SVM and ANNs

* Load all the necessary packages
* Load the data
* Find the reference utterance from each class 
* Reference utterance is that one whose length is maximum and common to all the class
* For all the utterances..do DTW with the reference utterance of the respective classes and find the path
* Using the indices in the path...shrink or expand your utterance and store it in 'X'
* Once you get the fixed length data..pass it to your SVM library
* Get the required scores and accuracy using different attributes of SVM library

In [2]:
# Load Packages
import numpy as np
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
%matplotlib inline
import glob
import pandas as pd
from scipy.spatial.distance import euclidean
from numpy import array, zeros, argmin, inf, equal, ndim
from scipy.spatial.distance import cdist
from sklearn import svm
from sklearn.svm import LinearSVC

In [3]:
# Set Path to the dataset
d1 = glob.glob('data/1/*')
d2 = glob.glob('data/2/*')
d3 = glob.glob('data/3/*')

In [4]:
# Load data.....
def load_mfcc(list_paths):
    X = []
    for i in list_paths:
        X.append(np.array(pd.read_csv(i, sep = " ", skiprows = [0], header = None))[:,1:])

    return np.array(X)

In [5]:
# Load and structure data
X1 = load_mfcc(d1)
X2 = load_mfcc(d2)
X3 = load_mfcc(d3)

#### Applying DTW

In [6]:
# Method to traceback the path generated after DTW
def _traceback(D):
    i, j = array(D.shape) - 2
    p, q = [i], [j]
    while ((i > 0) or (j > 0)):
        tb = argmin((D[i, j], D[i, j+1], D[i+1, j]))
        if (tb == 0):
            i -= 1
            j -= 1
        elif (tb == 1):
            i -= 1
        else: # (tb == 2):
            j -= 1
        p.insert(0, i)
        q.insert(0, j)
    return array(p), array(q)

In [7]:
# Code for DTW
def dtw(x, y, dist):
    """
    Computes Dynamic Time Warping (DTW) of two sequences.

    :param array x: N1*M array
    :param array y: N2*M array
    :param func dist: distance used as cost measure

    Returns the minimum distance, the cost matrix, the accumulated cost matrix, and the wrap path.
    """
    assert len(x)
    assert len(y)
    r, c = len(x), len(y)
    D0 = zeros((r + 1, c + 1))
    D0[0, 1:] = inf
    D0[1:, 0] = inf
    D1 = D0[1:, 1:] # view
    for i in range(r):
        for j in range(c):
            D1[i, j] = dist(x[i], y[j])
    C = D1.copy()
    for i in range(r):
        for j in range(c):
            D1[i, j] += min(D0[i, j], D0[i, j+1], D0[i+1, j])
    if len(x)==1:
        path = zeros(len(y)), range(len(y))
    elif len(y) == 1:
        path = range(len(x)), zeros(len(x))
    else:
        path = _traceback(D0)
    return (D1[-1, -1] / sum(D1.shape), C, D1, path)

In [8]:
from sklearn.metrics.pairwise import euclidean_distances
from nltk.metrics.distance import edit_distance
from sklearn.metrics.pairwise import manhattan_distances

In [9]:
max = 0
n_frames_1 = []
n_frames_2 = []
n_frames_3 = []

# Get the dimensions of all the frames for all classes
for i in range(0,57):
    n_frames_1.append(X1[i].shape[0])
    n_frames_2.append(X2[i].shape[0])
    n_frames_3.append(X3[i].shape[0])
    
# Find the maximum dimensions
print(np.max(np.array(n_frames_1)))
print(np.max(np.array(n_frames_2)))
print(np.max(np.array(n_frames_3)))

# Find the maximum dimension common to all the three classes
size = []
for each in n_frames_1:
    if each in n_frames_2 and each in n_frames_3:
        size.append(each)

# max(size) will give you the maximum dimension of the frames which is common to all the classes
print(np.max(size))

m = np.max(size)
# Find the index of the mfcc feature which has 124 frames
p, q, r = n_frames_1.index(m), n_frames_2.index(m), n_frames_3.index(m)
print(p,q,r)

194
131
183
124
30 25 30


In [10]:
# Set those mfcc features as your refrence in your DTW algo
ref_1 = X1[p]
ref_2 = X2[q]
ref_3 = X3[r]

# For each utterance in each class..do DTW with their reference utterance and find the path
X = []
Y = []
c = 38
# for Class 1
for i in range(0, 57):
    test = X1[i]
    dist_fun = lambda template, test: np.linalg.norm(template - test, ord=1)
    dist, cost, acc, path = dtw(ref_1, test, dist_fun)
    tmp = path[1]
    
    fixed_mfcc = np.zeros((m, c))
    for j in range(0, m):
        fixed_mfcc[j] = test[tmp[j]]
    X.append(fixed_mfcc)
    Y.append(1)

# for Class 2
for i in range(0, 57):
    test = X2[i]
    dist_fun = lambda template, test: np.linalg.norm(template - test, ord=1)
    dist, cost, acc, path = dtw(ref_2, test, dist_fun)
    tmp = path[1]
    
    fixed_mfcc = np.zeros((m, c))
    for j in range(0, m):
        fixed_mfcc[j] = test[tmp[j]]
    X.append(fixed_mfcc)
    Y.append(2)

# for Class 3
for i in range(0, 57):
    test = X3[i]
    dist_fun = lambda template, test: np.linalg.norm(template - test, ord=1)
    dist, cost, acc, path = dtw(ref_3, test, dist_fun)
    tmp = path[1]
    
    fixed_mfcc = np.zeros((m, c))
    for j in range(0, m):
        fixed_mfcc[j] = test[tmp[j]]        
    X.append(fixed_mfcc)
    Y.append(3)

    
# X will contain the fixed length utterances for each class
# Y will contain the actual class labels..like which utterance belongs to which class

In [11]:
# Some reshaping is rquired...basically each utterance is of dimensions 124x38...and we are reshaping it into 1x4712
X = np.array(X).reshape(-1, m*c)
Y = np.array(Y)

In [12]:
# Now split this fixed length data into training and testing data
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.33, random_state=42)

In [13]:
# Build the classifier using different kernel functions
clf1 =svm.SVC(kernel= 'linear', probability=True)  # Linear Kernel
clf2 =svm.SVC(kernel= 'poly', probability=True)  # Linear Kernel
clf3 =svm.SVC(kernel= 'rbf', probability=True)  # Linear Kernel
clf4 =svm.SVC(kernel= 'sigmoid', probability=True)  # Linear Kernel

# Fit the classifier on the data
clf1.fit(X_train, y_train)
clf2.fit(X_train, y_train)
clf3.fit(X_train, y_train)
clf4.fit(X_train, y_train)

# Do predictions
y_pred_1=clf1.predict(X_test)
y_pred_2=clf2.predict(X_test)
y_pred_3=clf3.predict(X_test)
y_pred_4=clf4.predict(X_test)

In [14]:
# Get the scores for plotting ROC and DET curves
scores1 = clf1.predict_proba(X_test)
scores2 = clf2.predict_proba(X_test)
scores3 = clf3.predict_proba(X_test)
scores4 = clf4.predict_proba(X_test)

In [15]:
y_test = y_test.reshape(57, 1)

y_pred1 = y_pred_1.reshape(57, 1)
y_pred2 = y_pred_2.reshape(57, 1)
y_pred3 = y_pred_3.reshape(57, 1)
y_pred4 = y_pred_4.reshape(57, 1)

scores1 = np.append(scores1, y_pred1, axis=1)
scores1 = np.append(scores1, y_test, axis=1)
np.savetxt('scores_linear_kernel', scores1)

scores2 = np.append(scores2, y_pred2, axis=1)
scores2 = np.append(scores2, y_test, axis=1)
np.savetxt('scores_poly_kernel', scores2)

scores3 = np.append(scores3, y_pred3, axis=1)
scores3 = np.append(scores3, y_test, axis=1)
np.savetxt('scores_rbf_kernel', scores3)

scores4 = np.append(scores4, y_pred4, axis=1)
scores4 = np.append(scores4, y_test, axis=1)
np.savetxt('scores_sigmoid_kernel', scores4)

# Accuracy
print(clf1.score(X_test, y_test))
print(clf2.score(X_test, y_test))
print(clf3.score(X_test, y_test))
print(clf4.score(X_test, y_test))

1.0
0.982456140351
1.0
0.964912280702


In [16]:
# Number of support vectors from each class
print(clf1.n_support_)
print(clf2.n_support_)
print(clf3.n_support_)
print(clf4.n_support_)

# Indices of the support vectors in the data X
print(len(clf1.support_))
print(len(clf2.support_))
print(len(clf3.support_))
print(len(clf4.support_))

[13 13 16]
[18 12 16]
[26 24 32]
[15 10 18]
42
46
82
43


#### Using ANNs

In [17]:
from sklearn.preprocessing import LabelBinarizer

import tensorflow as tf

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout

Using TensorFlow backend.


In [26]:
label_binarizer = LabelBinarizer()
y_train_hot = label_binarizer.fit_transform(y_train)

In [74]:
model = Sequential()
model.add(Dense(50, input_shape=(X_train.shape[1],)))
model.add(Activation('softmax'))
model.add(Dense(3))
model.add(Activation('softmax'))

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_41 (Dense)             (None, 50)                235650    
_________________________________________________________________
activation_35 (Activation)   (None, 50)                0         
_________________________________________________________________
dense_42 (Dense)             (None, 3)                 153       
_________________________________________________________________
activation_36 (Activation)   (None, 3)                 0         
Total params: 235,803
Trainable params: 235,803
Non-trainable params: 0
_________________________________________________________________


In [75]:
model.compile(optimizer = 'sgd', metrics=['accuracy'], loss='categorical_crossentropy')
history = model.fit(X_train, y_train_hot, epochs=10, batch_size=256)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [66]:
model.evaluate(X_test, label_binarizer.fit_transform(y_test))



[0.92673715373925991, 0.94736841164137187]

In [57]:
X_train.shape

(114, 4712)