In [1]:
import os

from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank
import scipy.io.wavfile as wav

In [157]:
def get_feature_vectors(dataset_type):
    
    #set parameters for training and testing
    if (dataset_type == "train"):
        directory = os.path.join(os.getcwd(), 'data_thuyg20_sre/enroll')
        no_of_frames = 400
    elif (dataset_type == "test"):    
        directory = os.path.join(os.getcwd(), 'data_thuyg20_sre/test')
        no_of_frames = 40
    
    dataset = numpy.empty([0, 61])
    
    for file in os.listdir(directory):
        
        # filter speakers
        names = ['F101', 'F102', 'F103', 'F104', 'F105', 'M101', 'M102', 'M103', 'M104']

        if any(name in file for name in names):
            
            # extract mfcc vectors
            (rate,sig) = wav.read(os.path.join(directory, file))
            mfcc_feat = mfcc(sig,rate)
            d_mfcc_feat = delta(mfcc_feat, 2)
            dd_mfcc_feat = delta(d_mfcc_feat, 2)
            
#             fbank_feat = logfbank(sig,rate)
            mfcc_vectors = mfcc_feat[11:11+no_of_frames,:]
            dmfcc_vectors = d_mfcc_feat[11:11+no_of_frames,:]
            ddmfcc_vectors = dd_mfcc_feat[11:11+no_of_frames,:]
            
            feature_vectors = numpy.hstack((mfcc_vectors, dmfcc_vectors, ddmfcc_vectors))
#             print(feature_vectors.shape)
            
            # get speaker index from filename
            speaker_index = file.split("_")[0]
            if speaker_index[0] == 'M':
                speaker_index = 5 + int(speaker_index[3:])
            else:
                speaker_index = int(speaker_index[3:])

            #append speaker index to feature vectors
            np_speaker_index = numpy.array([speaker_index])
            temp = numpy.tile(np_speaker_index[numpy.newaxis,:], (feature_vectors.shape[0],1))
            concatenated_feature_vector = numpy.concatenate((feature_vectors,temp), axis=1)
            
            # append file's data to dataset
            dataset = numpy.concatenate((dataset, concatenated_feature_vector), axis=0)
            

    return dataset


In [158]:
from keras.models import Sequential
import numpy as numpy 

In [159]:
# from numpy import genfromtxt
my_data = get_feature_vectors("train")


In [160]:
# print(my_data)
print(my_data.shape)

(3600, 61)


In [161]:
Y = numpy.copy(my_data[:, 60:])
print(Y.shape)

(3600, 1)


In [162]:
X = numpy.copy(my_data[:, :60])
print(X.shape)
mean = X.mean(0, keepdims=True)

print(mean.shape)
std_deviation = numpy.std(X, axis=0, keepdims=True)
print(std_deviation.shape)

normalized_X = (X - mean) / std_deviation
print(normalized_X.shape)


(3600, 60)
(1, 60)
(1, 60)
(3600, 60)


In [163]:
from keras import utils as np_utils

one_hot_labels = np_utils.to_categorical(Y, num_classes=10)
print(one_hot_labels)

[[ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [164]:
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD

# model = Sequential()
# model.add(Dense(32, activation='relu', input_dim=12))
# model.add(Dense(10, activation='softmax'))

# model.compile(optimizer='rmsprop',
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

# model.fit(normalized_X, one_hot_labels, epochs=10, batch_size=32)


# MultiLayer Perceptron
model = Sequential()

model.add(Dense(2560, activation='relu', input_dim=60))
model.add(Dropout(0.5))
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

sgd = SGD(lr=0.005, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(normalized_X, one_hot_labels, epochs=10, batch_size=32)
# score = model.evaluate(x_test, y_test, batch_size=128)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fd16322f6a0>

In [165]:
test_model = get_feature_vectors("test")
print(test_model.shape)

test_X = numpy.copy(test_model[:, :60])
print(test_X.shape)

normalized_test_X = (test_X - mean) / std_deviation
print(normalized_test_X.shape)

test_Y = numpy.copy(test_model[:, 60:])
print(test_Y.shape)
test_labels = np_utils.to_categorical(test_Y, num_classes=10)


(3600, 61)
(3600, 60)
(3600, 60)
(3600, 1)


In [166]:
print(model.test_on_batch(test_X, test_labels, sample_weight=None))
print(model.metrics_names)
predictions = model.predict(test_X)

[array(7.74141263961792, dtype=float32), array(0.4580555558204651, dtype=float32)]
['loss', 'acc']


In [167]:
b = [sum(predictions[current: current+40]) for current in range(0, len(predictions), 40)]
predicted_Y = []
for row in b:
    predicted_Y.append(row.argmax(axis=0))
    
# print(predicted_Y)
# print(test_Y[::40].T)

for t, p in zip(test_Y[::40].T[0], predicted_Y):
    print (int(t), p)

diff = predicted_Y - test_Y[::40].T[0]

print(len(predicted_Y))
print(sum(x == 0 for x in diff))

9 1
2 1
1 1
6 6
9 9
4 2
1 1
4 1
9 9
1 1
5 1
5 5
1 1
1 1
3 1
9 9
6 6
6 6
3 3
5 1
3 3
5 7
9 9
9 9
2 3
4 1
5 1
6 6
9 9
2 2
7 1
4 1
3 3
1 1
7 7
9 9
9 9
1 1
4 1
1 1
9 9
5 5
1 1
4 1
4 5
6 6
1 1
1 1
5 1
3 1
9 5
1 1
9 9
1 1
3 3
9 9
5 5
6 6
9 9
5 1
9 9
9 9
4 1
9 9
4 4
1 2
5 1
4 2
7 5
1 1
7 9
6 6
3 3
8 5
3 1
2 2
7 5
3 1
9 9
3 3
9 9
7 1
1 1
1 1
5 1
3 3
3 3
9 9
8 8
5 1
90
57
