In [1]:
import os

from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank
import scipy.io.wavfile as wav
from random import randint

import numpy as numpy

In [2]:
def get_feature_vectors(dataset_type):
    
    #set parameters for training and testing
    if (dataset_type == "train"):
        directory = os.path.join(os.getcwd(), 'data_thuyg20_sre/enroll')
        no_of_frames = 400
    elif (dataset_type == "test"):    
        directory = os.path.join(os.getcwd(), 'data_thuyg20_sre/test')
        no_of_frames = 40
    
    dataset = numpy.empty([0, 40])
    
    for file in os.listdir(directory):
        
        # filter speakers
        names = ['F101', 'F102', 'F103', 'F104', 'F105', 'M101', 'M102', 'M103', 'M104']

        if any(name in file for name in names):
            
            # extract mfcc vectors
            (rate,sig) = wav.read(os.path.join(directory, file))
            mfcc_feat = mfcc(sig,rate)
            d_mfcc_feat = delta(mfcc_feat, 2)
            dd_mfcc_feat = delta(d_mfcc_feat, 2)
            
            #fbank_feat = logfbank(sig,rate)
            mfcc_vectors = mfcc_feat[11:11+no_of_frames,:]
            dmfcc_vectors = d_mfcc_feat[11:11+no_of_frames,:]
            ddmfcc_vectors = dd_mfcc_feat[11:11+no_of_frames,:]
            
            feature_vectors = numpy.hstack((mfcc_vectors, dmfcc_vectors, ddmfcc_vectors))
            #print(feature_vectors.shape)
            
            # get speaker index from filename
            speaker_index = file.split("_")[0]
            if speaker_index[0] == 'M':
                speaker_index = 5 + int(speaker_index[3:])
            else:
                speaker_index = int(speaker_index[3:])

            #append speaker index to feature vectors
            np_speaker_index = numpy.array([speaker_index])
            temp = numpy.tile(np_speaker_index[numpy.newaxis,:], (feature_vectors.shape[0],1))
            concatenated_feature_vector = numpy.concatenate((feature_vectors,temp), axis=1)
            
            #print(dataset.shape)
            #print(concatenated_feature_vector.shape)
            # append file's data to dataset
            dataset = numpy.concatenate((dataset, concatenated_feature_vector), axis=0)
            
            

    return dataset

In [3]:
# from numpy import genfromtxt
my_data = get_feature_vectors("train")

In [4]:
# print(my_data)
print(my_data.shape)

(3600, 40)


In [5]:
Y = numpy.copy(my_data[:, 39:])
print(Y.shape)

(3600, 1)


In [83]:
X = numpy.copy(my_data[:, :39])
print(X.shape)
mean = X.mean(0, keepdims=True)

print(mean.shape)
std_deviation = numpy.std(X, axis=0, keepdims=True)
print(std_deviation.shape)

normalized_X = (X - mean) / std_deviation
print(normalized_X.shape)

(3600, 39)
(1, 39)
(1, 39)
(3600, 39)


In [84]:
test_model = get_feature_vectors("test")
print(test_model.shape)

test_X = numpy.copy(test_model[:, :39])
print(test_X.shape)

normalized_test_X = (test_X - mean) / std_deviation
print(normalized_test_X.shape)

test_Y = numpy.copy(test_model[:, 39:])
print(test_Y.shape)

(3600, 40)
(3600, 39)
(3600, 39)
(3600, 1)


In [85]:
from neupy import algorithms, environment
from sklearn import metrics
#from sklearn import datasets
#from sklearn.model_selection import train_test_split

In [86]:
environment.reproducible()

In [87]:
# dataset = datasets.load_digits()
# x_train, x_test, y_train, y_test = train_test_split(
#     dataset.data, dataset.target, test_size=0.3
# )

Y = Y.flatten()
test_Y = test_Y.flatten()

In [88]:
pnn = algorithms.PNN(std=6, verbose=False)
pnn.train(normalized_X, Y)

In [89]:
y_predicted = pnn.predict(normalized_test_X)

                                                                               

In [90]:
print(test_Y)
print(y_predicted)

[6. 6. 6. ... 9. 9. 9.]
[5. 5. 5. ... 9. 9. 9.]


In [91]:
metrics.accuracy_score(test_Y, y_predicted)

0.26805555555555555