In [1]:
import os

from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank
import scipy.io.wavfile as wav
from random import randint

In [2]:
# every frame is considered independent

no_of_features = 13
no_of_fbank_features = 20
no_of_columns = (3 * no_of_features) + no_of_fbank_features

def get_feature_vectors(dataset_type):
    
    #set parameters for training and testing
    if (dataset_type == "train"):
        directory = os.path.join(os.getcwd(), 'data_thuyg20_sre/enroll')
        no_of_frames = 400
        start_frame = 1
    elif (dataset_type == "test"):    
        directory = os.path.join(os.getcwd(), 'data_thuyg20_sre/test')
        no_of_frames = 40
        start_frame = randint(1, 200)
        
    dataset = numpy.empty([0, no_of_columns + 1])
    
    for file in os.listdir(directory):
        
        # filter speakers
        names = ['F101', 'F102', 'F103', 'F104', 'F105', 'M101', 'M102', 'M103', 'M104']

        if any(name in file for name in names):
            
            # extract mfcc vectors
            (rate,sig) = wav.read(os.path.join(directory, file))
            fbank_feat = logfbank(sig,rate)
            mfcc_feat = mfcc(sig,rate)
            d_mfcc_feat = delta(mfcc_feat, 2)
            dd_mfcc_feat = delta(d_mfcc_feat, 2)
            
#             fbank_feat = logfbank(sig,rate)
            mfcc_vectors = mfcc_feat[start_frame:start_frame+no_of_frames,:no_of_features]
            dmfcc_vectors = d_mfcc_feat[start_frame:start_frame+no_of_frames,:no_of_features]
            ddmfcc_vectors = dd_mfcc_feat[start_frame:start_frame+no_of_frames,:no_of_features]
            fbank_vectors = fbank_feat[start_frame:start_frame+no_of_frames,:no_of_fbank_features]
            
            feature_vectors = numpy.hstack((mfcc_vectors, dmfcc_vectors, ddmfcc_vectors, fbank_vectors))
#             print(feature_vectors.shape)
            
            # get speaker index from filename
            speaker_index = file.split("_")[0]
            if speaker_index[0] == 'M':
                speaker_index = 5 + int(speaker_index[3:])
            else:
                speaker_index = int(speaker_index[3:])

            #append speaker index to feature vectors
            np_speaker_index = numpy.array([speaker_index])
            temp = numpy.tile(np_speaker_index[numpy.newaxis,:], (feature_vectors.shape[0],1))
            concatenated_feature_vector = numpy.concatenate((feature_vectors,temp), axis=1)
            
#             print(concatenated_feature_vector.shape)
#             print(fbank_vectors.shape)
            
            # append file's data to dataset
            dataset = numpy.concatenate((dataset, concatenated_feature_vector), axis=0)
            

    return dataset


In [3]:
from keras.models import Sequential
import numpy as numpy 

Using TensorFlow backend.


In [4]:
# from numpy import genfromtxt
my_data = get_feature_vectors("train")


In [5]:
# print(my_data)
print(my_data.shape)

(3600, 60)


In [6]:
Y = numpy.copy(my_data[:, no_of_columns:])
print(Y.shape)

(3600, 1)


In [7]:
X = numpy.copy(my_data[:, :no_of_columns])
print(X.shape)
mean = X.mean(0, keepdims=True)

print(mean.shape)
std_deviation = numpy.std(X, axis=0, keepdims=True)
print(std_deviation.shape)

normalized_X = (X - mean) / std_deviation
print(normalized_X.shape)


(3600, 59)
(1, 59)
(1, 59)
(3600, 59)


In [8]:
from keras import utils as np_utils
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD

one_hot_labels = np_utils.to_categorical(Y, num_classes=10)
print(one_hot_labels)

[[0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [21]:
def train():

    # MultiLayer Perceptron
    model = Sequential()

    # 3000 0.6 1500 0.6 -> 57% acc
    # 2560 0.7 1280 0.7 -> 60% acc
    # 2560 0.7 1600 0.7 -> 51% acc
    # 2560 0.6 1600 0.7 -> 49%
    # 2560 0.7 1280 -   -> 50
    # 3000 0.7 1280 0.7 -> 54% acc
    # 2560 0.7 1280 0.8 -> 45% acc

    model.add(Dense(3000, activation='tanh', input_dim=no_of_columns))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    
#     for layer in layers:
#         model.add(Dense(layer, activation='tanh'))
#         model.add(Dropout(0.3))
    
    model.add(Dense(10, activation='softmax'))

    sgd = SGD(lr=0.005, decay=1e-5, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    model.fit(normalized_X, one_hot_labels, epochs=10, batch_size=32)
    
    return model
# score = model.evaluate(x_test, y_test, batch_size=128)

In [22]:
def test(model):
    
    test_model = get_feature_vectors("test")
#     print(test_model.shape)

    test_X = numpy.copy(test_model[:, :no_of_columns])
#     print(test_X.shape)

    normalized_test_X = (test_X - mean) / std_deviation
#     print(normalized_test_X.shape)

    test_Y = numpy.copy(test_model[:, no_of_columns:])
#     print(test_Y.shape)
    test_labels = np_utils.to_categorical(test_Y, num_classes=10)

#     print(model.test_on_batch(test_X, test_labels, sample_weight=None))
#     print(model.metrics_names)
    predictions = model.predict(test_X)
    
    b = [sum(predictions[current: current+40]) for current in range(0, len(predictions), 40)]
    predicted_Y = []
    for row in b:
        predicted_Y.append(row.argmax(axis=0))
        
    diff = predicted_Y - test_Y[::40].T[0]
    correct = sum(x == 0 for x in diff)
    total = len(predicted_Y)
    percent = correct/total * 100
    print(str(correct) + " / " + str(total) + " = " + str(percent))
    
    loss = str(numpy.sum(numpy.abs(diff)))
    print("Loss = " + loss)
    
    return correct, percent, loss

In [27]:
model = train()
test(model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
55 / 90 = 61.111111111111114
Loss = 99.0


(55, 61.111111111111114, '99.0')

In [20]:
# # with open('automate_op.csv', 'a') as the_file:
# #     the_file.write('Hello\n')

# # print("Test", file=f)
# for i in range(100, 4001,500):  
#     for j in range(100,4001,500):  
#         for k in range(100,4001,500):  
#             for l in range(100,4001,500):  
# #                 print(str(i) + ", " + str(j))
#                 model = train([i, j, k, l])

#                 correct, percent, loss = test(model)
#                 string = "3000, " + str(i) + ", " + str(j) + ", " + str(k) + ", " + str(l) + ", " + str(correct) + ", " + str(percent) + ", " + str(loss)
#                 f = open('full_automate_op.csv', 'a')
#                 print(string, file=f)
#                 f.close()

#                 correct, percent, loss = test(model)
#                 string = "3000, " + str(i) + ", " + str(j) + ", " + str(k) + ", " + str(l) + ", " + str(correct) + ", " + str(percent) + ", " + str(loss)
#                 f = open('full_automate_op.csv', 'a')
#                 print(string, file=f)
#                 f.close()

#                 correct, percent, loss = test(model)
#                 string = "3000, " + str(i) + ", " + str(j) + ", " + str(k) + ", " + str(l) + ", " + str(correct) + ", " + str(percent) + ", " + str(loss)
#                 f = open('full_automate_op.csv', 'a')
#                 print(string, file=f)
#                 f.close()

In [55]:

    
# print(predicted_Y)
# print(test_Y[::40].T)

# for t, p in zip(test_Y[::40].T[0], predicted_Y):
#     print (int(t), p)

f.close()

In [28]:
# majority

# argmax_pred = numpy.argmax(predictions, axis=1)
# argmax_pred = argmax_pred.reshape((-1, 25))

# from scipy.stats import mode
# argmax_pred = mode(argmax_pred, axis=-1)[0]

# # print(argmax_pred)
# # print(test_Y)

# for t, p in zip(test_Y[::40].T[0], argmax_pred):
#     print (int(t), p)

In [17]:
# def find_majority(k):
#     myMap = {}
#     maximum = ( '', 0 ) # (occurring element, occurrences)
#     for n in k:
#         if n in myMap: myMap[n] += 1
#         else: myMap[n] = 1

#         # Keep track of maximum on the go
#         if myMap[n] > maximum[1]: maximum = (n,myMap[n])

#     return maximum