In [258]:
import os

from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank
import scipy.io.wavfile as wav
from random import randint
# import librosa

# import tensorflow as tf

In [259]:
# every frame is considered independent

no_of_features = 13
no_of_fbank_features = 13
no_of_columns = (3 * no_of_features) + no_of_fbank_features

def get_feature_vectors(dataset_type):
    
    #set parameters for training and testing
    if (dataset_type == "train"):
        directory = os.path.join(os.getcwd(), 'voices_processed/enroll')
        no_of_frames = 400
        start_frame = 1
    elif (dataset_type == "test"):    
        directory = os.path.join(os.getcwd(), 'voices_processed/test')
        no_of_frames = 40
        start_frame = 1
        
    dataset = numpy.empty([0, no_of_columns + 1])
    
    for file in os.listdir(directory):
        
        # filter speakers
        names = ['F001', 'F002', 'F003', 'F004', 'M001', 'M002', 'M003', 'M004']

        if any(name in file for name in names):
            
            # extract mfcc vectors
            (rate,sig) = wav.read(os.path.join(directory, file))
            fbank_feat = logfbank(sig,rate)
            mfcc_feat = mfcc(sig,rate,winlen=0.032,winstep=0.016,numcep=13,nfft=2048)
#             print("Rate :" + str(rate) + " " + file)
#             print("Signal :" + str(sig) + " " + file)
#             y, sr = librosa.load(os.path.join(directory, file))
#             mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=2048).T
            d_mfcc_feat = delta(mfcc_feat, 2)
            dd_mfcc_feat = delta(d_mfcc_feat, 2)
            
#             fbank_feat = logfbank(sig,rate)
            mfcc_vectors = mfcc_feat[start_frame:start_frame+no_of_frames,:no_of_features]
            dmfcc_vectors = d_mfcc_feat[start_frame:start_frame+no_of_frames,:no_of_features]
            ddmfcc_vectors = dd_mfcc_feat[start_frame:start_frame+no_of_frames,:no_of_features]
            fbank_vectors = fbank_feat[start_frame:start_frame+no_of_frames,:no_of_fbank_features]
            
            feature_vectors = numpy.hstack((mfcc_vectors, dmfcc_vectors, ddmfcc_vectors, fbank_vectors))
#             print(feature_vectors.shape)
            
            # get speaker index from filename
            speaker_index = file.split("_")[0]
            if speaker_index[0] == 'M':
                speaker_index = 5 + int(speaker_index[3:])
            else:
                speaker_index = int(speaker_index[3:])

            #append speaker index to feature vectors
            np_speaker_index = numpy.array([speaker_index])
            temp = numpy.tile(np_speaker_index[numpy.newaxis,:], (feature_vectors.shape[0],1))
            concatenated_feature_vector = numpy.concatenate((feature_vectors,temp), axis=1)
            
#             print(concatenated_feature_vector.shape)
#             print(fbank_vectors.shape)
            
            # append file's data to dataset
            dataset = numpy.concatenate((dataset, concatenated_feature_vector), axis=0)
            

    return dataset


In [260]:
from keras.models import Sequential
import numpy as numpy 

In [261]:
# from numpy import genfromtxt
my_data = get_feature_vectors("train")




In [262]:
# numpy.set_printoptions(threshold=numpy.nan)
# print(my_data)
print(my_data.shape)

(3200, 53)


In [263]:
Y = numpy.copy(my_data[:, no_of_columns:])
print(Y.shape)

(3200, 1)


In [264]:
X = numpy.copy(my_data[:, :no_of_columns])
print(X.shape)
mean = X.mean(0, keepdims=True)

print(mean.shape)
std_deviation = numpy.std(X, axis=0, keepdims=True)
print(std_deviation.shape)

normalized_X = (X - mean) / std_deviation
print(normalized_X.shape)


(3200, 52)
(1, 52)
(1, 52)
(3200, 52)


In [265]:
from keras import utils as np_utils
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD

one_hot_labels = np_utils.to_categorical(Y, num_classes=10)
print(one_hot_labels)

[[ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  0.  0. ...,  0.  1.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]
 [ 0.  0.  0. ...,  0.  0.  0.]]


In [266]:
def train():

    # MultiLayer Perceptron
    model = Sequential()

    # 3000 0.6 1500 0.6 -> 57% acc
    # 2560 0.7 1280 0.7 -> 60% acc
    # 2560 0.7 1600 0.7 -> 51% acc
    # 2560 0.6 1600 0.7 -> 49%
    # 2560 0.7 1280 -   -> 50
    # 3000 0.7 1280 0.7 -> 54% acc
    # 2560 0.7 1280 0.8 -> 45% acc

    model.add(Dense(3000, activation='tanh', input_dim=no_of_columns))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    model.add(Dense(100, activation='tanh'))
    model.add(Dropout(0.3))
    
    model.add(Dense(10, activation='softmax'))

    sgd = SGD(lr=0.005, decay=1e-5, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    model.fit(normalized_X, one_hot_labels, epochs=10, batch_size=32)
    
    return model
# score = model.evaluate(x_test, y_test, batch_size=128)

In [267]:
def test(model):
    
    test_model = get_feature_vectors("test")
    print(test_model.shape)

    test_X = numpy.copy(test_model[:, :no_of_columns])
#     print(test_X.shape)

    normalized_test_X = (test_X - mean) / std_deviation
#     print(normalized_test_X.shape)

    test_Y = numpy.copy(test_model[:, no_of_columns:])
#     print(test_Y.shape)
    test_labels = np_utils.to_categorical(test_Y, num_classes=10)

#     print(model.test_on_batch(test_X, test_labels, sample_weight=None))
#     print(model.metrics_names)
    predictions = model.predict(test_X)
    
    b = [sum(predictions[current: current+40]) for current in range(0, len(predictions), 40)]
    predicted_Y = []
    for row in b:
        predicted_Y.append(row.argmax(axis=0))
    
    for t, p in zip(test_Y[::40].T[0], predicted_Y):
        print (int(t), p)
    
    diff = predicted_Y - test_Y[::40].T[0]
    correct = sum(x == 0 for x in diff)
    total = len(predicted_Y)
    percent = correct/total * 100
    print(str(correct) + " / " + str(total) + " = " + str(percent))
    
    loss = str(numpy.sum(numpy.abs(diff)))
    print("Loss = " + loss)
    
    return correct, percent, loss

In [268]:
model = train()
test(model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




(2280, 53)
6 6
2 1
4 4
9 7
2 1
3 4
7 9
3 3
1 2
1 1
7 7
6 6
9 9
1 8
1 6
3 4
4 4
1 1
9 7
4 4
4 4
2 1
6 6
2 1
9 7
7 3
3 3
7 9
2 1
6 6
3 4
6 6
4 7
8 8
7 7
7 9
3 4
7 7
4 4
1 1
9 9
8 8
3 4
6 6
1 8
4 4
9 9
7 3
9 9
1 1
2 1
3 4
6 6
6 6
9 9
8 8
8 8
32 / 57 = 56.1403508772
Loss = 55.0


(32, 56.140350877192979, '55.0')

In [269]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution1D, MaxPooling1D
from keras.utils import np_utils

from matplotlib import pyplot as plt

def cnn_train():
    
    temp = normalized_X.reshape(normalized_X.shape[0], no_of_columns, 1)
    
    model = Sequential()

    model.add(Convolution1D(32, 13, activation='tanh', input_shape=(no_of_columns,1)))
    model.add(Convolution1D(32, 7, activation='tanh'))
    model.add(Convolution1D(32, 1, activation='tanh'))

    model.add(MaxPooling1D(pool_size=(1)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(60, activation='tanh'))
    model.add(Dropout(0.25))
    model.add(Dense(10, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    model.fit(temp, one_hot_labels, epochs=10, batch_size=32)
    return model

In [270]:
def test_cnn(model):
    
    test_model = get_feature_vectors("test")
#     print(test_model.shape)

    test_X = numpy.copy(test_model[:, :no_of_columns])
#     print(test_X.shape)

    normalized_test_X = (test_X - mean) / std_deviation
#     print(normalized_test_X.shape)

    test_Y = numpy.copy(test_model[:, no_of_columns:])
#     print(test_Y.shape)
    test_labels = np_utils.to_categorical(test_Y, num_classes=10)
    
    test_X = test_X.reshape(test_X.shape[0], no_of_columns, 1)
    normalized_test_X = normalized_test_X.reshape(normalized_test_X.shape[0], no_of_columns, 1)
    
    print(model.test_on_batch(test_X, test_labels, sample_weight=None))
    print(model.metrics_names)
    predictions = model.predict(test_X)

    b = [sum(predictions[current: current+40]) for current in range(0, len(predictions), 40)]
    predicted_Y = []
    for row in b:
        predicted_Y.append(row.argmax(axis=0))

    # print(predicted_Y)
    # print(test_Y[::40].T)

    for t, p in zip(test_Y[::40].T[0], predicted_Y):
       print (int(t), p)

    diff = predicted_Y - test_Y[::40].T[0]

    numerator = sum(x == 0 for x in diff)
    denominator = len(predicted_Y)

    print("{} of {}".format(numerator, denominator))

    print("Accuracy: {}".format(numerator/denominator))

In [271]:
cnn_model = cnn_train()
test_cnn(cnn_model)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




[array(1.8757615089416504, dtype=float32), array(0.4991227984428406, dtype=float32)]
['loss', 'acc']
6 6
2 1
4 4
9 7
2 1
3 3
7 7
3 7
1 1
1 1
7 7
6 6
9 7
1 1
1 1
3 3
4 4
1 1
9 7
4 4
4 3
2 1
6 6
2 1
9 7
7 3
3 3
7 7
2 1
6 6
3 3
6 6
4 7
8 8
7 7
7 3
3 3
7 7
4 4
1 1
9 7
8 8
3 3
6 6
1 1
4 3
9 9
7 7
9 9
1 1
2 1
3 3
6 6
6 6
9 9
8 8
8 8
40 of 57
Accuracy: 0.7017543859649122


In [272]:
test(model)
test_cnn(cnn_model)



(2280, 53)




6 6
2 1
4 4
9 7
2 1
3 4
7 9
3 3
1 2
1 1
7 7
6 6
9 9
1 8
1 6
3 4
4 4
1 1
9 7
4 4
4 4
2 1
6 6
2 1
9 7
7 3
3 3
7 9
2 1
6 6
3 4
6 6
4 7
8 8
7 7
7 9
3 4
7 7
4 4
1 1
9 9
8 8
3 4
6 6
1 8
4 4
9 9
7 3
9 9
1 1
2 1
3 4
6 6
6 6
9 9
8 8
8 8
32 / 57 = 56.1403508772
Loss = 55.0




[array(1.8757615089416504, dtype=float32), array(0.4991227984428406, dtype=float32)]
['loss', 'acc']
6 6
2 1
4 4
9 7
2 1
3 3
7 7
3 7
1 1
1 1
7 7
6 6
9 7
1 1
1 1
3 3
4 4
1 1
9 7
4 4
4 3
2 1
6 6
2 1
9 7
7 3
3 3
7 7
2 1
6 6
3 3
6 6
4 7
8 8
7 7
7 3
3 3
7 7
4 4
1 1
9 7
8 8
3 3
6 6
1 1
4 3
9 9
7 7
9 9
1 1
2 1
3 3
6 6
6 6
9 9
8 8
8 8
40 of 57
Accuracy: 0.7017543859649122


In [30]:

import python_speech_features
python_speech_features.__file__

# print(predicted_Y)
# print(test_Y[::40].T)

# for t, p in zip(test_Y[::40].T[0], predicted_Y):
#     print (int(t), p)



'/home/rohan/anaconda3/lib/python3.6/site-packages/python_speech_features-0.6-py3.6.egg/python_speech_features/__init__.py'

In [None]:
# majority

# argmax_pred = numpy.argmax(predictions, axis=1)
# argmax_pred = argmax_pred.reshape((-1, 25))

# from scipy.stats import mode
# argmax_pred = mode(argmax_pred, axis=-1)[0]

# # print(argmax_pred)
# # print(test_Y)

# for t, p in zip(test_Y[::40].T[0], argmax_pred):
#     print (int(t), p)

In [None]:
# def find_majority(k):
#     myMap = {}
#     maximum = ( '', 0 ) # (occurring element, occurrences)
#     for n in k:
#         if n in myMap: myMap[n] += 1
#         else: myMap[n] = 1

#         # Keep track of maximum on the go
#         if myMap[n] > maximum[1]: maximum = (n,myMap[n])

#     return maximum