## Data processing

In [1]:
import numpy as np
from matplotlib import pyplot as pl
import os
import scipy.io.wavfile as wav

%matplotlib inline

PATH = './vowels/'

In [186]:
import enum

def getGenderFromFilename(filename):
    """Translate the second character of a file name into a speaker category code.

    Returns 0 when that character is 'm', 2 when it is 'k', and 1 (female)
    for anything else. Raises IndexError if the name has fewer than two
    characters.
    """
    speaker_code = filename[1]
    if speaker_code == 'm':
        return 0
    if speaker_code == 'k':
        return 2
    return 1  # female
    
def getVoiceTypeFromFilename(filename):
    """Translate the first character of a file name into a voice type code.

    Returns 0 when the name starts with 's' and 1 otherwise.
    # NOTE(review): 's' presumably marks a synthetic voice and anything else a
    # natural one -- confirm against the dataset's naming scheme.
    """
    return 0 if filename[0] == 's' else 1
    
def isAdult(filename):
    """Return True unless the second character of the file name is 'k'."""
    speaker_code = filename[1]
    return speaker_code != 'k'


In [187]:
from pathlib import Path
from python_speech_features import mfcc

mfccRawDatas = []

# Read all the wav files found directly under PATH.
# The paths are sorted because the order in which glob yields entries is not
# specified; sorting keeps the sample order identical from one run (or one
# machine) to the next.
pathlist = sorted(Path(PATH).glob('*.wav'))
if not pathlist:
    # Fail here with a clear message instead of with an obscure IndexError
    # in a later cell when the dataset turns out to be empty.
    raise FileNotFoundError("No .wav files found in " + str(PATH))

for path in pathlist:
    sample_rate, X = wav.read(str(path))
    category = getGenderFromFilename(path.stem)
    voiceType = getVoiceTypeFromFilename(path.stem)
    #TODO : maybe add age here...

    # Store one tuple per file:
    # (category (male, female or kid), voice type (natural or synthetic), MFCC data)
    mfccRawDatas.append((category, voiceType, mfcc(X, samplerate = sample_rate, nfft = 1024)))


In [188]:
# Reduce each recording to a single feature row: the mean of every MFCC
# coefficient over all windows, followed by the gender code and then the
# voice type code.
mfccDatas = []
for gender, voice, windows in mfccRawDatas:
    row = np.append(windows.mean(axis=0), [gender, voice])
    mfccDatas.append(row)


# Quick check of the element type stored in the rows
print(type(mfccDatas[13][0]))

<class 'numpy.float64'>


In [189]:
# For the rest of the first phase, keep only the male and female samples:
# drop every row whose gender code (second-to-last column) equals 2.
datas = [row for row in mfccDatas if row[-2] != 2]


In [190]:
# Shuffle the data and split it into a training set and a test set
def split_dataset(dataset, train_test_ratio=0.8, seed=None):
    """Return a shuffled (train, test) partition of `dataset`.

    The input is never modified: a shuffled copy is built and then cut, so
    calling this function again on the same object starts from the same order.

    Parameters
    ----------
    dataset : list or numpy.ndarray
        Samples to split, one sample per element (or per row).
    train_test_ratio : float, default 0.8
        Fraction of the samples placed in the training part; the count is
        truncated with int().
    seed : int or None, default None
        When given, the shuffle uses a private generator seeded with this
        value and is reproducible. When None, NumPy's global generator is
        used, so a prior np.random.seed() call is still honoured.

    Returns
    -------
    tuple
        (train, test). Both parts are lists, or both are arrays when
        `dataset` is a numpy array.

    Raises
    ------
    ValueError
        If `train_test_ratio` is not within [0, 1].
    """
    if not 0.0 <= train_test_ratio <= 1.0:
        raise ValueError(
            "train_test_ratio must be between 0 and 1, got %r" % (train_test_ratio,)
        )

    rng = np.random if seed is None else np.random.RandomState(seed)
    order = rng.permutation(len(dataset))

    # Index through the permutation rather than shuffling in place, so the
    # caller's object keeps its original order.
    if isinstance(dataset, np.ndarray):
        shuffled = dataset[order]
    else:
        shuffled = [dataset[i] for i in order]

    nb_train = int(len(dataset) * train_test_ratio)
    return (shuffled[:nb_train], shuffled[nb_train:])

In [191]:
# Seed NumPy's global generator so the shuffle performed by split_dataset
# gives the same partition on every fresh run of the notebook.
# Note: this also fixes the sequence seen by any later code that draws from
# np.random.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

# Pass a shallow copy so `datas` keeps its original order; re-running this
# cell then always starts from the same input and yields the same split.
train, test = split_dataset(list(datas))
train = np.asarray(train)
test = np.asarray(test)

In [194]:
# Separate the measurements (X_train, X_test) from the "answer" (Y_train, Y_test).
# Every column except the last two holds an averaged MFCC value; the
# second-to-last column is the gender code used as the target. The last
# column (voice type) is not used here.
X_train, Y_train = train[:, :-2], train[:, -2]
X_test, Y_test = test[:, :-2], test[:, -2]

X_train[0]

array([ 21.82196278,  -4.16408103,   0.27876964,  56.75769686,
       -30.52459712,  -7.81567849,   2.21771159, -19.80111617,
        -3.81153082, -34.45370888,   3.62814851,  -7.5014712 ,
       -27.23080347])

## Training part

In [195]:
import mlp_backprop_momentum as mlp
%matplotlib inline

In [196]:
# Init the neural network.
# Layer sizes, in order:
#   13 inputs (one per MFCC value)
#    5 hidden nodes
#    2 outputs (female or male)
layer_sizes = [13, 5, 2]
nn = mlp.MLP(layer_sizes, 'tanh')


In [197]:
# Train the network on the training split and keep what fit() returns in MSE
# (presumably the error recorded during training -- confirm in mlp_backprop_momentum).
# NOTE(review): Y_train holds a single gender code per sample while the network
# was created with 2 output units -- confirm the target shape fit() expects.
training_set = (X_train, Y_train)
MSE = nn.fit(training_set, learning_rate=0.001, momentum=0.9, epochs=200)

In [None]:
# TODO NEXT !