In [1]:
# Standard library
import glob
import os

# Third-party
import librosa
import numpy as np

# List the working directory so the speaker folders and stray files are visible.
working_dir = os.getcwd()
print(os.listdir(working_dir))

['7383-3-0-0.wav', '1', '15', '2', '.ipynb_checkpoints', '7', '11', '31', '8', '20', '16', '10', '35', '21', '12', '26', '3', '24', '25', '19', '14', '6', '4', '5', '17', '13', '9', '33', 'vidtimit_documentation.pdf', '28', '32', 'Speaker Recognition.ipynb', '7061-6-0-0.wav', '29', '23', '30', '18', '34', '27', '22']


In [144]:
def extract_feature(file_name):
    """Load one audio file and reduce it to five averaged feature vectors.

    Every feature matrix produced by librosa is transposed and averaged along
    axis 0, so each feature family collapses to a single 1-D vector per file
    (the averaged axis is the frame axis in librosa's documented layout).

    Parameters
    ----------
    file_name : str
        Path handed to ``librosa.load``.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``; ``mfccs`` has 40 entries
        (``n_mfcc=40``), the other lengths follow librosa's defaults.
    """
    def _frame_mean(feature_matrix):
        # Same reduction the notebook applied to every feature family.
        return np.mean(feature_matrix.T, axis=0)

    X, sample_rate = librosa.load(file_name)
    # Magnitude spectrogram shared by the chroma and spectral-contrast features.
    stft = np.abs(librosa.stft(X))

    mfccs = _frame_mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40))
    chroma = _frame_mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate))
    # The signal must be passed as ``y=``: recent librosa releases make the
    # arguments of melspectrogram keyword-only, and the keyword form also works
    # on older releases.
    mel = _frame_mean(librosa.feature.melspectrogram(y=X, sr=sample_rate))
    contrast = _frame_mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate))
    # Tonnetz is computed on the harmonic component of the signal only.
    harmonic = librosa.effects.harmonic(X)
    tonnetz = _frame_mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate))
    return mfccs, chroma, mel, contrast, tonnetz

def parse_audio_files(parent_dir, sub_dirs, file_ext='*.wav', n_features=193):
    """Build the feature matrix and label vector for a set of speaker folders.

    For every entry ``path`` of ``parent_dir`` the files matching
    ``<path>/<sub_dirs>/<file_ext>`` are run through ``extract_feature``; the
    five returned vectors are concatenated into one row and ``path`` itself is
    recorded as that row's label.

    Parameters
    ----------
    parent_dir : iterable
        Folder names (the notebook passes the integers 1..35). Each value is
        converted with ``str`` to build the path and cast to ``int`` for the
        label.
    sub_dirs : str
        Sub-folder inside each speaker folder that holds the audio files.
    file_ext : str, optional
        Glob pattern selecting the audio files.
    n_features : int, optional
        Expected length of one concatenated feature row (193 by default, the
        width the notebook was written for). Rows of any other length are
        skipped.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``features`` of shape ``(n_files, n_features)`` with float dtype and
        ``labels`` of shape ``(n_files,)`` with integer dtype.

    Notes
    -----
    A file whose feature extraction fails is skipped, but a warning naming the
    file is emitted instead of discarding the error silently.
    ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.
    """
    import warnings

    rows, row_labels = [], []
    for path in parent_dir:
        pattern = os.path.join(str(path), sub_dirs, file_ext)
        for fn in glob.glob(pattern):
            try:
                mfccs, chroma, mel, contrast, tonnetz = extract_feature(fn)
                ext_features = np.hstack([mfccs, chroma, mel, contrast, tonnetz])
            except Exception as exc:
                warnings.warn("Skipping %s: %s" % (fn, exc))
                continue
            if ext_features.shape != (n_features,):
                warnings.warn(
                    "Skipping %s: expected %d features, got shape %s"
                    % (fn, n_features, ext_features.shape)
                )
                continue
            rows.append(ext_features)
            row_labels.append(path)

    if not rows:
        return np.empty((0, n_features)), np.empty(0, dtype=int)
    # Rows are stacked once at the end (linear instead of quadratic copying);
    # the float cast keeps the float64 result the incremental vstack produced.
    # ``int`` replaces the ``np.int`` alias that NumPy 1.24 removed.
    return np.asarray(rows, dtype=float), np.array(row_labels, dtype=int)

def one_hot_encode(labels):
    """Return a one-hot matrix with one row per label and one column per class.

    Columns follow the sorted order of the distinct label values, so labels
    that do not start at 0 or are not contiguous (for example speaker ids
    1..35) are encoded correctly. Using the raw label values as column indices
    raised ``IndexError`` for such inputs. For labels that already are
    ``0..k-1`` the result is unchanged.

    Parameters
    ----------
    labels : array-like
        1-D sequence of class labels.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), n_unique_labels)`` holding a
        single 1 per row.
    """
    labels = np.asarray(labels)
    n_labels = len(labels)
    # ``return_inverse`` gives, for each label, its position among the sorted
    # unique values, which is exactly the column that must be set.
    classes, column_index = np.unique(labels, return_inverse=True)
    encoded = np.zeros((n_labels, len(classes)))
    encoded[np.arange(n_labels), column_index.reshape(-1)] = 1
    return encoded

In [91]:
# Dataset layout used by parse_audio_files: one folder per speaker, numbered
# 1 through 35, with the recordings matched by <speaker>/audio/*.wav.
parent_dir = np.arange(1, 36)
sub_dirs = 'audio'
file_ext = '*.wav'

In [92]:
# Extract one feature row per recording; labels are the speaker folder numbers.
features, labels = parse_audio_files(
    parent_dir=parent_dir,
    sub_dirs=sub_dirs,
    file_ext=file_ext,
)

In [2]:
import pandas as pd
from patsy import dmatrices
from sklearn import metrics
from sklearn.linear_model import LogisticRegression

# sklearn.cross_validation was deprecated in scikit-learn 0.18 and removed in
# 0.20. Prefer model_selection and fall back to the old module only on
# installations that predate it.
try:
    from sklearn.model_selection import cross_val_score, train_test_split
except ImportError:
    from sklearn.cross_validation import cross_val_score, train_test_split



In [108]:
# Hold out 30% of the samples for testing (fixed seed keeps the split
# repeatable), then fit a default logistic-regression classifier on the rest.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=0
)
model2 = LogisticRegression()
model2.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [109]:
# Predict a speaker label for every held-out sample.
predicted = model2.predict(X_test)
# Function-call form prints identically on Python 2 and also runs on Python 3.
print(predicted)

[ 1  6 28  5 30 19 20 12 10 26 34  7 23 14 10 14 14 32 22 24 25 28  4 11 16
  3  2  9  8 18  8  7  6 12  6 11 22 20  8  3 29 18 20 35 30 11 11 21 18 12
 14 35 34  1  7 21 16 27  8  6 26 34  6 25  3 11  4 13 22  7 27 24 31  3 14
 31  5 29 29 18 23 21 29  3 27 35  4 23  7 13  2 23 32 29 26  2 17 13 28 22
 35  3 31 28 31]


In [112]:
# Per-class probabilities for every held-out sample (one row per sample).
probs = model2.predict_proba(X_test)
# Function-call form prints identically on Python 2 and also runs on Python 3.
print(probs)

[[  9.21388220e-01   7.26179782e-05   3.99686560e-06 ...,   2.78799587e-05
    2.25508741e-04   8.21148060e-04]
 [  9.86267145e-04   3.75610054e-06   3.62539510e-02 ...,   1.22435431e-04
    9.26138445e-05   1.02212546e-04]
 [  1.37308271e-05   8.69859835e-07   2.85747880e-02 ...,   7.32623516e-02
    1.01359881e-04   4.52908532e-06]
 ..., 
 [  5.27385589e-06   1.02140418e-06   2.05510803e-04 ...,   1.82510302e-05
    4.13143123e-04   2.74858765e-08]
 [  9.63269670e-03   1.39103571e-06   1.00597464e-03 ...,   6.14738255e-04
    1.05341994e-06   2.46914162e-08]
 [  4.39502351e-06   8.36172214e-06   3.07358285e-07 ...,   6.33430378e-06
    5.68553885e-03   1.15365919e-08]]


In [115]:
# Fraction of held-out samples whose predicted speaker matches the true one.
# Function-call form prints identically on Python 2 and also runs on Python 3.
print(metrics.accuracy_score(y_test, predicted))


0.933333333333


In [122]:
#print metrics.confusion_matrix(y_test, predicted)
#print metrics.classification_report(y_test, predicted)
import time

In [125]:
# Evaluate the model using 10-fold cross-validation and time the whole run.
a = time.time()
scores = cross_val_score(LogisticRegression(), features, labels, scoring='accuracy', cv=10)
print(scores)
# Explicit formatting keeps the "mean elapsed" layout of the original print
# statement on Python 2 while remaining valid syntax on Python 3.
print("%s %s" % (scores.mean(), time.time() - a))

[ 0.91428571  0.88571429  0.91428571  0.94285714  0.94285714  0.94285714
  1.          0.97142857  0.94285714  1.        ]
0.945714285714 3.87422990799


In [124]:
#help(cross_val_score)#2.50632381439,#3.87422990799