In [1]:
import librosa
import numpy as np

indir = 'chunks/' # already VAD
from os import listdir
from os.path import isfile, join
mypath = 'chunks/'
flist = [f for f in listdir(indir) if isfile(join(indir, f))]
print(len(flist), flist[0])

2289 DR1-MCPM0-SA1-00.wav


In [2]:
def get_attributes(fname):
    attr = fname.split('.')[0].split('-')
    dialect = attr[0]
    gender = attr[1][0]
    speaker_id = attr[1]
    sentence_type = attr[2][:2]
    return dialect, gender, speaker_id, sentence_type

In [3]:
train = {}
test = {}

for fname in flist:
    input_path = indir + fname
    y, sr = librosa.load(input_path, sr=None) # set sr=None for orig file sr otherwise it is converted to ~22K

    # scaling the maximum of absolute amplitude to 1
    processed_data = y/max(abs(y))
    
    # TODO: calc VAD (already done)
    
    # https://groups.google.com/forum/#!topic/librosa/V4Z1HpTKn8Q
    mfcc = librosa.feature.mfcc(y=processed_data, sr=sr, n_mfcc=13, n_fft=(25*sr)//1000, hop_length=(10*sr)//1000)
    mfcc[0] = librosa.feature.rmse(processed_data, hop_length=int(0.010*sr), n_fft=int(0.025*sr)) 
    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
    features = np.vstack([mfcc, mfcc_delta, mfcc_delta2]) 
    
    # split train test
    dialect, gender, speaker_id, sentence_type = get_attributes(fname)
    if sentence_type == 'SA':
        test.setdefault(speaker_id, []).append(features)
    else:
        train.setdefault(speaker_id, []).append(features)

In [4]:
ids = list(test.keys())
ids.sort()
print(ids)

idx = {}
for i in range(len(ids)):
    idx[ids[i]] = i # TODO: for MATLAB set i+1 (i.e 1 to 200)
print(idx)

['MADC0', 'MAEB0', 'MAKB0', 'MAKR0', 'MAPV0', 'MARC0', 'MARW0', 'MBEF0', 'MBGT0', 'MBJV0', 'MBMA0', 'MBWP0', 'MCAL0', 'MCDC0', 'MCDD0', 'MCDR0', 'MCEF0', 'MCEW0', 'MCHL0', 'MCLM0', 'MCPM0', 'MCSS0', 'MCTM0', 'MDAC0', 'MDAS0', 'MDBB1', 'MDBP0', 'MDCD0', 'MDDC0', 'MDEF0', 'MDEM0', 'MDHL0', 'MDHS0', 'MDJM0', 'MDLB0', 'MDLC0', 'MDLC2', 'MDLH0', 'MDMA0', 'MDMT0', 'MDNS0', 'MDPK0', 'MDPS0', 'MDSJ0', 'MDSS0', 'MDSS1', 'MDTB0', 'MDWD0', 'MDWH0', 'MDWM0', 'MEDR0', 'MEFG0', 'MEGJ0', 'MESG0', 'MEWM0', 'MFER0', 'MFMC0', 'MFRM0', 'MFWK0', 'MGAF0', 'MGAG0', 'MGES0', 'MGJC0', 'MGRL0', 'MGRP0', 'MGSH0', 'MGXP0', 'MHIT0', 'MHJB0', 'MHMG0', 'MHMR0', 'MHRM0', 'MILB0', 'MJAC0', 'MJAE0', 'MJBG0', 'MJDA0', 'MJDC0', 'MJDE0', 'MJEB0', 'MJEB1', 'MJEE0', 'MJHI0', 'MJJB0', 'MJJJ0', 'MJKR0', 'MJLB0', 'MJLG1', 'MJLS0', 'MJMA0', 'MJMD0', 'MJMM0', 'MJPM0', 'MJPM1', 'MJRH0', 'MJRH1', 'MJRP0', 'MJSR0', 'MJWS0', 'MJWT0', 'MJXL0', 'MKAH0', 'MKAJ0', 'MKAM0', 'MKDT0', 'MKJO0', 'MKLS0', 'MKLS1', 'MKLW0', 'MKXL0', 'MLBC0', 

In [5]:
def concat(x, win_size=10, hop_size=3):
    r, c = x.shape
    y = []
    for i in range(0, c, hop_size):
        if i + win_size > c:
            break
        y.append(x[:, i:i + win_size].T.flatten())
    return np.array(y)

In [6]:
# gmvn
X_train = []
Y_train = []
X_test = []
Y_test = []

from sklearn import preprocessing

# test
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    for features in feature_list:
        frames = concat(features)
        for frame in frames:
            X_test.append(frame)
            Y_test.append(speaker_id)
            
X_test = np.array(X_test)
Y_test = np.array(Y_test)

# train
for speaker_id, feature_list in train.items():
    speaker_id = idx[speaker_id]    
    for features in feature_list:
        frames = concat(features)
        for frame in frames:
            X_train.append(frame)
            Y_train.append(speaker_id)
            
X_train = np.array(X_train)
Y_train = np.array(Y_train)

# mean var normalize
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(X_train.shape, X_test.shape)
print(Y_train.shape, Y_test.shape)

(137520, 390) (36091, 390)
(137520,) (36091,)


In [7]:
# shuffle training data
from sklearn.utils import shuffle
X_train, Y_train = shuffle(X_train, Y_train)
print('Shuffled')

In [8]:
from sklearn.neural_network import MLPClassifier
# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     solver='sgd', verbose=10, tol=1e-4, random_state=1)
mlp = MLPClassifier(hidden_layer_sizes=(200,), max_iter=1000, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.01, learning_rate='adaptive',
                    warm_start=True)

mlp.fit(X_train, Y_train)
print("Training set score: %f" % mlp.score(X_train, Y_train))
print("Test set score: %f" % mlp.score(X_test, Y_test))

Iteration 1, loss = 4.71730816
Iteration 2, loss = 3.78623088
Iteration 3, loss = 3.38797034
Iteration 4, loss = 3.13454551
Iteration 5, loss = 2.94932082
Iteration 6, loss = 2.80586097
Iteration 7, loss = 2.68735592
Iteration 8, loss = 2.58932403
Iteration 9, loss = 2.50644659
Iteration 10, loss = 2.43427894
Iteration 11, loss = 2.37067284
Iteration 12, loss = 2.31609285
Iteration 13, loss = 2.26741936
Iteration 14, loss = 2.22292598
Iteration 15, loss = 2.18275502
Iteration 16, loss = 2.14833112
Iteration 17, loss = 2.11331054
Iteration 18, loss = 2.08407377
Iteration 19, loss = 2.05708875
Iteration 20, loss = 2.03027751
Iteration 21, loss = 2.00547440
Iteration 22, loss = 1.98419588
Iteration 23, loss = 1.96284947
Iteration 24, loss = 1.94444603
Iteration 25, loss = 1.92402935
Iteration 26, loss = 1.90717032
Iteration 27, loss = 1.89015804
Iteration 28, loss = 1.87385501
Iteration 29, loss = 1.86028455
Iteration 30, loss = 1.84497108
Iteration 31, loss = 1.83076248
Iteration 32, los

Iteration 253, loss = 1.27171232
Iteration 254, loss = 1.26853829
Iteration 255, loss = 1.26719553
Iteration 256, loss = 1.26513318
Iteration 257, loss = 1.26621020
Iteration 258, loss = 1.26462495
Iteration 259, loss = 1.26509413
Iteration 260, loss = 1.26449429
Iteration 261, loss = 1.26297612
Iteration 262, loss = 1.26258637
Iteration 263, loss = 1.26263540
Iteration 264, loss = 1.26212084
Iteration 265, loss = 1.26001725
Iteration 266, loss = 1.25960532
Iteration 267, loss = 1.26037240
Iteration 268, loss = 1.25784532
Iteration 269, loss = 1.25812519
Iteration 270, loss = 1.25899076
Iteration 271, loss = 1.25627569
Iteration 272, loss = 1.25749370
Iteration 273, loss = 1.25563467
Iteration 274, loss = 1.25372125
Iteration 275, loss = 1.25483417
Iteration 276, loss = 1.25295418
Iteration 277, loss = 1.25397911
Iteration 278, loss = 1.25545449
Iteration 279, loss = 1.25414359
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Setting learning rate to 0.0

In [17]:
# segment acc
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        x = []
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
        x = scaler.transform(x)
        pred = stats.mode(mlp.predict(x)).mode[0]
        y_true.append(speaker_id)
        y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.873536299766
maxlen 496


In [18]:
np.save('coefs_gmvn', mlp.coefs_)
np.save('intercepts_gmvn', mlp.intercepts_)

In [19]:
# all segment acc ~ file
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    x = []
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
    x = scaler.transform(x)
    pred = stats.mode(mlp.predict(x)).mode[0]
    y_true.append(speaker_id)
    y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.985
maxlen 496


In [20]:
from sklearn.neural_network import MLPClassifier
# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     solver='sgd', verbose=10, tol=1e-4, random_state=1)
mlp2 = MLPClassifier(hidden_layer_sizes=(200,), max_iter=1, alpha=1e-3,
                    solver='sgd', verbose=10, tol=1e-3, random_state=1,
                    learning_rate_init=.01, learning_rate='adaptive',
                    warm_start=True)

max_iter = 100
for i in range(max_iter):
    mlp2.fit(X_train, Y_train)
    print("Training set score: %f" % mlp2.score(X_train, Y_train))
    print("Test set score: %f" % mlp2.score(X_test, Y_test))
    print()

Iteration 1, loss = 4.71842705




Training set score: 0.130185
Test set score: 0.109362

Iteration 2, loss = 3.78699473
Training set score: 0.205519
Test set score: 0.168823

Iteration 3, loss = 3.39188390
Training set score: 0.253207
Test set score: 0.202904

Iteration 4, loss = 3.14046168
Training set score: 0.290954
Test set score: 0.225763

Iteration 5, loss = 2.95567318
Training set score: 0.321139
Test set score: 0.242554

Iteration 6, loss = 2.81184253
Training set score: 0.346371
Test set score: 0.255438

Iteration 7, loss = 2.69486814
Training set score: 0.367816
Test set score: 0.266354

Iteration 8, loss = 2.59725279
Training set score: 0.386198
Test set score: 0.274528

Iteration 9, loss = 2.51442501
Training set score: 0.402363
Test set score: 0.279516

Iteration 10, loss = 2.44342985
Training set score: 0.415525
Test set score: 0.283561

Iteration 11, loss = 2.38169425
Training set score: 0.427225
Test set score: 0.286055

Iteration 12, loss = 2.32729595
Training set score: 0.438562
Test set score: 0.2892

Test set score: 0.317919

Iteration 96, loss = 1.50294932
Training set score: 0.607003
Test set score: 0.318307

Iteration 97, loss = 1.50046350
Training set score: 0.607475
Test set score: 0.317752

Iteration 98, loss = 1.49813745
Training set score: 0.607933
Test set score: 0.317946

Iteration 99, loss = 1.49580275
Training set score: 0.608573
Test set score: 0.317752

Iteration 100, loss = 1.49360012
Training set score: 0.608944
Test set score: 0.317309



In [21]:
# segment acc
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        x = []
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
        x = scaler.transform(x)
        pred = stats.mode(mlp2.predict(x)).mode[0]
        y_true.append(speaker_id)
        y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.861826697892
maxlen 496


In [22]:
# all segment acc ~ file
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    x = []
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
    x = scaler.transform(x)
    pred = stats.mode(mlp2.predict(x)).mode[0]
    y_true.append(speaker_id)
    y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.955
maxlen 496


In [23]:
for i in range(50):
    mlp2.fit(X_train, Y_train)
    print("Training set score: %f" % mlp2.score(X_train, Y_train))
    print("Test set score: %f" % mlp2.score(X_test, Y_test))
    print()

Iteration 101, loss = 1.49129646
Training set score: 0.609446
Test set score: 0.316672

Iteration 102, loss = 1.48910984
Training set score: 0.609562
Test set score: 0.317170

Iteration 103, loss = 1.48686376
Training set score: 0.610042
Test set score: 0.317420

Iteration 104, loss = 1.48475710
Training set score: 0.610878
Test set score: 0.317281

Iteration 105, loss = 1.48252212
Training set score: 0.611075
Test set score: 0.316977

Iteration 106, loss = 1.48048121
Training set score: 0.611511
Test set score: 0.316395

Iteration 107, loss = 1.47840514
Training set score: 0.612347
Test set score: 0.316478

Iteration 108, loss = 1.47642136
Training set score: 0.612769
Test set score: 0.316395

Iteration 109, loss = 1.47455120
Training set score: 0.613045
Test set score: 0.316450

Iteration 110, loss = 1.47253606
Training set score: 0.613823
Test set score: 0.316312

Iteration 111, loss = 1.47050352
Training set score: 0.614049
Test set score: 0.315508

Iteration 112, loss = 1.46857468

In [24]:
# segment acc
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        x = []
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
        x = scaler.transform(x)
        pred = stats.mode(mlp2.predict(x)).mode[0]
        y_true.append(speaker_id)
        y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.864168618267
maxlen 496


In [25]:
# all segment acc ~ file
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    x = []
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
    x = scaler.transform(x)
    pred = stats.mode(mlp2.predict(x)).mode[0]
    y_true.append(speaker_id)
    y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.96
maxlen 496


In [31]:
from sklearn.neural_network import MLPClassifier
# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     solver='sgd', verbose=10, tol=1e-4, random_state=1)
mlp3 = MLPClassifier(hidden_layer_sizes=(200,), max_iter=1, alpha=5e-4,
                    solver='sgd', verbose=10, tol=5e-4, random_state=1,
                    learning_rate_init=.01, learning_rate='adaptive',
                    warm_start=True)
test_scores = []
for i in range(150):
    mlp3.fit(X_train, Y_train)
    print("Training set score: %f" % mlp3.score(X_train, Y_train))
    sc = mlp3.score(X_test, Y_test)
    print("Test set score: %f" % sc)
    print()
    test_scores.append(sc)
    if i>2 and test_scores[i] < test_scores[i-1] and test_scores[i-1] < test_scores[i-2]:
        print('Test scores decreased in last 2 iter. Stopping.')
        break

Iteration 1, loss = 4.71781510




Training set score: 0.130076
Test set score: 0.109695

Iteration 2, loss = 3.78631292
Training set score: 0.205054
Test set score: 0.168518

Iteration 3, loss = 3.39134134
Training set score: 0.253476
Test set score: 0.202183

Iteration 4, loss = 3.13987274
Training set score: 0.290518
Test set score: 0.225070

Iteration 5, loss = 2.95490548
Training set score: 0.320957
Test set score: 0.243191

Iteration 6, loss = 2.81082890
Training set score: 0.346640
Test set score: 0.255853

Iteration 7, loss = 2.69388481
Training set score: 0.367808
Test set score: 0.267019

Iteration 8, loss = 2.59627334
Training set score: 0.386046
Test set score: 0.274030

Iteration 9, loss = 2.51333534
Training set score: 0.401912
Test set score: 0.279405

Iteration 10, loss = 2.44201357
Training set score: 0.415561
Test set score: 0.283312

Iteration 11, loss = 2.38022132
Training set score: 0.427276
Test set score: 0.287634

Iteration 12, loss = 2.32582483
Training set score: 0.438111
Test set score: 0.2910

In [37]:
for i in range(len(test_scores), 150):
    mlp3.fit(X_train, Y_train)
    print("Training set score: %f" % mlp3.score(X_train, Y_train))
    sc = mlp3.score(X_test, Y_test)
    print("Test set score: %f" % sc)
    print()
    test_scores.append(sc)
    if i>2 and test_scores[i] < test_scores[i-1] and test_scores[i-1] < test_scores[i-2]:
        print('Test scores decreased in last 2 iter. Stopping.')
        break

Iteration 43, loss = 1.72783009
Training set score: 0.561846
Test set score: 0.316256

Iteration 44, loss = 1.72011018
Training set score: 0.563118
Test set score: 0.316561

Iteration 45, loss = 1.71243133
Training set score: 0.564718
Test set score: 0.316589

Iteration 46, loss = 1.70521025
Training set score: 0.566114
Test set score: 0.316866

Iteration 47, loss = 1.69804745
Training set score: 0.567554
Test set score: 0.316921

Iteration 48, loss = 1.69103457
Training set score: 0.569030
Test set score: 0.317198

Iteration 49, loss = 1.68437581
Training set score: 0.570193
Test set score: 0.317143

Iteration 50, loss = 1.67777922
Training set score: 0.571175
Test set score: 0.317558

Iteration 51, loss = 1.67146303
Training set score: 0.572593
Test set score: 0.318029

Iteration 52, loss = 1.66528261
Training set score: 0.574091
Test set score: 0.317891

Iteration 53, loss = 1.65934850
Training set score: 0.575073
Test set score: 0.318196

Iteration 54, loss = 1.65338124
Training se

In [38]:
# segment acc
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        x = []
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
        x = scaler.transform(x)
        pred = stats.mode(mlp3.predict(x)).mode[0]
        y_true.append(speaker_id)
        y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.88056206089
maxlen 496


In [40]:
# all segment acc ~ file
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    x = []
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
    x = scaler.transform(x)
    pred = stats.mode(mlp2.predict(x)).mode[0]
    y_true.append(speaker_id)
    y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.96
maxlen 496


In [45]:
from sklearn.neural_network import MLPClassifier
# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     solver='sgd', verbose=10, tol=1e-4, random_state=1)
mlp4 = MLPClassifier(hidden_layer_sizes=(200,), max_iter=1, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.05, learning_rate='adaptive',
                    warm_start=True)
test_scores = []
for i in range(500):
    mlp4.fit(X_train, Y_train)
    print("Training set score: %f" % mlp4.score(X_train, Y_train))
    sc = mlp4.score(X_test, Y_test)
    print("Test set score: %f" % sc)
    print()
    test_scores.append(sc)
    if i>2 and test_scores[i] < test_scores[i-1] and test_scores[i-1] < test_scores[i-2]:
        print('Test scores decreased in last 2 iter. Stopping.')

Iteration 1, loss = 3.89630197




Training set score: 0.253956
Test set score: 0.197639

Iteration 2, loss = 3.02859132
Training set score: 0.328905
Test set score: 0.237206

Iteration 3, loss = 2.73404115
Training set score: 0.368965
Test set score: 0.253609

Iteration 4, loss = 2.56842211
Training set score: 0.395528
Test set score: 0.264747

Iteration 5, loss = 2.45548164
Training set score: 0.415300
Test set score: 0.272921

Iteration 6, loss = 2.37351458
Training set score: 0.430606
Test set score: 0.277022

Iteration 7, loss = 2.30687964
Training set score: 0.441398
Test set score: 0.281261

Iteration 8, loss = 2.25293919
Training set score: 0.450233
Test set score: 0.283838

Iteration 9, loss = 2.21090773
Training set score: 0.459031
Test set score: 0.286470

Iteration 10, loss = 2.17442721
Training set score: 0.465285
Test set score: 0.285833

Iteration 11, loss = 2.14255675
Training set score: 0.470855
Test set score: 0.285473

Test scores decreased in last 2 iter. Stopping.
Iteration 12, loss = 2.11561201
Tra

Training set score: 0.548786
Test set score: 0.284060

Iteration 87, loss = 1.72305084
Training set score: 0.549295
Test set score: 0.281594

Iteration 88, loss = 1.72137496
Training set score: 0.549091
Test set score: 0.283700

Iteration 89, loss = 1.72087031
Training set score: 0.551222
Test set score: 0.283367

Iteration 90, loss = 1.71940485
Training set score: 0.550742
Test set score: 0.283173

Test scores decreased in last 2 iter. Stopping.
Iteration 91, loss = 1.71658201
Training set score: 0.551105
Test set score: 0.282259

Test scores decreased in last 2 iter. Stopping.
Iteration 92, loss = 1.71619120
Training set score: 0.550887
Test set score: 0.282619

Iteration 93, loss = 1.71458046
Training set score: 0.552291
Test set score: 0.282757

Iteration 94, loss = 1.71307891
Training set score: 0.551760
Test set score: 0.282397

Iteration 95, loss = 1.71241414
Training set score: 0.552472
Test set score: 0.281289

Test scores decreased in last 2 iter. Stopping.
Iteration 96, loss

Test set score: 0.277964

Test scores decreased in last 2 iter. Stopping.
Iteration 172, loss = 1.65637488
Training set score: 0.563460
Test set score: 0.278324

Iteration 173, loss = 1.65599798
Training set score: 0.562595
Test set score: 0.278684

Iteration 174, loss = 1.65621384
Training loss did not improve more than tol=0.000100 for two consecutive epochs. Setting learning rate to 0.010000
Training set score: 0.563438
Test set score: 0.279987

Iteration 175, loss = 1.41040656
Training set score: 0.648342
Test set score: 0.299105

Iteration 176, loss = 1.30278196
Training set score: 0.655868
Test set score: 0.300241

Iteration 177, loss = 1.27655631
Training set score: 0.659729
Test set score: 0.301377

Iteration 178, loss = 1.26147999
Training set score: 0.662595
Test set score: 0.301432

Iteration 179, loss = 1.25046206
Training set score: 0.664754
Test set score: 0.301654

Iteration 180, loss = 1.24205926
Training set score: 0.666434
Test set score: 0.302042

Iteration 181, loss

Training set score: 0.690183
Test set score: 0.301266

Iteration 256, loss = 1.13806129
Training set score: 0.690307
Test set score: 0.300823

Iteration 257, loss = 1.13756260
Training set score: 0.690205
Test set score: 0.300490

Test scores decreased in last 2 iter. Stopping.
Iteration 258, loss = 1.13708628
Training set score: 0.690780
Test set score: 0.301155

Iteration 259, loss = 1.13665299
Training set score: 0.690750
Test set score: 0.300740

Iteration 260, loss = 1.13610389
Training set score: 0.690932
Test set score: 0.300407

Test scores decreased in last 2 iter. Stopping.
Iteration 261, loss = 1.13563784
Training set score: 0.690867
Test set score: 0.301045

Iteration 262, loss = 1.13532288
Training set score: 0.690743
Test set score: 0.301072

Iteration 263, loss = 1.13474850
Training set score: 0.690801
Test set score: 0.300352

Iteration 264, loss = 1.13436235
Training set score: 0.690910
Test set score: 0.300906

Iteration 265, loss = 1.13394653
Training set score: 0.69

Test set score: 0.299160

Iteration 343, loss = 1.10834820
Training set score: 0.696851
Test set score: 0.298911

Iteration 344, loss = 1.10808281
Training set score: 0.696546
Test set score: 0.298606

Test scores decreased in last 2 iter. Stopping.
Iteration 345, loss = 1.10791302
Training set score: 0.696633
Test set score: 0.298385

Test scores decreased in last 2 iter. Stopping.
Iteration 346, loss = 1.10760707
Training set score: 0.696568
Test set score: 0.298689

Iteration 347, loss = 1.10732282
Training set score: 0.696655
Test set score: 0.298717

Iteration 348, loss = 1.10717470
Training set score: 0.696859
Test set score: 0.298939

Iteration 349, loss = 1.10685119
Training set score: 0.697048
Test set score: 0.298911

Iteration 350, loss = 1.10659417
Training set score: 0.696873
Test set score: 0.298495

Test scores decreased in last 2 iter. Stopping.
Iteration 351, loss = 1.10640795
Training set score: 0.697055
Test set score: 0.298606

Iteration 352, loss = 1.10620190
Train

Test set score: 0.297692

Iteration 428, loss = 1.08990139
Training set score: 0.700298
Test set score: 0.297553

Test scores decreased in last 2 iter. Stopping.
Iteration 429, loss = 1.08973039
Training set score: 0.700313
Test set score: 0.297387

Test scores decreased in last 2 iter. Stopping.
Iteration 430, loss = 1.08955558
Training set score: 0.700465
Test set score: 0.297941

Iteration 431, loss = 1.08946456
Training set score: 0.700349
Test set score: 0.297747

Iteration 432, loss = 1.08925281
Training set score: 0.700429
Test set score: 0.297720

Test scores decreased in last 2 iter. Stopping.
Iteration 433, loss = 1.08899214
Training set score: 0.700262
Test set score: 0.297858

Iteration 434, loss = 1.08876771
Training set score: 0.700545
Test set score: 0.297470

Iteration 435, loss = 1.08859822
Training set score: 0.700095
Test set score: 0.297609

Iteration 436, loss = 1.08858665
Training set score: 0.700604
Test set score: 0.297221

Iteration 437, loss = 1.08827456
Train

In [46]:
# segment acc
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        x = []
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
        x = scaler.transform(x)
        pred = stats.mode(mlp4.predict(x)).mode[0]
        y_true.append(speaker_id)
        y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.873536299766
maxlen 496


In [47]:
# all segment acc ~ file
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    x = []
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
    x = scaler.transform(x)
    pred = stats.mode(mlp4.predict(x)).mode[0]
    y_true.append(speaker_id)
    y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.975
maxlen 496


In [48]:
from sklearn.neural_network import MLPClassifier
# mlp = MLPClassifier(hidden_layer_sizes=(100, 100), max_iter=400, alpha=1e-4,
#                     solver='sgd', verbose=10, tol=1e-4, random_state=1)
mlp5 = MLPClassifier(hidden_layer_sizes=(200,), max_iter=1, alpha=1e-4,
                    solver='sgd', verbose=10, tol=1e-4, random_state=1,
                    learning_rate_init=.01, learning_rate='adaptive',
                    warm_start=True)
test_scores = []
for i in range(500):
    mlp5.fit(X_train, Y_train)
    print("Training set score: %f" % mlp5.score(X_train, Y_train))
    sc = mlp5.score(X_test, Y_test)
    print("Test set score: %f" % sc)
    print()
    test_scores.append(sc)
    if i>2 and test_scores[i] < test_scores[i-1] and test_scores[i-1] < test_scores[i-2]:
        print('Test scores decreased in last 2 iter. Stopping.')

Iteration 1, loss = 4.71730816




Training set score: 0.130352
Test set score: 0.109612

Iteration 2, loss = 3.78554140
Training set score: 0.205250
Test set score: 0.168712

Iteration 3, loss = 3.39037691
Training set score: 0.253192
Test set score: 0.202848

Iteration 4, loss = 3.13886473
Training set score: 0.290569
Test set score: 0.225790

Iteration 5, loss = 2.95409596
Training set score: 0.321044
Test set score: 0.242554

Iteration 6, loss = 2.80999414
Training set score: 0.346757
Test set score: 0.255715

Iteration 7, loss = 2.69293682
Training set score: 0.368383
Test set score: 0.266521

Iteration 8, loss = 2.59522851
Training set score: 0.386075
Test set score: 0.273392

Iteration 9, loss = 2.51236000
Training set score: 0.401956
Test set score: 0.279931

Iteration 10, loss = 2.44121720
Training set score: 0.415525
Test set score: 0.284448

Iteration 11, loss = 2.37957712
Training set score: 0.427269
Test set score: 0.286775

Iteration 12, loss = 2.32524093
Training set score: 0.438067
Test set score: 0.2903

Iteration 88, loss = 1.51067251
Training set score: 0.603752
Test set score: 0.315369

Iteration 89, loss = 1.50781679
Training set score: 0.604021
Test set score: 0.314677

Iteration 90, loss = 1.50505476
Training set score: 0.604596
Test set score: 0.314843

Iteration 91, loss = 1.50238249
Training set score: 0.605185
Test set score: 0.314566

Iteration 92, loss = 1.49963898
Training set score: 0.605759
Test set score: 0.314621

Iteration 93, loss = 1.49711364
Training set score: 0.606057
Test set score: 0.315203

Iteration 94, loss = 1.49448064
Training set score: 0.606530
Test set score: 0.315453

Iteration 95, loss = 1.49218801
Training set score: 0.607250
Test set score: 0.315176

Iteration 96, loss = 1.48957941
Training set score: 0.608006
Test set score: 0.315148

Test scores decreased in last 2 iter. Stopping.
Iteration 97, loss = 1.48700538
Training set score: 0.608195
Test set score: 0.315286

Iteration 98, loss = 1.48457080
Training set score: 0.608850
Test set score: 0.315

Test set score: 0.314400

Iteration 172, loss = 1.36650822
Training set score: 0.632912
Test set score: 0.314233

Iteration 173, loss = 1.36547886
Training set score: 0.632984
Test set score: 0.314123

Test scores decreased in last 2 iter. Stopping.
Iteration 174, loss = 1.36446690
Training set score: 0.633021
Test set score: 0.314150

Iteration 175, loss = 1.36335065
Training set score: 0.632882
Test set score: 0.314067

Iteration 176, loss = 1.36239941
Training set score: 0.632853
Test set score: 0.314289

Iteration 177, loss = 1.36141749
Training set score: 0.633086
Test set score: 0.314455

Iteration 178, loss = 1.36036535
Training set score: 0.633733
Test set score: 0.313873

Iteration 179, loss = 1.35934522
Training set score: 0.633922
Test set score: 0.313956

Iteration 180, loss = 1.35835722
Training set score: 0.633682
Test set score: 0.314400

Iteration 181, loss = 1.35729453
Training set score: 0.634082
Test set score: 0.314538

Iteration 182, loss = 1.35625002
Training set 

Training set score: 0.643841
Test set score: 0.311906

Iteration 257, loss = 1.29935327
Training set score: 0.643841
Test set score: 0.311878

Iteration 258, loss = 1.29874485
Training set score: 0.644154
Test set score: 0.311906

Iteration 259, loss = 1.29826066
Training set score: 0.644299
Test set score: 0.311795

Iteration 260, loss = 1.29763873
Training set score: 0.644459
Test set score: 0.312349

Iteration 261, loss = 1.29698041
Training set score: 0.644364
Test set score: 0.312460

Iteration 262, loss = 1.29653321
Training set score: 0.644314
Test set score: 0.312543

Iteration 263, loss = 1.29594205
Training set score: 0.644655
Test set score: 0.312599

Iteration 264, loss = 1.29537655
Training set score: 0.644488
Test set score: 0.312349

Iteration 265, loss = 1.29473713
Training set score: 0.644568
Test set score: 0.312100

Test scores decreased in last 2 iter. Stopping.
Iteration 266, loss = 1.29416791
Training set score: 0.644924
Test set score: 0.312100

Iteration 267, lo

Test set score: 0.310133

Iteration 340, loss = 1.25915222
Training set score: 0.651505
Test set score: 0.309966

Iteration 341, loss = 1.25890341
Training set score: 0.651600
Test set score: 0.310410

Iteration 342, loss = 1.25853061
Training set score: 0.651665
Test set score: 0.309689

Iteration 343, loss = 1.25805845
Training set score: 0.651418
Test set score: 0.310050

Iteration 344, loss = 1.25770722
Training set score: 0.651847
Test set score: 0.309440

Iteration 345, loss = 1.25726938
Training set score: 0.651607
Test set score: 0.309773

Iteration 346, loss = 1.25694856
Training set score: 0.652203
Test set score: 0.309440

Iteration 347, loss = 1.25659433
Training set score: 0.651912
Test set score: 0.309385

Test scores decreased in last 2 iter. Stopping.
Iteration 348, loss = 1.25624463
Training set score: 0.652152
Test set score: 0.309579

Iteration 349, loss = 1.25580570
Training set score: 0.652094
Test set score: 0.309994

Iteration 350, loss = 1.25543773
Training set 

Iteration 423, loss = 1.22940321
Training set score: 0.656799
Test set score: 0.307916

Iteration 424, loss = 1.22921775
Training set score: 0.656894
Test set score: 0.308553

Iteration 425, loss = 1.22887201
Training set score: 0.656894
Test set score: 0.307694

Iteration 426, loss = 1.22850249
Training set score: 0.656908
Test set score: 0.307390

Test scores decreased in last 2 iter. Stopping.
Iteration 427, loss = 1.22826069
Training set score: 0.656704
Test set score: 0.307611

Iteration 428, loss = 1.22784989
Training set score: 0.656915
Test set score: 0.307556

Iteration 429, loss = 1.22765185
Training set score: 0.657039
Test set score: 0.308055

Iteration 430, loss = 1.22729068
Training set score: 0.657235
Test set score: 0.307639

Iteration 431, loss = 1.22689531
Training set score: 0.657090
Test set score: 0.307778

Iteration 432, loss = 1.22666331
Training set score: 0.656683
Test set score: 0.308165

Iteration 433, loss = 1.22631393
Training set score: 0.656792
Test set s

In [49]:
# segment acc
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        x = []
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
        x = scaler.transform(x)
        pred = stats.mode(mlp5.predict(x)).mode[0]
        y_true.append(speaker_id)
        y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.859484777518
maxlen 496


In [50]:
# all segment acc ~ file
from scipy import stats

y_true = []
y_pred = []
# test
maxlen = 0
for speaker_id, feature_list in test.items():
    speaker_id = idx[speaker_id]
    x = []
    for features in feature_list:
        maxlen = max(maxlen, features.shape[1])
        # y = []
        frames = concat(features)
        for frame in frames:
            x.append(frame)
            # y.append(speaker_id)
    x = scaler.transform(x)
    pred = stats.mode(mlp5.predict(x)).mode[0]
    y_true.append(speaker_id)
    y_pred.append(pred)
print(sum(np.array(y_true) == np.array(y_pred))/len(y_true))
print('maxlen', maxlen)

0.975
maxlen 496
