In [1]:
from tensorflow.contrib.learn.python.learn.datasets import base

import tensorflow as tf
import numpy as np
import os,sys
sys.path.insert(0, './scripts')
dataDir ='./data'


import py_compile
py_compile.compile('scripts/ivector_dataset.py')
py_compile.compile('scripts/siamese_model.py')
py_compile.compile('scripts/ivector_tools.py')

import ivector_dataset
import siamese_model
import ivector_tools as it
import csv

In [None]:
def load_ivectors(filename):
    """Loads ivectors

    Parameters
    ----------
    filename : string
        Path to ivector files (e.g. dev_ivectors.csv)

    Returns
    -------
    ids : list
        List of ivectorids
    durations : array, shaped('n_ivectors')
        Array of durations for each ivectorid
    languages : array, shaped('n_ivectors')
        Array of langs for each ivectorid (only applies to train)
    ivectors : array, shaped('n_ivectors', 600)
        Array of ivectors for each ivectorid
    """
    ids = []
    durations = []
    languages = []
    ivectors = []
    with open(filename, 'rb') as infile:
        reader = csv.reader(infile, delimiter='\t')
        reader.next()

        for row in csv.reader(infile, delimiter='\t'):
            ids.append(row[0])
            durations.append(float(row[1]))
            languages.append(row[2])
            ivectors.append(np.asarray(row[3:], dtype=np.float32))

#             sys.stdout.write("\r     %s  " % row[0])
#             sys.stdout.flush()

    print "\n   I-    Adding Transformed ivectors "

    return ids, np.array(durations, dtype=np.float32), np.array(languages), np.vstack(ivectors)


# load ivector ids, durations , languages, and ivectors (as row vectors)
print "\n 1. development data"
dev_ids, dev_durations, dev_languages, dev_ivec = \
    load_ivectors('./data/ivec15_lre_dev_ivectors.tsv')
print "\n 2. training data"
train_ids, train_durations, train_languages, train_ivec = \
    load_ivectors('./data/ivec15_lre_train_ivectors.tsv')
print "\n 3. test data"
test_ids, test_durations, test_languages, test_ivec = \
    load_ivectors('./data/ivec15_lre_test_ivectors.tsv')

In [3]:
[languages,train_languages_num] = np.unique(train_languages,return_inverse=True)

V = it.lda2(train_ivec,train_languages_num)
V = V[:,0:49]
dev_ivec = np.matmul(dev_ivec,V)
train_ivec = np.matmul(train_ivec,V)
test_ivec = np.matmul(test_ivec,V)

In [None]:
# compute the mean and whitening transformation over dev set only
m = np.mean(dev_ivec, axis=0)
S = np.cov(dev_ivec, rowvar=0)
D, V = np.linalg.eig(S)
W = (1 / np.sqrt(D) * V).transpose().astype('float32')

# # center and whiten all i-vectors
dev_ivec = np.dot(dev_ivec - m, W.transpose())
train_ivec = np.dot(train_ivec - m, W.transpose())
test_ivec = np.dot(test_ivec - m, W.transpose())

In [None]:
avg_train_ivec = np.zeros((len(np.unique(train_languages)), train_ivec.shape[1]))
avg_train_languages = []
for i, language in enumerate(np.unique(train_languages)):
    avg_train_ivec[i] = np.mean(train_ivec[train_languages == language], axis=0)
    avg_train_languages.append(language)


In [6]:
# project all i-vectors into unit sphere
dev_ivec /= np.sqrt(np.sum(dev_ivec ** 2, axis=1))[:, np.newaxis]
train_ivec /= np.sqrt(np.sum(train_ivec ** 2, axis=1))[:, np.newaxis]
test_ivec /= np.sqrt(np.sum(test_ivec ** 2, axis=1))[:, np.newaxis]


In [7]:
test_languages = np.loadtxt('data/ivec15_lre_trial_key.v1.tsv',usecols=[1],dtype='string')
test_set = np.loadtxt('data/ivec15_lre_trial_key.v1.tsv',usecols=[2],dtype='string')
test_languages = test_languages[1:]
test_set = test_set[1:]


In [8]:
test_languages_num=np.zeros(len(test_ids),dtype=int)
print test_languages

for i,lang in enumerate(languages):
    test_languages_num[ test_languages==languages[i]]=i

['out_of_set' 'thai' 'tagalog' ..., 'out_of_set' 'shona' 'zulu']


In [9]:
#baseline

dev_scores = avg_train_ivec.dot(train_ivec.transpose() )
hypo_lang = np.argmax(dev_scores,axis = 0)
temp = ((train_languages_num) - hypo_lang)
acc =1- np.size(np.nonzero(temp)) / float( np.size(train_languages_num) )
print acc

tst_scores = avg_train_ivec.dot(test_ivec.transpose() )
hypo_lang = np.argmax(tst_scores,axis = 0)
hypo_lang = hypo_lang[(test_languages=='out_of_set')==False]
test_languages_num = test_languages_num[(test_languages=='out_of_set')==False]
temp = ((test_languages_num) - hypo_lang)
acc_tst =1- np.size(np.nonzero(temp)) / float(np.size(test_languages_num))

print acc_tst

0.8518
0.789


In [10]:
confusionmat = np.zeros((50,50))
for i,lang in enumerate(languages):
    hypo_bylang = hypo_lang[ test_languages_num == i]
    hist_bylang = np.histogram(hypo_bylang,50)
    confusionmat[:,i] = hist_bylang[0]

precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision
recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall
    
print 'Confusion matrix'
print confusionmat
print 'Precision'
print precision
print 'Recall'
print recall

print '\n\n<Performance evaluation on Test dataset>'
print 'Accurary  : %0.3f' %(acc_tst)
print 'Precision : %0.3f' %(np.mean(precision))
print 'Recall    : %0.3f' %(np.mean(recall))

Confusion matrix
[[  5.  74.  95. ...,   1.   1.   1.]
 [  1.   0.   0. ...,   1.   1.   0.]
 [  3.   0.   0. ...,   1.   0.   3.]
 ..., 
 [  0.   0.   0. ...,   0.   1.   0.]
 [  2.   0.   0. ...,   1.   0.   0.]
 [  1.   3.   1. ...,  52.  63.  92.]]
Precision
[ 0.00883392  0.          0.          0.          0.03614458  0.          0.
  0.          0.          0.          0.01052632  0.          0.          0.
  0.          0.          0.          0.          0.74757282  0.          0.
  0.          0.          0.86538462  0.          0.          0.          0.
  0.          0.33566434  0.          0.          0.          0.          0.
  0.4787234   0.          0.          0.          0.          0.40340909
  0.          0.          0.29496403  0.01041667  0.00990099  0.          0.
  0.          0.17005545]
Recall
[ 0.05  0.    0.    0.    0.03  0.    0.    0.    0.    0.    0.01  0.    0.
  0.    0.    0.    0.    0.    0.77  0.    0.    0.    0.    0.9   0.    0.
  0.    0.    0

In [11]:
# making pair of train i-vector with mean of each language i-vector
#  example : for total 3 ivectors
#  ivector   ivector_p  label
#     1         1         1
#     1         2         0
#     1         3         0
#     2         1         0
#     2         2         1
#     ...      ...       ...
#     3         3         1

# preparing pair labels
sim = []
pair_a_idx = []
pair_b_idx = []

for i, lang in enumerate(languages):
    for j, label in enumerate(train_languages_num):
        pair_a_idx.append(i)
        pair_b_idx.append(j)
        if i == label:
            sim.append(1)
        else:
            sim.append(0)
            
print(np.shape(pair_a_idx),np.shape(pair_b_idx), np.shape(sim))
pair_a_idx=np.array(pair_a_idx)
pair_b_idx=np.array(pair_b_idx)
sim = np.array(sim)

#shuffling
shuffleidx = np.arange(0,np.size(pair_a_idx))
np.random.shuffle(shuffleidx)
pair_a_idx = pair_a_idx[shuffleidx]
pair_b_idx = pair_b_idx[shuffleidx]
sim = sim[shuffleidx]


data = []
data_p = []
    
for iter in np.arange(0,np.size(sim)) :
        data.append( avg_train_ivec[pair_a_idx[iter]] )
        data_p.append( train_ivec[pair_b_idx[iter]] )

data = np.array(data)
data_p = np.array(data_p)

# TRN dataset loading for feeding 
tar_data = data[sim==1]
tar_data_p = data_p[sim==1]
tar_sim = sim[sim==1]
non_data = data[sim==0]
non_data_p = data_p[sim==0]
non_sim = sim[sim==0]
print(tar_data.shape, tar_data_p.shape,tar_sim.shape,non_data.shape,non_data_p.shape,non_sim.shape)

trn_tar = ivector_dataset.DataSet(tar_data,tar_sim)
trn_tar_p = ivector_dataset.DataSet(tar_data_p,tar_sim)

trn_non = ivector_dataset.DataSet(non_data,non_sim)
trn_non_p = ivector_dataset.DataSet(non_data_p,non_sim)


((750000,), (750000,), (750000,))
((15000, 49), (15000, 49), (15000,), (735000, 49), (735000, 49), (735000,))


In [13]:
# init variables
sess = tf.InteractiveSession()
siamese = siamese_model.siamese();
global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(0.01, global_step,
                                           5000, 0.99, staircase=True)
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(siamese.loss, global_step=global_step)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())

(?, 2940)
(?, 1500)
(?, 600)
(?, 2940)
(?, 1500)
(?, 600)


In [14]:
#start training
batch_size = 32
max_acc = 0.40
max_step=0
saver_folder='snnmodel_ivector'
if not os.path.exists(saver_folder):
    os.mkdir(saver_folder)
for step in range(100000):
  
    if step %1 ==0:
        batch_x1_a, batch_y1_a = trn_tar.next_batch(batch_size,shuffle=False)
        batch_x2_a, batch_y2_a = trn_tar_p.next_batch(batch_size,shuffle=False)
        batch_x1_b, batch_y1_b = trn_non.next_batch(batch_size,shuffle=False)
        batch_x2_b, batch_y2_b = trn_non_p.next_batch(batch_size,shuffle=False)
        batch_x1 = np.append(batch_x1_a,batch_x1_b,axis=0)
        batch_y1 = np.append(batch_y1_a,batch_y1_b,axis=0)
        batch_x2 = np.append(batch_x2_a,batch_x2_b,axis=0)
        batch_y2 = np.append(batch_y2_a,batch_y2_b,axis=0)
    else:
        batch_x1_a, batch_y1_a = dev_tar.next_batch(batch_size,shuffle=False)
        batch_x2_a, batch_y2_a = dev_tar_p.next_batch(batch_size,shuffle=False)
        batch_x1_b, batch_y1_b = dev_non.next_batch(batch_size,shuffle=False)
        batch_x2_b, batch_y2_b = dev_non_p.next_batch(batch_size,shuffle=False)
        batch_x1 = np.append(batch_x1_a,batch_x1_b,axis=0)
        batch_y1 = np.append(batch_y1_a,batch_y1_b,axis=0)
        batch_x2 = np.append(batch_x2_a,batch_x2_b,axis=0)
        batch_y2 = np.append(batch_y2_a,batch_y2_b,axis=0)
        

#     batch_x1,batch_y1 = mgb3_siam1.train.next_batch(120,shuffle=False)
#     batch_x2,batch_y2 = mgb3_siam2.train.next_batch(120,shuffle=False)    
#     batch_y = (batch_y1==batch_y2).astype('float')
    batch_y = batch_y1*2-1
#     batch_y = 1-batch_y1
    
    _, loss_v = sess.run([train_step, siamese.loss], feed_dict={
        siamese.x1: batch_x1,
        siamese.x2: batch_x2,
        siamese.y_: batch_y
    })
    
    if np.isnan(loss_v):
        print ('Model diverged with loss = NAN')
        quit()
        
    if step % 10 ==0:
        trn_ivectors_siam = siamese.o1.eval({siamese.x1:train_ivec})
        lang_mean_siam = siamese.o1.eval({siamese.x1:avg_train_ivec})
        tst_ivectors_siam = siamese.o1.eval({siamese.x1:test_ivec})
        trn_scores = lang_mean_siam.dot(trn_ivectors_siam.transpose() )
        hypo_lang = np.argmax(trn_scores,axis = 0)
        temp = ((train_languages_num) - hypo_lang)
        acc =1- np.size(np.nonzero(temp)) / float( np.size(train_languages_num) )
        
        tst_scores = lang_mean_siam.dot(tst_ivectors_siam.transpose() )
        hypo_lang = np.argmax(tst_scores,axis = 0)
        hypo_lang = hypo_lang[(test_languages=='out_of_set')==False]

        temp = ((test_languages_num) - hypo_lang)
        acc_tst =1- np.size(np.nonzero(temp)) / float(np.size(test_languages_num))

        if max_acc < acc_tst:
            max_acc = acc_tst
            max_step=step
            print ('Step %d: loss %.3f, Acc.: (DEV)%.3f (TST)%.3f, lr : %.5f' % (step,loss_v,acc,acc_tst,sess.run(learning_rate)))
            saver.save(sess, saver_folder+'/model'+str(step)+'.ckpt')
        if loss_v<0.3:
            break


  return array(a, dtype, copy=False, order=order)


Step 40: loss 0.988, Acc.: (DEV)0.466 (TST)0.420, lr : 0.01000
Step 50: loss 0.983, Acc.: (DEV)0.519 (TST)0.467, lr : 0.01000
Step 60: loss 0.983, Acc.: (DEV)0.568 (TST)0.515, lr : 0.01000
Step 70: loss 0.980, Acc.: (DEV)0.597 (TST)0.544, lr : 0.01000
Step 80: loss 0.983, Acc.: (DEV)0.607 (TST)0.554, lr : 0.01000
Step 90: loss 0.974, Acc.: (DEV)0.623 (TST)0.564, lr : 0.01000
Step 100: loss 0.985, Acc.: (DEV)0.635 (TST)0.572, lr : 0.01000
Step 110: loss 0.974, Acc.: (DEV)0.639 (TST)0.583, lr : 0.01000
Step 120: loss 0.975, Acc.: (DEV)0.648 (TST)0.589, lr : 0.01000
Step 130: loss 0.976, Acc.: (DEV)0.650 (TST)0.592, lr : 0.01000
Step 140: loss 0.966, Acc.: (DEV)0.658 (TST)0.600, lr : 0.01000
Step 150: loss 0.971, Acc.: (DEV)0.662 (TST)0.604, lr : 0.01000
Step 160: loss 0.974, Acc.: (DEV)0.666 (TST)0.606, lr : 0.01000
Step 170: loss 0.968, Acc.: (DEV)0.670 (TST)0.611, lr : 0.01000
Step 180: loss 0.969, Acc.: (DEV)0.673 (TST)0.615, lr : 0.01000
Step 190: loss 0.964, Acc.: (DEV)0.680 (TST)0.

In [15]:

saver_folder='snnmodel_ivector'
print max_step
RESTORE_STEP=max_step
saver.restore(sess, saver_folder+'/model'+str(RESTORE_STEP)+'.ckpt')


trn_ivectors_siam = siamese.o1.eval({siamese.x1:train_ivec})
dev_ivectors_siam = siamese.o1.eval({siamese.x1:dev_ivec})
tst_ivectors_siam = siamese.o1.eval({siamese.x1:test_ivec})
lang_mean_siam = siamese.o1.eval({siamese.x1:avg_train_ivec})

tst_scores = lang_mean_siam.dot(tst_ivectors_siam.transpose() )
# print(tst_scores.shape)
hypo_lang = np.argmax(tst_scores,axis = 0)
hypo_lang = hypo_lang[(test_languages=='out_of_set')==False]
temp = ((test_languages_num) - hypo_lang)
acc =1- np.size(np.nonzero(temp)) / float(np.size(test_languages_num))
print 'Final accurary on test dataset : %0.3f' %(acc)

6200
INFO:tensorflow:Restoring parameters from snnmodel_ivector/model6200.ckpt
Final accurary on test dataset : 0.823


In [16]:
confusionmat = np.zeros((50,50))
for i,lang in enumerate(languages):
    hypo_bylang = hypo_lang[ test_languages_num == i]
    hist_bylang = np.histogram(hypo_bylang,50)
    confusionmat[:,i] = hist_bylang[0]

precision = np.diag(confusionmat) / np.sum(confusionmat,axis=1) #precision
recall = np.diag(confusionmat) / np.sum(confusionmat,axis=0) # recall
    
print 'Confusion matrix'
print confusionmat
print 'Precision'
print precision
print 'Recall'
print recall

print '\n\n<Performance evaluation on Test dataset>'
print 'Accurary  : %0.3f' %(acc)
print 'Precision : %0.3f' %(np.mean(precision))
print 'Recall    : %0.3f' %(np.mean(recall))

Confusion matrix
[[ 77.   1.   1. ...,   5.   3.   1.]
 [  0.  81.   0. ...,   0.   1.   0.]
 [  4.   0.  97. ...,   2.   0.   0.]
 ..., 
 [  0.   0.   0. ...,   0.   3.   0.]
 [  0.   0.   0. ...,  59.   0.   0.]
 [  1.   3.   1. ...,   1.  70.  89.]]
Precision
[ 0.23692308  0.87096774  0.49489796  0.76635514  0.07407407  0.25        0.
  0.42574257  0.71311475  0.          0.          0.08333333  0.
  0.06666667  0.          0.          0.68        0.          0.69642857
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.01020408  0.25490196  0.          0.86138614
  0.          0.          0.          0.          0.          0.          0.
  0.          0.79439252  0.          0.          0.          0.
  0.46067416  0.          0.          0.          0.2124105 ]
Recall
[ 0.77  0.81  0.97  0.82  0.02  0.01  0.    0.86  0.87  0.    0.    0.02
  0.    0.01  0.    0.    0.85  0.    0.78  0.    0.    0.    0.    0.    0.
  0.    0. 