In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math
import pickle
import librosa

%matplotlib inline

In [2]:
# Load data
with open('hw4_trs.pkl', 'rb') as f:
    train_data_input = pickle.load(f)

with open('hw4_tes.pkl', 'rb') as f:
    test_data_input = pickle.load(f)


In [3]:
print(train_data_input.shape)
print(test_data_input.shape)

(500, 16180)
(200, 22631)


In [4]:
# turncate test_data
test_data_input = test_data_input[:,0:16180]
print(test_data_input.shape)

(200, 16180)


In [5]:
def generate_pair(data_size):
    # get positive pair
    positive_pair = np.zeros((0, 2), dtype=int)
    for i in range(0, data_size, 10):
        for j in range(i, i+10):
            for k in range(j+1, i+10):
                positive_pair = np.vstack((positive_pair, np.array([j, k], dtype=int)))
    
    # get negative pair
    negative_pair = np.zeros((0, 2), dtype=int)
    for i in range(0, data_size, 10):
        current = list(range(i, i+10))
        others = [ x for x in list(range(data_size)) if x not in current ]
        # generate 45 negative sample per speaker
        for j in range(45):
            p1 = np.random.choice(current)
            p2 = np.random.choice(others)        
            negative_pair = np.vstack((negative_pair, np.array([p1, p2], dtype=int)))
    return np.concatenate((negative_pair, positive_pair))

In [6]:
train_pair = generate_pair(500)
test_pair = generate_pair(200)
print(train_pair.shape, test_pair.shape)

(4500, 2) (1800, 2)


In [7]:
# print(test_pair[890:920])

In [8]:
def generate_label(n):
    # build data label
    negative_label = np.zeros(45*n)
    positive_label = np.ones(45*n)
    return np.concatenate((negative_label, positive_label))    

In [9]:
train_label = generate_label(50)
test_label = generate_label(20)
print(train_label.shape, test_label.shape)
# print(test_label[890:920])

(4500,) (1800,)


In [24]:
# Training Parameters
learning_rate = 0.0001
batch_size = 128

# Hyper Parameter
num_input =  16180 # 
num_hidden = 128  # hidden layer num of features
num_feature = 256

drop_rate = 0.2
is_training = True


In [25]:
# create train data pair
valid_input_x1 = np.array([train_data_input[x[0]] for x in train_pair]).reshape(-1, num_input)
valid_input_x2 = np.array([train_data_input[x[1]] for x in train_pair]).reshape(-1, num_input)
valid_input_y = train_label

print(valid_input_x1.shape, valid_input_y.shape)

(4500, 16180) (4500,)


In [26]:
# create test data pair
test_input_x1 = np.array([test_data_input[x[0]] for x in test_pair]).reshape(-1, num_input)
test_input_x2 = np.array([test_data_input[x[1]] for x in test_pair]).reshape(-1, num_input)
test_input_y = test_label

print(test_input_x1.shape, test_input_y.shape)

(1800, 16180) (1800,)


In [27]:
# print(test_input_x1[1500])
# print(test_input_x2[1500])
# print(test_input_y[1500])

In [28]:
tf.reset_default_graph()

# Create batch data for training
train_data = tf.data.Dataset.from_tensor_slices(tf.constant(train_pair))
label_data = tf.data.Dataset.from_tensor_slices(tf.constant(train_label))
batch_data = tf.data.Dataset.zip((train_data, label_data)).shuffle(10**7, reshuffle_each_iteration=True)
batch_data = batch_data.repeat().batch(batch_size)

iterator = batch_data.make_one_shot_iterator()
next_batch = iterator.get_next()

In [29]:
with tf.variable_scope("nn", reuse=tf.AUTO_REUSE):
    X1 = tf.placeholder("float", [None, num_input])
    X2 = tf.placeholder("float", [None, num_input])
    Y = tf.placeholder("float", [None])

In [30]:
with tf.variable_scope("nn", reuse=tf.AUTO_REUSE):
    initializer = tf.contrib.layers.variance_scaling_initializer()
    
    
    d1 = tf.layers.dense(X1, units=num_hidden, name="d1", activation=tf.nn.relu, kernel_initializer=initializer)
    dropout1 = tf.layers.dropout(d1, rate=drop_rate, training=is_training, name="dropout1")
    ln1 = tf.contrib.layers.layer_norm(dropout1, scope="ln1")    
    d2 = tf.layers.dense(ln1, units=num_hidden/2, name="d2", activation=tf.nn.relu, kernel_initializer=initializer)
    dropout2 = tf.layers.dropout(d2, rate=drop_rate, training=is_training, name="dropout2")
    ln2 = tf.contrib.layers.layer_norm(dropout2, scope="ln2")
    d3 = tf.layers.dense(ln2, units=num_hidden/4, name="d3", activation=tf.nn.relu, kernel_initializer=initializer)
    dropout3 = tf.layers.dropout(d3, rate=drop_rate, training=is_training, name="dropout3")
    ln3 = tf.contrib.layers.layer_norm(dropout3, scope="ln3")    
    d4 = tf.layers.dense(ln3, units=num_hidden/2, name="d4", activation=tf.nn.relu, kernel_initializer=initializer)    
    dropout4 = tf.layers.dropout(d4, rate=drop_rate, training=is_training, name="dropout4")
    ln4 = tf.contrib.layers.layer_norm(dropout4, scope="ln4")
    Y1 = tf.layers.dense(ln4, units=num_feature, name="d5", kernel_initializer=initializer)

    d1 = tf.layers.dense(X2, units=num_hidden, reuse=True, name="d1", activation=tf.nn.relu, kernel_initializer=initializer)
    dropout1 = tf.layers.dropout(d1, rate=drop_rate, training=is_training, name="dropout1")
    ln1 = tf.contrib.layers.layer_norm(dropout1, reuse=True, scope="ln1")    
    d2 = tf.layers.dense(ln1, units=num_hidden/2, reuse=True, name="d2", activation=tf.nn.relu, kernel_initializer=initializer)
    dropout2 = tf.layers.dropout(d2, rate=drop_rate, training=is_training, name="dropout2")
    ln2 = tf.contrib.layers.layer_norm(dropout2, reuse=True, scope="ln2")
    d3 = tf.layers.dense(ln2, units=num_hidden/4, reuse=True, name="d3", activation=tf.nn.relu, kernel_initializer=initializer)
    dropout3 = tf.layers.dropout(d3, rate=drop_rate, training=is_training, name="dropout3")
    ln3 = tf.contrib.layers.layer_norm(dropout3, reuse=True, scope="ln3")
    d4 = tf.layers.dense(ln3, units=num_hidden/2, reuse=True, name="d4", activation=tf.nn.relu, kernel_initializer=initializer)    
    dropout4 = tf.layers.dropout(d4, rate=drop_rate, training=is_training, name="dropout4")
    ln4 = tf.contrib.layers.layer_norm(dropout4, reuse=True, scope="ln4")
    Y2 = tf.layers.dense(ln4, units=num_feature, reuse=True, name="d5", kernel_initializer=initializer)

    print(Y1.shape)
    print(Y2.shape)
    Y1_Y2_product = tf.reduce_sum(tf.multiply( Y1, Y2 ), 1, keepdims=True)
    Y_pred = tf.reshape(tf.sigmoid(Y1_Y2_product), [-1])
    print(Y1_Y2_product.shape, Y_pred.shape, Y.shape)

    loss = tf.reduce_mean( -Y*tf.log(Y_pred + 10e-6) - (1 - Y) * tf.log(1 - Y_pred + 10e-6))

    correct_prediction = tf.equal(tf.round(Y_pred), Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

(?, 256)
(?, 256)
(?, 1) (?,) (?,)


In [31]:
# Initial session
sess=tf.Session()
tf.global_variables_initializer().run(session=sess)

In [32]:
# Training
training_steps = 10000
for i in range(training_steps+1):
    (batch_x, batch_y) = sess.run(next_batch)
    # print(batch_x, batch_y)
    input_x1 = np.array([train_data_input[x[0]] for x in batch_x]).reshape(-1, num_input)
    input_x2 = np.array([train_data_input[x[1]] for x in batch_x]).reshape(-1, num_input)
    input_y = batch_y
    _, loss_value, acc_value = sess.run((train_op, loss, accuracy), feed_dict={X1: input_x1, X2: input_x2, Y: input_y})
    if i % 1000 == 0:
        is_training = False
        print('Training Step:' + str(i) + ' Train Loss = ' + str(loss_value) + 
              ' Train Accuracy = ' + str(acc_value) +
              ' Validation Accuracy = ' + str(sess.run(accuracy, feed_dict={X1: valid_input_x1, X2: valid_input_x2, Y: valid_input_y})) +
              ' Test Accuracy = ' + str(sess.run(accuracy, feed_dict={X1: test_input_x1, X2: test_input_x2, Y: test_input_y})))
        is_training = True

Training Step:0 Train Loss = 5.349769 Train Accuracy = 0.5 Validation Accuracy = 0.50622225 Test Accuracy = 0.48833334
Training Step:1000 Train Loss = 2.0514245 Train Accuracy = 0.8046875 Validation Accuracy = 0.7737778 Test Accuracy = 0.51222223
Training Step:2000 Train Loss = 2.242106 Train Accuracy = 0.796875 Validation Accuracy = 0.8391111 Test Accuracy = 0.515
Training Step:3000 Train Loss = 1.3204312 Train Accuracy = 0.875 Validation Accuracy = 0.8704444 Test Accuracy = 0.49666667
Training Step:4000 Train Loss = 0.93220377 Train Accuracy = 0.9140625 Validation Accuracy = 0.8893333 Test Accuracy = 0.5088889
Training Step:5000 Train Loss = 0.7556619 Train Accuracy = 0.9296875 Validation Accuracy = 0.9082222 Test Accuracy = 0.49444443
Training Step:6000 Train Loss = 0.556067 Train Accuracy = 0.9453125 Validation Accuracy = 0.9251111 Test Accuracy = 0.49611112
Training Step:7000 Train Loss = 0.3560372 Train Accuracy = 0.9609375 Validation Accuracy = 0.92866665 Test Accuracy = 0.49944