In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math
import pickle
import librosa

%matplotlib inline

In [2]:
# Load data
with open('hw4_trs.pkl', 'rb') as f:
    train_data_input = pickle.load(f)

with open('hw4_tes.pkl', 'rb') as f:
    test_data_input = pickle.load(f)


In [3]:
print(train_data_input.shape)
print(test_data_input.shape)

(500, 16180)
(200, 22631)


In [4]:
# turncate test_data
test_data_input = test_data_input[:,:16180]
print(test_data_input.shape)

(200, 16180)


In [5]:
def generate_pair(data_size):
    # get positive pair
    positive_pair = np.zeros((0, 2), dtype=int)
    for i in range(0, data_size, 10):
        for j in range(i, i+10):
            for k in range(j+1, i+10):
                positive_pair = np.vstack((positive_pair, np.array([j, k], dtype=int)))
    
    # get negative pair
    negative_pair = np.zeros((0, 2), dtype=int)
    for i in range(0, data_size, 10):
        current = list(range(i, i+10))
        others = [ x for x in list(range(data_size)) if x not in current ]
        # generate 45 negative sample per speaker
        for j in range(45):
            p1 = np.random.choice(current)
            p2 = np.random.choice(others)        
            negative_pair = np.vstack((negative_pair, np.array([p1, p2], dtype=int)))
    return np.concatenate((negative_pair, positive_pair))

In [6]:
train_pair = generate_pair(500)
test_pair = generate_pair(200)
print(train_pair.shape, test_pair.shape)

(4500, 2) (1800, 2)


In [7]:
def generate_label(n):
    # build data label
    positive_label = np.ones(45*n)
    negative_label = np.zeros(45*n)
    return np.concatenate((negative_label, positive_label))    

In [8]:
train_label = generate_label(50)
test_label = generate_label(20)
print(train_label.shape, test_label.shape)

(4500,) (1800,)


In [9]:
# Training Parameters
learning_rate = 0.001
batch_size = 64

# Hyper Parameter
time_steps = 10   #
num_input =  1618 # 
num_hidden = 128  # hidden layer num of features
num_feature = 128
keep_prob = 0.8


In [10]:
# create test data pair
test_input_x1 = np.array([test_data_input[x[0]] for x in test_pair]).reshape(-1, time_steps, num_input)
test_input_x2 = np.array([test_data_input[x[1]] for x in test_pair]).reshape(-1, time_steps, num_input)
test_input_y = test_label.reshape(-1, 1)    

print(test_input_x1.shape, test_input_y.shape)

(1800, 10, 1618) (1800, 1)


In [11]:
tf.reset_default_graph()

# Create batch data for training
train_data = tf.data.Dataset.from_tensor_slices(tf.constant(train_pair))
label_data = tf.data.Dataset.from_tensor_slices(tf.constant(train_label))
batch_data = tf.data.Dataset.zip((train_data, label_data)).shuffle(10**7, reshuffle_each_iteration=True)
batch_data = batch_data.repeat().batch(batch_size)

iterator = batch_data.make_one_shot_iterator()
next_batch = iterator.get_next()

In [12]:
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    X1 = tf.placeholder("float", [None, time_steps, num_input])
    X2 = tf.placeholder("float", [None, time_steps, num_input])
    Y = tf.placeholder("float", [None, 1])
    weight = tf.Variable(tf.random_normal([time_steps*num_hidden, num_feature]))
    bias = tf.Variable(tf.random_normal([num_feature]))

In [13]:
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    # Define a lstm cell with tensorflow
    cell1 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell2 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell3 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2, cell3])            


In [14]:
# Calculate loss, train_op
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    outputs1, _ = tf.nn.dynamic_rnn(cell, X1, dtype=tf.float32)  
    Y1 = tf.matmul(tf.reshape(outputs1, (-1, time_steps*num_hidden)), weight) + bias
    
    outputs2, _ = tf.nn.dynamic_rnn(cell, X2, dtype=tf.float32)  
    Y2 = tf.matmul(tf.reshape(outputs2, (-1, time_steps*num_hidden)), weight) + bias    
    print(Y1.shape)
    print(Y2.shape)
    Y_pred = tf.sigmoid(tf.reduce_sum( tf.multiply( Y1, Y2 ), 1, keepdims=True))
    print(Y_pred.shape, Y.shape)
    
    loss = tf.reduce_mean( -Y*tf.log(Y_pred + 10e-6) - (1 - Y) * tf.log(1 - Y_pred + 10e-6))
        
    correct_prediction = tf.equal(Y_pred, Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

(?, 128)
(?, 128)
(?, 1) (?, 1)


In [15]:
# Initial session
sess=tf.Session()
tf.global_variables_initializer().run(session=sess)

In [18]:
# Training
training_steps = 1000
for i in range(training_steps+1):
    (batch_x, batch_y) = sess.run(next_batch)
    #     print(batch_x, batch_y)
    # get data 
    input_x1 = np.array([train_data_input[x[0]] for x in batch_x]).reshape(-1, time_steps, num_input)
    input_x2 = np.array([train_data_input[x[1]] for x in batch_x]).reshape(-1, time_steps, num_input)
    input_y = batch_y.reshape(-1, 1)    
    _, loss_value, acc_value = sess.run((train_op, loss, accuracy), feed_dict={X1: input_x1, X2: input_x2, Y: input_y})
    if i % 200 == 0:
        print('Training Step:' + str(i) + ' Train Loss = ' + str(loss_value) + 
              ' Train Accuracy = ' + str(acc_value) + 
              ' Test Accuracy = ' + str(sess.run(accuracy, feed_dict={X1: test_input_x1, X2: test_input_x2, Y: test_input_y})))
        
 
        

Training Step:0 Train Loss = 5.036899 Train Accuracy = 0.5625 Test Accuracy = 0.5
Training Step:200 Train Loss = 5.396678 Train Accuracy = 0.53125 Test Accuracy = 0.5
Training Step:400 Train Loss = 5.036899 Train Accuracy = 0.5625 Test Accuracy = 0.5
Training Step:600 Train Loss = 6.1162367 Train Accuracy = 0.46875 Test Accuracy = 0.5
Training Step:800 Train Loss = 5.0368986 Train Accuracy = 0.5625 Test Accuracy = 0.5
Training Step:1000 Train Loss = 5.5765676 Train Accuracy = 0.515625 Test Accuracy = 0.5
