In [200]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math
import pickle
import librosa

%matplotlib inline

In [201]:
# Load data
with open('hw4_trs.pkl', 'rb') as f:
    train_data_input = pickle.load(f)

with open('hw4_tes.pkl', 'rb') as f:
    test_data_input = pickle.load(f)


In [202]:
print(train_data_input.shape)
print(test_data_input.shape)

(500, 16180)
(200, 22631)


In [203]:
# get positive pair
positive_pair = np.zeros((0, 2), dtype=int)
for i in range(0, 500, 10):
    for j in range(i, i+10):
        for k in range(j+1, i+10):
            positive_pair = np.vstack((positive_pair, np.array([j, k], dtype=int)))

# print(positive_pair[0])
# print(positive_pair[1])

In [204]:
print(positive_pair.shape)

(2250, 2)


In [205]:
# get negative pair
negative_pair = np.zeros((0, 2), dtype=int)

for i in range(0, 500, 10):
    current = list(range(i, i+10))
    others = [ x for x in list(range(500)) if x not in current ]
    # generate 45 negative sample per speaker
    for j in range(45):
        p1 = np.random.choice(current)
        p2 = np.random.choice(others)        
        negative_pair = np.vstack((negative_pair, np.array([p1, p2], dtype=int)))



In [206]:
print(negative_pair.shape)

(2250, 2)


In [207]:
# build data
positive_label = np.ones(45*50)
negative_label = np.zeros(45*50)

In [304]:
# Training Parameters
learning_rate = 0.001
batch_size = 32

# Hyper Parameter
time_steps = 10   #
num_input =  1618 # 
num_hidden = 128  # hidden layer num of features
num_feature = 128
keep_prob = 0.8


In [330]:
tf.reset_default_graph()

# Create batch data for training
train_data = tf.data.Dataset.from_tensor_slices(tf.constant(np.concatenate((negative_pair, positive_pair))))
label_data = tf.data.Dataset.from_tensor_slices(tf.constant(np.concatenate((negative_label, positive_label))))
batch_data = tf.data.Dataset.zip((train_data, label_data)).shuffle(10**7, reshuffle_each_iteration=True)
batch_data = batch_data.repeat().batch(batch_size)

iterator = batch_data.make_one_shot_iterator()
next_batch = iterator.get_next()

In [331]:
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    X1 = tf.placeholder("float", [None, time_steps, num_input])
    X2 = tf.placeholder("float", [None, time_steps, num_input])
    Y = tf.placeholder("float", [None, 1])
    weight = tf.Variable(tf.random_normal([time_steps*num_hidden, num_feature]))
    bias = tf.Variable(tf.random_normal([num_feature]))

In [332]:
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    # Define a lstm cell with tensorflow
    cell1 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell2 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell3 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2, cell3])            


In [340]:
# Calculate loss, train_op
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    outputs1, _ = tf.nn.dynamic_rnn(cell, X1, dtype=tf.float32)  
    Y1 = tf.matmul(tf.reshape(outputs1, (-1, time_steps*num_hidden)), weight) + bias
    
    outputs2, _ = tf.nn.dynamic_rnn(cell, X2, dtype=tf.float32)  
    Y2 = tf.matmul(tf.reshape(outputs2, (-1, time_steps*num_hidden)), weight) + bias    
    print(Y1.shape)
    print(Y2.shape)
    Y_pred = tf.sigmoid(tf.reduce_sum( tf.multiply( Y1, Y2 ), 1, keepdims=True))
    print(Y_pred.shape, Y.shape)
    
    loss = tf.reduce_mean( -Y*tf.log(Y_pred + 10e-6) - (1 - Y) * tf.log(1 - Y_pred + 10e-6))
        
    correct_prediction = tf.equal(Y_pred, Y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

(?, 128)
(?, 128)
(?, 1) (?, 1)


In [341]:
# Initial session
sess=tf.Session()
tf.global_variables_initializer().run(session=sess)


In [344]:
# Training
training_steps = 3000
for i in range(training_steps+1):
    (batch_x, batch_y) = sess.run(next_batch)
    #     print(batch_x, batch_y)
    # get data 
    input_x1 = np.array([train_data_input[x[0]] for x in batch_x]).reshape(-1, time_steps, num_input)
    input_x2 = np.array([train_data_input[x[1]] for x in batch_x]).reshape(-1, time_steps, num_input)
    input_y = batch_y.reshape(-1, 1)    
#     print(sess.run((loss), feed_dict={X1: input_x1, X2: input_x2, Y: input_y}))
    _, loss_value, acc_value = sess.run((train_op, loss, accuracy), feed_dict={X1: input_x1, X2: input_x2, Y: input_y})
    if i % 200 == 0:
        print('Training Step:' + str(i) + ' Train Loss = ' + str(loss_value) + ' Accuracy = ' + str(acc_value))
#         print('Training Step:' + str(i) + '  Validation Loss =  ' + 
#               str(sess.run(loss, feed_dict={X: v_mixture_abs_T_fit.reshape(-1, time_steps, num_input), Y: v_M_fit})) + 
#               '  Train Loss =  ' + str(loss_value))

Training Step:0 Train Loss = 5.756458 Accuracy = 0.5
Training Step:200 Train Loss = 5.756458 Accuracy = 0.5
Training Step:400 Train Loss = 8.634691 Accuracy = 0.25
Training Step:600 Train Loss = 2.8782241 Accuracy = 0.75
Training Step:800 Train Loss = 8.634691 Accuracy = 0.25
Training Step:1000 Train Loss = 5.756458 Accuracy = 0.5
Training Step:1200 Train Loss = 2.878224 Accuracy = 0.75
Training Step:1400 Train Loss = 2.878224 Accuracy = 0.75
Training Step:1600 Train Loss = 2.8782241 Accuracy = 0.75
Training Step:1800 Train Loss = 8.634691 Accuracy = 0.25
Training Step:2000 Train Loss = 8.634691 Accuracy = 0.25
Training Step:2200 Train Loss = 8.634691 Accuracy = 0.25
Training Step:2400 Train Loss = 8.634691 Accuracy = 0.25
Training Step:2600 Train Loss = 8.634691 Accuracy = 0.25
Training Step:2800 Train Loss = 5.756458 Accuracy = 0.5
Training Step:3000 Train Loss = 2.8782241 Accuracy = 0.75
