In [None]:
import random
from itertools import combinations, product

import pickle
import librosa
import numpy as np

import tensorflow as tf

In [None]:
# Load the training data from its pickle file and print its shape as a sanity check.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open('hw4_trs.pkl', 'rb') as pickle_file:
    train_data = pickle.load(pickle_file)
print(train_data.shape)

In [None]:
# Load the test data from its pickle file and print its shape as a sanity check.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only open files that come from a trusted source.
with open('hw4_tes.pkl', 'rb') as pickle_file:
    test_data = pickle.load(pickle_file)
print(test_data.shape)

In [None]:
# Replace every training entry with its STFT magnitude, transposed so the
# frequency axis comes last, then stack the results into a single array.
# NOTE: this rebinds train_data, so re-running the cell would apply the STFT
# to the spectrograms themselves; reload the pickle before running it again.
train_data = np.stack([
    np.abs(librosa.stft(clip, n_fft=1024, hop_length=512)).T
    for clip in train_data
])

In [None]:
# Replace every test entry with its STFT magnitude, transposed so the
# frequency axis comes last, then stack the results into a single array.
# NOTE: this rebinds test_data, so re-running the cell would apply the STFT
# to the spectrograms themselves; reload the pickle before running it again.
test_data = np.stack([
    np.abs(librosa.stft(clip, n_fft=1024, hop_length=512)).T
    for clip in test_data
])

In [None]:
def create_pos_pairs(spk_indices, L=45):
    """Return every unordered pair of indices drawn from ``spk_indices``.

    Args:
        spk_indices: iterable of utterance indices belonging to one speaker.
        L: currently unused; all pairs are returned regardless of its value.

    Returns:
        A list of 2-tuples in the order produced by ``itertools.combinations``.
    """
    return [pair for pair in combinations(spk_indices, 2)]

In [None]:
def create_neg_pairs(spk_indices, other_indices, L=45):
    """Sample ``L`` distinct (speaker, other) index pairs without replacement.

    Args:
        spk_indices: iterable of indices for the speaker of interest.
        other_indices: iterable of indices to pair them with.
        L: number of pairs to draw from the full cross product.

    Returns:
        A list of ``L`` 2-tuples ``(speaker_index, other_index)``.

    Raises:
        ValueError: from ``random.sample`` when ``L`` exceeds the number of
            available pairs.
    """
    candidates = list(product(spk_indices, other_indices))
    return random.sample(candidates, L)

In [None]:
def generate_batches(data, utterances_per_speaker=10):
    """Build one batch of same-speaker and different-speaker pairs per speaker.

    ``data`` is read as consecutive groups of ``utterances_per_speaker`` rows,
    each group being treated as one speaker. For every group the batch holds
    all within-group pairs (label 1) followed by an equal number of randomly
    sampled pairs that combine a group row with a row from outside the group
    (label 0).

    Args:
        data: array indexed by utterance along axis 0.
        utterances_per_speaker: number of consecutive rows per speaker.
            Defaults to 10.

    Returns:
        Tuple ``(left, right, labels)`` of arrays stacked over speakers:
        ``left[s][k]`` and ``right[s][k]`` are the two members of pair ``k``
        for speaker ``s`` and ``labels[s][k]`` is 1 or 0.

    Raises:
        ValueError: if ``utterances_per_speaker`` is below 2, if
            ``data.shape[0]`` is not a positive multiple of it, or (from
            ``random.sample``) if there are too few outside rows to sample
            the negative pairs.
    """
    total_utterances = data.shape[0]
    if utterances_per_speaker < 2:
        raise ValueError("utterances_per_speaker must be at least 2")
    if total_utterances == 0 or total_utterances % utterances_per_speaker != 0:
        # A ragged final group would otherwise index past the end of data.
        raise ValueError(
            "data.shape[0] (%d) must be a positive multiple of "
            "utterances_per_speaker (%d)" % (total_utterances, utterances_per_speaker)
        )
    all_indices = list(range(total_utterances))

    left_input = []
    right_input = []
    output = []

    for start in range(0, total_utterances, utterances_per_speaker):
        speaker_indices = list(range(start, start + utterances_per_speaker))
        pos_pairs = create_pos_pairs(speaker_indices)
        other_indices = np.delete(all_indices, speaker_indices)
        # Draw as many negatives as positives so every batch is balanced and
        # all batches share one shape (45 + 45 for the default group size).
        neg_pairs = create_neg_pairs(speaker_indices, other_indices, L=len(pos_pairs))

        # Positives first, then negatives, with labels in the same order.
        pairs = list(pos_pairs) + list(neg_pairs)
        left_input.append([data[x] for x, _ in pairs])
        right_input.append([data[y] for _, y in pairs])
        output.append([1] * len(pos_pairs) + [0] * len(neg_pairs))

    return np.stack(left_input), np.stack(right_input), np.stack(output)

In [None]:
# Build the per-speaker pair batches for both splits.
# The negative pairs are drawn with the `random` module, so seed it first:
# otherwise the sampled pairs differ on every Restart & Run All.
PAIR_SAMPLING_SEED = 0
random.seed(PAIR_SAMPLING_SEED)
left_train, right_train, y_train = generate_batches(train_data)
left_test, right_test, y_test = generate_batches(test_data)

In [None]:
# Sanity check: shape and dtype of each training array from generate_batches.
for _split in (left_train, right_train, y_train):
    print(_split.shape, _split.dtype)

In [None]:
# Sanity check: shape and dtype of each test array from generate_batches.
for _split in (left_test, right_test, y_test):
    print(_split.shape, _split.dtype)

In [None]:
 # Cast the training labels to float32 (the dtype of the label placeholder
 # declared in create_placeholders) and print the dtype to confirm the cast.
 y_train = y_train.astype(np.float32)
 print(y_train.dtype)

In [None]:
 # Cast the test labels to float32 (the dtype of the label placeholder
 # declared in create_placeholders) and print the dtype to confirm the cast.
 y_test = y_test.astype(np.float32)
 print(y_test.dtype)

In [None]:
def create_placeholders(n_features=513):
  """Create the graph inputs for the two-tower model.

  Args:
    n_features: size of the last axis of both spectrogram inputs.
      Defaults to 513.

  Returns:
    Tuple ``(left_x, right_x, y, rows)``:
      left_x, right_x: float32 placeholders of shape (None, None, n_features).
      y: float32 placeholder for a 1-D vector of labels.
      rows: int32 placeholder; siamese_model multiplies it into a reshape
        dimension, so a scalar is expected.
  """
  left_x = tf.placeholder(tf.float32, shape=(None, None, n_features))
  right_x = tf.placeholder(tf.float32, shape=(None, None, n_features))
  # `(None)` is just None (fully unknown shape); `(None,)` declares the
  # intended rank-1 label vector so shape mismatches are caught early.
  y = tf.placeholder(tf.float32, shape=(None,))
  rows = tf.placeholder(tf.int32)

  return left_x, right_x, y, rows

In [None]:
def siamese_model(inputs, reuse, rows, num_units = (513,)):
  """Build one tower of the Siamese network and return its flattened features.

  The tower is a stack of LSTM layers followed by a tanh dense layer applied
  to the RNN output; the result is reshaped to one row per input example.

  Args:
    inputs: float tensor passed to ``tf.nn.dynamic_rnn``
      (presumably (batch, time, features) -- confirm against the caller).
    reuse: passed to the LSTM cells and the dense layer; use False for the
      first tower and True for the second so both share weights.
    rows: number of time steps per example, used to size the flattened output.
    num_units: iterable with the number of units of each stacked LSTM layer.

  Returns:
    Tensor reshaped to ``[-1, rows * 513]``.
  """
  hidden_units = 513

  cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n, reuse = reuse) for n in num_units]
  stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(cells)
  rnn_outputs, _ = tf.nn.dynamic_rnn(stacked_lstm, inputs, dtype = tf.float32)
  hidden1 = tf.layers.dense(rnn_outputs, hidden_units, activation=tf.nn.tanh, reuse = reuse)
  # NOTE(review): a dropout + ReLU dense head used to be built here, but its
  # result was overwritten by this reshape of hidden1, so it never affected
  # the output and only added unused trainable variables. It is dropped to
  # keep the graph equal to what was actually trained. Re-enabling it needs
  # explicitly named dense layers (unnamed layers built with reuse=True all
  # resolve to the same scope) and a keep_prob of 1.0 at evaluation time.
  return tf.reshape(hidden1, shape = [-1, rows * hidden_units])

In [None]:
def _pair_accuracy(sess, prediction_op, x1, x2, rows, batches):
  """Return the fraction of pairs in ``batches`` whose prediction equals the label."""
  correct = 0
  total = 0
  for x1_batch, x2_batch, y_batch in batches:
    # Labels are not fed: the prediction op does not depend on them.
    y_pred = sess.run(prediction_op,
                      feed_dict={x1: x1_batch, x2: x2_batch, rows: x1_batch.shape[1]})
    y_true = np.asarray(y_batch)
    correct += int(np.sum(y_pred == y_true))
    total += y_true.size
  return correct / total if total else 0.0


def model(train_data, test_data, learning_rate = 0.0005, num_epochs = 20):
  """Train the Siamese network and return its accuracy on the test pairs.

  Both towers share weights; the similarity logit of a pair is the dot
  product of the two tower outputs, trained with summed sigmoid
  cross-entropy and thresholded at a sigmoid value of 0.5 for prediction.

  Args:
    train_data: sequence ``(left, right, labels)``; the three items are
      iterated together, one batch per step.
    test_data: sequence ``(left, right, labels)`` in the same layout.
    learning_rate: learning rate for the Adam optimizer.
    num_epochs: number of passes over the training batches.

  Returns:
    Fraction (0.0 to 1.0) of test pairs classified correctly after training.
  """
  tf.reset_default_graph()

  x1, x2, y, rows  = create_placeholders()
  left_train, right_train, y_train = train_data
  left_test, right_test, y_test = test_data

  left_op = siamese_model(x1,False,rows)
  right_op = siamese_model(x2,True,rows)
  dotProduct = tf.reduce_sum(tf.multiply(left_op,right_op),axis = 1)
  yPred = tf.nn.sigmoid(dotProduct)

  binarisedOutput = tf.cast(tf.math.greater(yPred,0.5), tf.int16)

  cost = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = dotProduct))

  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
  init = tf.global_variables_initializer()

  with tf.Session() as sess:
    sess.run(init)

    for epoch in range(num_epochs):
      total_cost = 0.0
      n_batches = 0
      for x1_batch, x2_batch, y_batch in zip(left_train, right_train, y_train):
        feed = {x1: x1_batch, x2: x2_batch, y: y_batch, rows: x1_batch.shape[1]}
        # Fetch the cost in the same run as the update instead of paying for
        # a second forward pass over the batch.
        _, batch_cost = sess.run([optimizer, cost], feed_dict = feed)
        total_cost += batch_cost
        n_batches += 1

      epoch_accuracy = _pair_accuracy(sess, binarisedOutput, x1, x2, rows,
                                      zip(left_test, right_test, y_test))
      # max(..., 1) avoids a ZeroDivisionError when there are no training batches.
      print(epoch,"Cost:", total_cost/max(n_batches, 1), " Test Accuracy: ", epoch_accuracy)

    # Evaluated again after the loop so a value is returned even when num_epochs is 0.
    final_accuracy = _pair_accuracy(sess, binarisedOutput, x1, x2, rows,
                                    zip(left_test, right_test, y_test))

    print("Final Test Accuracy: ", final_accuracy)
    return final_accuracy

In [None]:
# Bundle (left, right, labels) for each split and train/evaluate the model.
# The labels are passed as they are: the label placeholder is float32, so an
# extra float64 copy of y_train would only be cast back when it is fed.
tr_data = [left_train, right_train, y_train]
te_data = [left_test, right_test, y_test]
acc = model(tr_data, te_data)

In [None]:
# Report the value returned by model(...) in the previous cell.
print("Test Accuracy = ", acc)