### Imports

In [0]:
%tensorflow_version 1.x
import random
from itertools import combinations, product

import pickle
import librosa
import numpy as np

import tensorflow as tf

### Load Training and Testing Data

In [3]:
from google.colab import files

uploaded = files.upload()

Saving hw4_tes.pkl to hw4_tes.pkl
Saving hw4_trs.pkl to hw4_trs.pkl


In [4]:
with open('hw4_trs.pkl', 'rb') as pickle_file:
    train_data = pickle.load(pickle_file)
print(train_data.shape)

(500, 16180)


In [5]:
with open('hw4_tes.pkl', 'rb') as pickle_file:
    test_data = pickle.load(pickle_file)
print(test_data.shape)

(200, 22631)


In [0]:
train_data = np.stack([np.abs(librosa.stft(x, n_fft=1024, hop_length=512).T) for x in train_data])

In [0]:
test_data = np.stack([np.abs(librosa.stft(x, n_fft=1024, hop_length=512).T) for x in test_data])

### Create Positive Pairs
Generates all 45 combinations of pairs

In [0]:
def create_pos_pairs(spk_indices, L=45):
    pos_pairs = list(combinations(spk_indices, 2))
    return pos_pairs

### Create Negative Pairs
A = indices of positive speaker's utterances  
B = Indices of other speakers' utterances  
Take the cartesian product of A and B and sample 45 pairs from it

In [0]:
def create_neg_pairs(spk_indices, other_indices, L=45):
    neg_pairs = list(product(spk_indices, other_indices))
    l_pairs = random.sample(neg_pairs, L)
    return l_pairs

### Create Training and Testing Batches
Creates 50 batches for Training and 20 Batches for Testing. Each batch has 45 pairs of negative examples and 45 pairs of positive examples, i.e, 90 total pairs in each batch.

Final Dimensions of Training Data - Batches(50) x Pairs (90) x Transposed Shape of Spectrogram (32x513)

Final Dimensions of Testing Data - Batches(20) x Pairs (90) x Transposed Shape of Spectrogram (45x513)

In [0]:
def generate_batches(data):
    total_utterances = data.shape[0]
    all_indices = list(range(total_utterances))
    
    left_input = []
    right_input = []
    output = []
    
    for i in range(0, total_utterances, 10):
        speaker_indices = list(range(i, i+10))
        pos_pairs = create_pos_pairs(speaker_indices)
        other_indices = np.delete(all_indices, speaker_indices)
        neg_pairs = create_neg_pairs(speaker_indices, other_indices)
        
        l_batch = []
        r_batch = []
        o_batch = []

        for x, y in pos_pairs:
            l_batch.append(data[x])
            r_batch.append(data[y])
            o_batch.append(1)

        for x, y in neg_pairs:
            l_batch.append(data[x])
            r_batch.append(data[y])
            o_batch.append(0)
        
        left_input.append(l_batch)
        right_input.append(r_batch)
        output.append(o_batch)
    
    return np.stack(left_input), np.stack(right_input), np.stack(output)

In [0]:
left_train, right_train, y_train = generate_batches(train_data)
left_test, right_test, y_test = generate_batches(test_data)

In [12]:
print(left_train.shape, left_train.dtype)
print(right_train.shape, right_train.dtype)
print(y_train.shape,y_train.dtype)

(50, 90, 32, 513) float32
(50, 90, 32, 513) float32
(50, 90) int64


In [13]:
print(left_test.shape, left_test.dtype)
print(right_test.shape, right_test.dtype)
print(y_test.shape,y_test.dtype)

(20, 90, 45, 513) float32
(20, 90, 45, 513) float32
(20, 90) int64


In [14]:
 y_train = y_train.astype(np.float32)
 print(y_train.dtype)

float32


In [15]:
 y_test = y_test.astype(np.float32)
 print(y_test.dtype)

float32


### Create Placeholders

In [0]:
def create_placeholders():
  left_x = tf.placeholder(tf.float32,shape = [None,None,513])
  right_x = tf.placeholder(tf.float32,shape = [None,None,513])
  y = tf.placeholder(tf.float32,shape = [None])
  rows = tf.placeholder(tf.int32)

  return left_x, right_x, y, rows

### Defines the Base of the Siamese Model
Uses an LSTM Layer followed by a Dense Layer 

In [0]:
def siamese_model(inputs, reuse, rows, num_units = [513]):
  cells = [tf.contrib.rnn.BasicLSTMCell(num_units=n, reuse = reuse) for n in num_units]
  stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(cells)
  rnn_op, state = tf.nn.dynamic_rnn(stacked_lstm, inputs, dtype = tf.float32)
  dense_1 = tf.layers.dense(rnn_op, 513, activation=tf.nn.tanh, reuse = reuse)
  output = tf.reshape(dense_1,shape = [-1, rows*513])
  return output

### Define Complete Model
Total Number of Epochs =  100
Learning rate =  0.0005
Optimizer = Adam  
Loss Funtion = Sigmoid Cross Entropy  
Cost is the mean of all the losses for each prediction  

In [0]:
def model(train_data, test_data, learning_rate = 0.0005, num_epochs = 100):
  tf.reset_default_graph()

  x1, x2, y, rows  = create_placeholders()
  left_train, right_train, y_train = train_data
  left_test, right_test, y_test = test_data

  left_op = siamese_model(x1,False,rows)
  right_op = siamese_model(x2,True,rows)
  dot_prod = tf.reduce_sum(tf.multiply(left_op,right_op),axis = 1)
  yPred = tf.nn.sigmoid(dot_prod)

  binary_op = tf.cast(tf.math.greater(yPred,0.5), tf.int16)
  
  cost = tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(labels = y, logits = dot_prod))
  
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
  init = tf.global_variables_initializer()

  with tf.Session() as sess:
    sess.run(init)

    for epoch in range(num_epochs):
        epoch_loss = 0
        i = 0
        for left, right, y_batch in zip(left_train,right_train,y_train):
          row = left.shape[1]

          _, batch_loss = sess.run([optimizer, cost], feed_dict ={x1: left, x2: right, y:y_batch, rows:row })  
          epoch_loss += batch_loss
          i += 1
        
        test_accuracy = 0.0
        j = 0
        for left,right,y_batch in zip(left_test,right_test,y_test):
            row = left.shape[1]
            y_pred = sess.run(binary_op, feed_dict ={x1: left, x2: right, y:y_batch, rows: row})
            test_accuracy += sum(y_pred == y_batch)
            j+=1

        print(epoch,"Cost:", epoch_loss/i, " Test Accuracy: ", test_accuracy/j)

    test_accuracy = 0.0
    j = 0
    for left,right,y_batch in zip(left_test,right_test,y_test):
      row = left.shape[1]
      y_pred = sess.run(binary_op, feed_dict ={x1: left, x2: right, y:y_batch, rows: row})
      test_accuracy += sum(y_pred == y_batch)
      j+=1
    
    print("Final Test Accuracy: ", test_accuracy/j)
    return test_accuracy/j

### Training
Run the model

In [19]:
tr_data = [left_train, right_train, y_train.astype(float)]
te_data = [left_test, right_test, y_test]
acc = model(tr_data, te_data)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating

### Final Test Accuracy

In [20]:
print("Test Accuracy = ", acc)

Test Accuracy =  62.55


### Things I tried
Models - 
1. Stacking Convolution Layers and using Euclidean Distance between embeddings
2. Stacking GRUs
3. ConvLSTM2D
4. BinaryCrossEntropy Loss and Contrasive Loss

Used to get stuck at Test accuracy at 50% using the above models which were built using keras. I think the model wasn't reusing the weights so I built a new Siamese model just LSTM without keras

## References
1. https://keras.io/examples/mnist_siamese/
2. https://medium.com/predict/face-recognition-from-scratch-using-siamese-networks-and-tensorflow-df03e32f8cd0
3. https://becominghuman.ai/siamese-networks-algorithm-applications-and-pytorch-implementation-4ffa3304c18
4. https://towardsdatascience.com/one-shot-learning-with-siamese-networks-using-keras-17f34e75bb3d