# Deep Learning Assignment-4-1

## Mounting the drive

In [1]:
from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


##Importing libraries

In [2]:
import os
import librosa
import numpy as np
import pickle
import tensorflow as tf
import random
import matplotlib.pyplot as plt

In [3]:
%pwd
%cd gdrive/My Drive/Colab Notebooks/dl_assignment_4
%pwd

/content/gdrive/My Drive/Colab Notebooks/dl_assignment_4


'/content/gdrive/My Drive/Colab Notebooks/dl_assignment_4'

In [4]:
%ls -ltr

total 71619
-rw------- 1 root root  3223971 Nov 27 03:38 hw4_te7.pkl
-rw------- 1 root root 18104961 Nov 27 03:38 hw4_tes.pkl
-rw------- 1 root root 19647203 Nov 27 03:39 hw4_tr7.pkl
-rw------- 1 root root 32360162 Nov 27 03:40 hw4_trs.pkl


## Reading the data from pickle files

In [0]:
with open('hw4_trs.pkl', 'rb') as f:
  train_p_s = pickle.load(f)

In [0]:
with open('hw4_tes.pkl', 'rb') as f:
  test_p_s = pickle.load(f)

## Function to make speech and absolute speech list

In [0]:
def LoadData(pkl):
  s_l = []
  abs_s_l = []
  
  for file in pkl:
    S = librosa.stft(file, n_fft=1024, hop_length=512)
    abs_S = np.abs(S)
    
    s_l.append(np.transpose(S))
    abs_s_l.append(np.transpose(abs_S))
  print(len(s_l),len(abs_s_l))    
  return s_l , abs_s_l

In [8]:
train_s,train_abs=LoadData(train_p_s)
test_s,test_abs=LoadData(test_p_s)

500 500
200 200


In [9]:
train_abs

[array([[5.8302157e-02, 3.0648101e-02, 3.6731516e-03, ..., 8.2176295e-05,
         3.9720531e-03, 5.6832400e-03],
        [1.9710887e-02, 1.3129367e-02, 2.6153225e-02, ..., 1.9623231e-04,
         8.8176347e-04, 1.8317466e-04],
        [5.5986095e-02, 3.4570277e-02, 1.0422288e-02, ..., 8.4975490e-04,
         7.6210400e-04, 4.1595186e-04],
        ...,
        [1.7804930e-02, 1.2065225e-02, 7.2754677e-03, ..., 6.2095646e-02,
         9.1026112e-02, 8.7816514e-02],
        [4.0847674e-02, 2.9208636e-02, 9.5448177e-03, ..., 1.9229003e-03,
         2.8234351e-02, 4.4833506e-03],
        [3.1192869e-02, 7.2313659e-02, 5.7868779e-02, ..., 3.5997327e-02,
         1.7384330e-02, 3.7851665e-02]], dtype=float32),
 array([[0.02651631, 0.01149094, 0.00238631, ..., 0.00408146, 0.00277443,
         0.00204568],
        [0.03155294, 0.017131  , 0.00196619, ..., 0.00112135, 0.00039424,
         0.00010901],
        [0.02640126, 0.01126922, 0.00266385, ..., 0.00064208, 0.00035836,
         0.0001147 ]

In [10]:
train_abs[0].shape

(32, 513)

## Extracting positive pairs

In [0]:
def positive_pairs(L , begin , end):
  pairs = []
  for i in range(begin , end):
    for j in range(i+1 , end):
      pairs.append([i , j])
  
  if L == 45:
    return pairs
  else:    
    L_pairs = random.choices(pairs , k = L)
    return L_pairs

## Extracting negative pairs

In [0]:

def negative_pairs(L , begin , end , iter):
  pairs = []
  spr = list(range(begin , end))
  for i in range(0 , L):    
    s = random.randrange(begin , end)
    non_s = random.randrange(0 , iter)
    
    while non_s in spr:
      non_s = random.randrange(0 , iter)
      
    pairs.append([s , non_s])
  
  return pairs

## Making positive and negative pair as 2L pairs

In [0]:
def make_pairs(x , positive_p , negative_p):
  
  left_p = x[positive_p[0][0]]
  right_p = x[positive_p[0][1]]
  
  x_dim = np.shape(left_p)[0]
  y_dim = np.shape(right_p)[1]
  
  left_p = left_p.reshape((-1 ,  x_dim , y_dim))
  right_p = right_p.reshape((-1 ,  x_dim , y_dim))

  for j in range(1 , len(positive_p)):
    l_p = x[positive_p[j][0]].reshape((-1 , x_dim , y_dim))
    r_p = x[positive_p[j][1]].reshape((-1 , x_dim , y_dim))
    
    left_p = np.vstack((left_p , l_p))
    right_p = np.vstack((right_p , r_p))

  left_n = x[negative_p[0][0]]
  right_n = x[negative_p[0][1]]

  left_n = left_n.reshape((-1 , x_dim , y_dim))
  right_n = right_n.reshape((-1 , x_dim , y_dim))

  for j in range(1 , len(negative_p)):
    l_n = x[negative_p[j][0]].reshape((-1 , x_dim , y_dim))
    r_n = x[negative_p[j][1]].reshape((-1 , x_dim , y_dim))
    
    left_n = np.vstack((left_n , l_n))
    right_n = np.vstack((right_n , r_n))

  all_left = np.vstack((left_p , left_n))
  all_right = np.vstack((right_p , right_n))
  
  return all_left , all_right

## Defining the Siamese Model 

In [0]:
def Siamese_network_Model(x , gru_h_u , time , reuse = False):
  cells = []
  for i in range(len(gru_h_u)):
    gru_c = tf.nn.rnn_cell.GRUCell(num_units= gru_h_u[i] , kernel_initializer = tf.contrib.layers.xavier_initializer(), reuse=reuse , name='GRU_cell_'+str(i))
    cells.append(gru_c)
  multi_gru_c = tf.nn.rnn_cell.MultiRNNCell(cells)

  gru_o , s = tf.nn.dynamic_rnn(multi_gru_c , x , dtype=tf.float32 , swap_memory = True)
  b_gru = tf.contrib.layers.batch_norm(gru_o, is_training=True, updates_collections=None)
  
  output = tf.layers.dense(inputs=b_gru , units=30, activation = tf.nn.tanh ,
                           kernel_initializer = tf.contrib.layers.variance_scaling_initializer(),
                           bias_initializer = tf.zeros_initializer(), 
                           reuse = reuse, name='op')
  
  dropout = tf.nn.dropout(output , keep_prob = 0.9 , name = 'drop')
  
  f_output = tf.layers.dense(inputs=output , units=10, activation = tf.nn.tanh ,
                           kernel_initializer = tf.contrib.layers.variance_scaling_initializer(),
                           bias_initializer = tf.zeros_initializer(), 
                           reuse = reuse, name='op1')
  
  f_Output = tf.reshape(f_output,shape = [-1,time*10])
  
  return f_Output

In [0]:
l_i = tf.placeholder(tf.float32, [None, None , 513])
r_i = tf.placeholder(tf.float32, [None, None , 513])
label = tf.placeholder(tf.float32, [None])
time = tf.placeholder(tf.int32)

In [0]:
gru_hidden_units = [256 , 128]
learning_rate = 0.001
n_epochs = 200
num_utterances = 10
L = 45

In [17]:
left_output = Siamese_network_Model(l_i , gru_hidden_units, time , reuse =  False)
right_output = Siamese_network_Model(r_i , gru_hidden_units , time , reuse = True)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.GRUCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead 

In [0]:
label_pred = tf.reduce_sum(tf.multiply(left_output, right_output),axis=1 , name='dotprod')
label_pred_sig = tf.nn.sigmoid(label_pred)

In [19]:
loss = tf.nn.sigmoid_cross_entropy_with_logits(labels= label , logits= label_pred)
loss_calculate = tf.reduce_sum(loss)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss_calculate)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [0]:
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

In [0]:
loss_list = []
f_label = []

## Training the model

In [22]:
for epoch in range(n_epochs+1):
    for i in range(0 , len(train_s), num_utterances):
      begin = i
      end = i + num_utterances
      #print(L,begin,end)
      positive_pair = positive_pairs(L , begin , end)
      negative_pair = negative_pairs(L , begin , end , len(train_s))

      left , right = make_pairs(train_abs , positive_pair , negative_pair)
      
      if epoch == 0:    
        ones = np.ones(L)
        zeroes = np.zeros(L)

        f_label.extend(ones)
        f_label.extend(zeroes)

      if i == 0:
        all_left = left
        all_right = right
      else:
        all_left = np.vstack((all_left , left))
        all_right = np.vstack((all_right , right))
    
    train_time = all_left.shape[1]
    feed_dict = {l_i: all_left, r_i: all_right, time:train_time, label: f_label}
    train_step.run(feed_dict=feed_dict)

    
    if epoch%5 == 0:
        loss_calc = loss_calculate.eval(feed_dict=feed_dict)
        loss_list.append((loss_calc))
        print("Epoch %d, loss %g"%(epoch, loss_calc))

Epoch 0, loss 141988
Epoch 5, loss 34129.4
Epoch 10, loss 23009.7
Epoch 15, loss 12200.1
Epoch 20, loss 8457.15
Epoch 25, loss 6003.04
Epoch 30, loss 4566.5
Epoch 35, loss 3676.02
Epoch 40, loss 3344.06
Epoch 45, loss 3091.1
Epoch 50, loss 2981.13
Epoch 55, loss 2808.74
Epoch 60, loss 2738.53
Epoch 65, loss 2630.51
Epoch 70, loss 2565.89
Epoch 75, loss 2514.88
Epoch 80, loss 2432.05
Epoch 85, loss 2330.45
Epoch 90, loss 2328.26
Epoch 95, loss 2238.72
Epoch 100, loss 2125.31
Epoch 105, loss 2047.35
Epoch 110, loss 1986.52
Epoch 115, loss 1930.17
Epoch 120, loss 1910.53
Epoch 125, loss 1809.42
Epoch 130, loss 1711.72
Epoch 135, loss 1672.63
Epoch 140, loss 1666.81
Epoch 145, loss 1615.91
Epoch 150, loss 1536.49
Epoch 155, loss 1441.1
Epoch 160, loss 1393.51
Epoch 165, loss 1346.5
Epoch 170, loss 1378.04
Epoch 175, loss 1290.77
Epoch 180, loss 1247.19
Epoch 185, loss 1230.49
Epoch 190, loss 1140.53
Epoch 195, loss 1090.21
Epoch 200, loss 1042.96


## Testing with test data

In [0]:
f_label = []
for i in range(0 , len(test_s), num_utterances):
  begin = i
  end = i + num_utterances
  test_p_pair = positive_pairs(L , begin , end)
  test_n_pair = negative_pairs(L , begin , end , len(test_s))

  test_l , test_r = make_pairs(test_abs , test_p_pair , test_n_pair)
  
  
  ones = np.ones(L)
  zeroes = np.zeros(L)

  f_label.extend(ones)
  f_label.extend(zeroes)

  if i == 0:
    test_a_l = test_l
    test_a_r = test_r
  else:
    test_a_l = np.vstack((test_a_l , test_l))
    test_a_r = np.vstack((test_a_r , test_r))

In [0]:
test_time = test_a_l.shape[1]
feed_dict = {l_i: test_a_l, r_i: test_a_r, time:test_time}

pred_output = label_pred_sig.eval(feed_dict=feed_dict)

In [0]:
for i in range(len(pred_output)):
  if pred_output[i] >= 0.5:
    pred_output[i] = 1
  else:
    pred_output[i] = 0

## Accuracy

In [28]:
accuracy = sum(pred_output == f_label)
accuracy=accuracy/len(pred_output)
accuracy=round(accuracy*100,3)
print('Accuracy for test: ', accuracy)

Accuracy for test:  70.5
