# Mounting the google drive with audio files

In [1]:
from google.colab import drive
drive.mount('/content/gdrive/')

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


# Importing libraries

In [2]:
import os
import librosa
import glob
import pickle
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt

# Function to read data from pickle if exists else from audio files to dump into pickle and read it

In [0]:
def loaddata(pkl , abs_pkl , input):
  if os.path.exists(pkl) and os.path.exists(abs_pkl):
    s_l = pickle.load(open(pkl, 'rb' ))
    abs_s_l = pickle.load(open(abs_pkl, 'rb' ))
    print('Loading the data from the pickle')
    
    return s_l , abs_s_l
  
  else:
    s_l = []
    abs_s_l = []
    for file in sorted(glob.iglob(input)):
      s,sr = librosa.load(file , sr=None)
      S = librosa.stft(s, n_fft=1024, hop_length=512)
      s_l.append(S)

      abs_S = np.abs(S)
      abs_s_l.append(abs_S)

    pickle.dump(s_l, open(pkl, 'wb'))
    pickle.dump(abs_s_l, open(abs_pkl, 'wb'))
    
    print('Loading the data from input files to pickle')
    return s_l , abs_s_l

In [4]:
import os
os.listdir('/content/gdrive/My Drive/Colab Notebooks/data/timit-homework')

['cln_s.pkl',
 'abs_cln_s.pkl',
 'nse_s.pkl',
 'abs_nse_s.pkl',
 'mix_sp.pkl',
 'abs_mix_s.pkl',
 'v',
 'te',
 'val_mix_s.pkl',
 'val_abs_mix_s.pkl',
 't_s.pkl',
 't_abs_s.pkl',
 'val_cln_s.pkl',
 'val_abs_cln_s.pkl',
 'val_nse_s.pkl',
 'val_abs_nse_s.pkl']

In [0]:
#os.rename('/content/gdrive/My Drive/Colab Notebooks/data/timit-homework/abs_cln_s (1).pkl','/content/gdrive/My Drive/Colab Notebooks/data/timit-homework/abs_cln_s.pkl')

In [0]:
#os.rename('/content/gdrive/My Drive/Colab Notebooks/data/timit-homework/cln_s (1).pkl','/content/gdrive/My Drive/Colab Notebooks/data/timit-homework/cln_s.pkl')

# Reading values of clean, noise and mixed audios and appending it to list

Checking the length of the appended list to see if the data is correctly read

In [7]:
dir = 'gdrive/My Drive/Colab Notebooks/data/timit-homework/'
cln_s_pkl = dir + 'cln_s.pkl'
abs_cln_s_pkl = dir + 'abs_cln_s.pkl'
cln_s_p = dir + 'tr/trs*.wav'

cln_s_l, abs_cln_s_l = loaddata(cln_s_pkl, abs_cln_s_pkl, cln_s_p)
len(cln_s_l), len(abs_cln_s_l)

Loading the data from the pickle


(1200, 1200)

In [8]:
nse_s_pkl = dir + 'nse_s.pkl'
abs_nse_s_pkl = dir + 'abs_nse_s.pkl'
nse_s_p = dir + 'tr/trn*.wav'

nse_s_l, abs_nse_s_l = loaddata(nse_s_pkl, abs_nse_s_pkl, nse_s_p)
len(nse_s_l), len(abs_nse_s_l)

Loading the data from the pickle


(1200, 1200)

In [9]:
mix_s_pkl = dir + 'mix_sp.pkl'
abs_mix_s_pkl = dir + 'abs_mix_s.pkl'
mix_s_p = dir + 'tr/trx*.wav'

mix_s_l, abs_mix_s_l = loaddata(mix_s_pkl, abs_mix_s_pkl, mix_s_p)
len(mix_s_l), len(abs_mix_s_l)

Loading the data from the pickle


(1200, 1200)

In [10]:
val_cln_s_pkl = dir + 'val_cln_s.pkl'
val_abs_cln_s_pkl = dir + 'val_abs_cln_s.pkl'
val_cln_s_p = dir + 'v/vs*.wav'

val_cln_s_l, val_abs_cln_s_l = loaddata(val_cln_s_pkl, val_abs_cln_s_pkl, val_cln_s_p)
len(val_cln_s_l), len(val_abs_cln_s_l)

Loading the data from the pickle


(1200, 1200)

In [11]:
val_nse_s_pkl = dir + 'val_nse_s.pkl'
val_abs_nse_s_pkl = dir + 'val_abs_nse_s.pkl'
val_nse_s_p = dir + 'v/vn*.wav'

val_nse_s_l, val_abs_nse_s_l = loaddata(val_nse_s_pkl, val_abs_nse_s_pkl, val_nse_s_p)
len(val_nse_s_l), len(val_abs_nse_s_l)

Loading the data from the pickle


(1200, 1200)

In [12]:
val_mix_s_pkl = dir + 'val_mix_s.pkl'
val_abs_mix_s_pkl = dir + 'val_abs_mix_s.pkl'
val_mix_s_p = dir + 'v/vx*.wav'

val_mix_s_l, val_abs_mix_s_l = loaddata(val_mix_s_pkl, val_abs_mix_s_pkl, val_mix_s_p)
len(val_mix_s_l), len(val_abs_mix_s_l)

Loading the data from the pickle


(1200, 1200)

In [13]:
t_s_pkl = dir + 't_s.pkl'
t_abs_s_pkl = dir + 't_abs_s.pkl'
t_s_p = dir + 'te/tex*.wav'

t_s_l, t_abs_s_l = loaddata(t_s_pkl, t_abs_s_pkl, t_s_p)
len(t_s_l), len(t_abs_s_l) 

Loading the data from the pickle


(400, 400)

# Function for the mask
We are converting the variable length to fixed length by taking the longest of the two sequences

In [0]:
def ibm(cln_s , nse_s):

  mask = np.greater(cln_s , nse_s) * 1
  
  return mask

In [0]:
#for i in range(0 , len(abs_cln_s_l)):
 # print(abs_cln_s_l[i].shape,abs_nse_s_l[i].shape)

In [0]:
mask_l = []

for i in range(0 , len(abs_cln_s_l)):
  #print(abs_cln_s_l[i].shape,abs_nse_s_l[i].shape)
  mask = ibm(abs_cln_s_l[i].T , abs_nse_s_l[i].T)
  
  mask_l.append(mask)

In [0]:
#mask_l

# Leaning rate,epochs and batch size

In [0]:
learning_rate = 0.003
n_epochs = 60
batch_size = 10

In [0]:
X = tf.placeholder(tf.float32, [None, None , 513])
y = tf.placeholder(tf.float32, [None, None , 513])

# Function for lstm cell

 I have used he initializer and sigmoid activation function for the lstm model 

In [0]:
def lstm(x , hidden_units):
  #LSTM
  lstm_cell = tf.contrib.rnn.LSTMCell(hidden_units, initializer=tf.contrib.layers.variance_scaling_initializer())
  dropout_lstm = tf.nn.rnn_cell.DropoutWrapper(lstm_cell , output_keep_prob=0.9)
  lstm_output , state = tf.nn.dynamic_rnn(dropout_lstm , x , dtype=tf.float32)

  output = tf.layers.dense(lstm_output , 513 , activation=tf.nn.sigmoid , kernel_initializer=tf.contrib.layers.variance_scaling_initializer())
  
  return output

# Loss, Optimizer and Session

In [22]:

output = lstm(X,  513)
loss = tf.losses.mean_squared_error(labels = y ,predictions = output)
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Please use `keras.layers.RNN(cell)`, which is equivalent to this API
Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.wh

# Calculating loss and sum loss for each epoch and appending it in a list

In [23]:
loss_l = []
sum_loss_l = []

for epoch in range(n_epochs):
    loss_l = []
    for i in range(0 , 1200, batch_size):
          batch_x = [abs_mix_s_l[j].T for j in range(i,i+batch_size)]
          batch_x = np.array(batch_x)
          
          batch_y = np.array(mask_l[i:i+batch_size])
          
          batch_x = batch_x.reshape(batch_size,-1,513)
          batch_y = batch_y.reshape(batch_size,-1,513)
        
          feed_dict = {X: batch_x, y: batch_y}
          train_step.run(feed_dict=feed_dict)

          loss1 = loss.eval(feed_dict=feed_dict)
          loss_l.append(loss1)
    
    sum_loss_l.append(sum(loss_l))
    print("Epoch:",epoch, " loss:",sum(loss_l))

Epoch: 0  loss: 24.459532618522644
Epoch: 1  loss: 20.175379991531372
Epoch: 2  loss: 18.533134669065475
Epoch: 3  loss: 17.85220941901207
Epoch: 4  loss: 17.123674362897873
Epoch: 5  loss: 16.612551115453243
Epoch: 6  loss: 16.234995365142822
Epoch: 7  loss: 15.884008184075356
Epoch: 8  loss: 15.61950547248125
Epoch: 9  loss: 15.32732992619276
Epoch: 10  loss: 15.019042037427425
Epoch: 11  loss: 14.756883069872856
Epoch: 12  loss: 14.632919698953629
Epoch: 13  loss: 14.400542326271534
Epoch: 14  loss: 14.175693146884441
Epoch: 15  loss: 13.973121732473373
Epoch: 16  loss: 13.850290581583977
Epoch: 17  loss: 13.727804705500603
Epoch: 18  loss: 13.623232401907444
Epoch: 19  loss: 13.464034527540207
Epoch: 20  loss: 13.309731185436249
Epoch: 21  loss: 13.149669088423252
Epoch: 22  loss: 13.052400708198547
Epoch: 23  loss: 12.930848889052868
Epoch: 24  loss: 12.877018183469772
Epoch: 25  loss: 12.791102916002274
Epoch: 26  loss: 12.606779530644417
Epoch: 27  loss: 12.477941334247589
Epoch

# Feed forward function

In [0]:

def feed_forward(input, output):
    output = output.eval(feed_dict = {X : input})
    
    return output


# Function to calculate SNR for the denoised audio

In [0]:
def calculate_SNR(cln_s , rec_s , size):
  cln_s = cln_s[: size]
  S_cap1 = np.dot(cln_s.T , cln_s)
  S_cap2 = np.dot((cln_s - rec_s).T,(cln_s - rec_s))
  SNR = 10 * np.log10(S_cap1/S_cap2)

  
  return SNR

In [26]:
len(val_abs_mix_s_l),len(val_mix_s_l)

(1200, 1200)

# Calculating SNR for the Validation Set

In [0]:
SNR = []
for i in range(1200):

  ff_ip = np.array([val_abs_mix_s_l[i].T])
  ff_op = feed_forward(ff_ip , output)

  s_clex = np.array([val_mix_s_l[i].T])
  shat = np.multiply(ff_op, s_clex)
  shat = shat.T[:,:,0]
  recon_s = librosa.istft(shat , hop_length=512 , win_length=1024)
  
  file_n = str(i).zfill(4)
  librosa.output.write_wav('val_recon' + file_n + '.wav', recon_s, sr = 16000)
  
  cln_s = librosa.istft(val_cln_s_l[i] , hop_length=512 , win_length=1024)
  
  size_recon_s = np.shape(recon_s)[0]
  
  snr = calculate_SNR(cln_s , recon_s , size_recon_s)
  (recon_s[i]).shape
  
  SNR.append(snr)

In [29]:
print('Minimum SNR of 1200 files:',min(SNR))
print('Maximum SNR of 1200 files:',max(SNR))
print('Mean SNR of 1200 files:',np.mean(SNR))


Minimum SNR of 1200 files: 4.776942133903503
Maximum SNR of 1200 files: 27.115120887756348
Mean SNR of 1200 files: 11.701403932273388


# Generating Reconstructed test audios

In [0]:
from google.colab import files
for i in range(400):
  ff_ip = np.array([t_abs_s_l[i].T])
  ff_op = feed_forward(ff_ip , output)

  s_clex = np.array([t_s_l[i].T])
  shat = np.multiply(ff_op, s_clex)
  shat = shat.T[:,:,0]
  recon_s = librosa.istft(shat , hop_length=512 , win_length=1024)
  
  file_n = str(i).zfill(4)
  librosa.output.write_wav('test_recon' + file_n + '.wav', recon_s, sr = 16000)
  files.download('test_recon' + file_n + '.wav')