In [0]:
import librosa
import numpy as np
import tensorflow as tf

In [0]:
#Loading training and testing files
#Computing STFT on all the files

#STFT analysis settings shared by every file loaded in this cell
N_FFT = 1024
HOP_LENGTH = 512


def _load_with_stft(path):
    """Load one audio file and compute its STFT.

    The file is read with sr=None, i.e. librosa keeps the file's own
    sample rate instead of resampling.

    Returns:
        (signal, sample_rate, stft_matrix) where stft_matrix is the complex
        STFT computed with N_FFT / HOP_LENGTH.
    """
    signal, rate = librosa.load(path, sr=None)
    return signal, rate, librosa.stft(signal, n_fft=N_FFT, hop_length=HOP_LENGTH)


#Clean / noisy training pair
s, sr, S = _load_with_stft('train_clean_male.wav')
sn, sr, X = _load_with_stft('train_dirty_male.wav')

#Noisy test files
#NOTE: sr is overwritten by every load, so afterwards it holds the sample
#rate of test_x_02.wav; this assumes all four files share one sample rate.
x_test, sr, X_test = _load_with_stft('test_x_01.wav')
x_test2, sr, X_test2 = _load_with_stft('test_x_02.wav')

In [0]:
#Magnitude spectrograms (absolute value of each complex STFT); the phase is
#discarded here and taken from the original STFTs again at reconstruction time
mag_S, mag_X, mag_X_test, mag_X_test2 = (
    np.abs(spectrum) for spectrum in (S, X, X_test, X_test2)
)

#Training hyperparameters
learning_rate = 2e-4
num_epochs = 1000

In [0]:
# Graph inputs: each row is one spectrogram frame with 513 values.
# `input` receives the frames fed to the network, `labels` the target frames.
# NOTE: `input` shadows the Python builtin of the same name; the name is kept
# because other cells reference it.
input = tf.placeholder(dtype=tf.float32, shape=[None, 513])
labels = tf.placeholder(dtype=tf.float32, shape=[None, 513])

In [0]:
def getModel(x):
  """Build the 1-D convolutional network and return its output tensor.

  Architecture, in order:
    * reshape each 513-value row to (513, 1) so conv1d sees one channel
    * conv1d (16 filters, kernel 16, "same" padding, ReLU) + max-pool (2, stride 2)
    * conv1d (32 filters, kernel 8, "same" padding, ReLU) + max-pool (2, stride 2)
    * flatten
    * dense layer with 513 ReLU units

  Args:
    x: tensor that can be reshaped to [-1, 513, 1].

  Returns:
    The output of the final dense layer (513 values per row).
  """
  # Add the trailing channel axis expected by conv1d
  features = tf.reshape(x, [-1, 513, 1])

  # One (filters, kernel_size) pair per conv + pool stage
  for n_filters, kernel_width in ((16, 16), (32, 8)):
    features = tf.layers.conv1d(
        inputs=features,
        filters=n_filters,
        kernel_size=kernel_width,
        padding="same",
        activation=tf.nn.relu)
    features = tf.layers.max_pooling1d(inputs=features, pool_size=2, strides=2)

  # Collapse the (length, channels) axes before the fully connected layer
  flattened = tf.layers.flatten(features)

  return tf.layers.dense(inputs=flattened, units=513, activation=tf.nn.relu)

In [6]:
output = getModel(input)
#Defining the loss function along with its optimizer:
#mean squared error between the network output and the target frames
loss = tf.reduce_mean(tf.square(output - labels))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#The session is intentionally left open: later cells call .eval() on `output`
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())

batch_size = 100

#Spectrograms are laid out as (frequency bins, frames). The number of frames
#is taken from the data rather than hard-coded, so a different training file
#works without editing the loop. min() guards against the two spectrograms
#differing in length.
num_frames = min(mag_X.shape[1], mag_S.shape[1])

#Epochs 0..num_epochs inclusive are run, so the final report is labelled
#"Epoch <num_epochs>".
for epoch in range(num_epochs + 1):
    #Mini batching with the given batch size; the last batch may be shorter
    for start in range(0, num_frames, batch_size):
        stop = min(start + batch_size, num_frames)
        #Transpose so that each row fed to the network is one frame
        feed_dict = {input: mag_X[:, start:stop].T,
                     labels: mag_S[:, start:stop].T}
        train_step.run(feed_dict=feed_dict)

    if epoch % 100 == 0:
        #The reported loss is evaluated on the last mini-batch of the epoch
        #only, not on the full training set
        loss_calc = loss.eval(feed_dict=feed_dict)
        print("Epoch %d, loss %g"%(epoch, loss_calc))

Instructions for updating:
Use keras.layers.conv1d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.max_pooling1d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Epoch 0, loss 0.0128128
Epoch 100, loss 0.00114099
Epoch 200, loss 0.000843799
Epoch 300, loss 0.000699082
Epoch 400, loss 0.000653681
Epoch 500, loss 0.000602551
Epoch 600, loss 0.000548083
Epoch 700, loss 0.000548907
Epoch 800, loss 0.000477183
Epoch 900, loss 0.000443766
Epoch 1000, loss 0.000448131


In [0]:
#Running the trained network on a batch of input frames
def feedforward(input_data, dnn_output):
    """Evaluate `dnn_output` with `input_data` fed to the `input` placeholder.

    Args:
        input_data: value fed for the module-level `input` placeholder.
        dnn_output: object whose ``eval(feed_dict=...)`` is called (the
            network's output tensor in this notebook).

    Returns:
        Whatever ``dnn_output.eval`` returns for that feed.
    """
    return dnn_output.eval(feed_dict={input: input_data})

#Recovering the complex values of the file from the output of the model
def recover_sound(X , mag_X , mag_output):
  """Combine the phase of `X` with the magnitudes in `mag_output`.

  Computes (X / mag_X) * mag_output element-wise.

  Bins where `mag_X` is zero have no defined phase; the plain division would
  yield NaN (0/0) there and poison the reconstructed signal. For those bins a
  unit phase factor (1) is used, so the result is simply `mag_output`. All
  other bins are computed exactly as X / mag_X * mag_output.

  Args:
    X: complex spectrogram supplying the phase.
    mag_X: magnitude of `X` (same shape, or broadcastable to it).
    mag_output: magnitudes to impose on the result.

  Returns:
    Array holding the re-phased spectrogram.
  """
  X = np.asarray(X)
  mag_X = np.asarray(mag_X)

  # Start from a unit phase factor everywhere, then overwrite it with
  # X / mag_X only where that division is well defined.
  phase = np.ones(np.broadcast(X, mag_X).shape,
                  dtype=np.result_type(X, mag_X, np.float32))
  np.divide(X, mag_X, out=phase, where=(mag_X != 0))

  return phase * mag_output

In [0]:
#Test file 1: predict the magnitudes (frames are fed as rows, hence the
#transposes), then recover the complex spectrogram using X_test and mag_X_test
s_hat_test1 = feedforward(mag_X_test.T, output)
s_hat1 = recover_sound(X_test, mag_X_test, s_hat_test1.T)

#Test file 2: same two steps
s_hat_test2 = feedforward(mag_X_test2.T, output)
s_hat2 = recover_sound(X_test2, mag_X_test2, s_hat_test2.T)

In [0]:
#Inverse STFT of the denoised spectrograms, then writing them out as wav files
#`sr` is the sample rate left behind by the most recent librosa.load call;
#this assumes both test files use that same rate.
istft_settings = dict(hop_length=512, win_length=1024)

recon_sound = librosa.istft(s_hat1, **istft_settings)
librosa.output.write_wav('test_s_01_recons_q1.wav', recon_sound, sr)

recon_sound2 = librosa.istft(s_hat2, **istft_settings)
librosa.output.write_wav('test_s_02_recons_q1.wav', recon_sound2, sr)

In [0]:
#Sanity check on the training data: run the noisy training spectrogram
#through the model and rebuild a waveform from the prediction
s_hat_test3 = feedforward(mag_X.T, output)
s_hat3 = recover_sound(X, mag_X, s_hat_test3.T)
recon_sound3 = librosa.istft(s_hat3, hop_length=512, win_length=1024)

#Number of samples in the reconstructed waveform
size_recon_sound3 = recon_sound3.shape[0]

In [11]:
#Signal-to-noise ratio of the reconstruction against the clean signal:
#SNR = 10 * log10( sum(s^2) / sum((s - s_hat)^2) )
#The clean signal is first cut to the reconstruction's length so the two
#can be subtracted sample by sample.
s = s[: size_recon_sound3]
residual = s - recon_sound3
signal_energy = np.dot(s.T, s)
residual_energy = np.dot(residual.T, residual)
SNR = 10 * np.log10(signal_energy / residual_energy)
print('Value of SNR : ' + str(SNR))

Value of SNR : 16.28388285636902
