# Simple Speech Denoising

### Imports

In [0]:
import librosa
import numpy as np
import tensorflow as tf

### Upload relevant files

Please upload the following files
1. test_x_01.wav
2. test_x_02.wav
3. train_clean_male.wav
4. train_dirty_male.wav

In [3]:
# Colab-only helper module for moving files between the browser and the runtime.
from google.colab import files

# Opens the upload dialog; per the cell output the four .wav files are saved
# into the working directory under their original names. `uploaded` itself is
# not read again in the visible cells — later cells load the files by name.
uploaded = files.upload()

Saving test_x_01.wav to test_x_01.wav
Saving test_x_02.wav to test_x_02.wav
Saving train_clean_male.wav to train_clean_male.wav
Saving train_dirty_male.wav to train_dirty_male.wav


### Load and Convert
Load the relevant files using Librosa and convert them from time domain to frequency domain using STFT (Short Time Fourier Transform)

In [0]:
# Read both training recordings first; sr=None keeps each file's native
# sample rate instead of resampling. `sr` ends up holding the rate of the
# noisy file, which is the one the reconstruction cell below writes out with.
s, sr = librosa.load('train_clean_male.wav', sr=None)
sn, sr = librosa.load('train_dirty_male.wav', sr=None)

# Short-time Fourier transforms with a 1024-point FFT and a 512-sample hop
# (a 1024-point FFT yields the 513 frequency bins the network expects).
S_clean = librosa.stft(s, hop_length=512, n_fft=1024)
X_dirty = librosa.stft(sn, hop_length=512, n_fft=1024)

### Take Magnitudes

S and X are complex-valued, so we take their magnitudes

In [0]:
# Element-wise modulus of each complex spectrogram; the network is trained on
# magnitudes only, and the phase is re-attached after denoising.
S_mag, X_mag = (np.abs(spectrogram) for spectrogram in (S_clean, X_dirty))

## Training

### Create Placeholders

In [0]:
def create_placeholders(n_x = 513, n_y = 513):
  """Create the float32 input/target placeholders for the network.

  Both placeholders are laid out as (features, examples); the second
  dimension is left as None so any number of columns can be fed.

  Args:
    n_x: number of rows (features) of the input placeholder.
    n_y: number of rows (features) of the target placeholder.

  Returns:
    Tuple (X, Y) of placeholders with shapes [n_x, None] and [n_y, None].
  """
  row_counts = (n_x, n_y)
  # Built in (X, Y) order so the auto-generated op names match creation order.
  X, Y = [tf.placeholder(tf.float32, [rows, None]) for rows in row_counts]
  return X, Y

### Initialize Parameters
Initializing weights and biases for all layers. <br>
Using the Xavier initializer for weights and the zeros initializer for biases <br>
Each hidden layer has 1024 hidden units

In [0]:
def initialize_parameters(n_x = 513, n_h = 1024, n_y = 513):
  """Create the weight and bias variables of the 3-layer network.

  Weights use the Xavier initializer and biases start at zero. The layer
  sizes used to be hard-coded; they are now parameters whose defaults
  reproduce the original 513 -> 1024 -> 1024 -> 513 architecture, so
  existing zero-argument calls behave exactly as before.

  Args:
    n_x: number of input features (rows of the input matrix).
    n_h: number of units in each of the two hidden layers.
    n_y: number of output features.

  Returns:
    Dict with keys "W1", "b1", "W2", "b2", "W3", "b3" (in that order)
    mapping to the created tf variables. Each W has shape [n_out, n_in] and
    each b has shape [n_out, 1], matching the W @ X + b convention used by
    forward_propagation.
  """
  # (layer suffix, output size, input size) for each layer, in creation order.
  layer_shapes = [
      ("1", n_h, n_x),
      ("2", n_h, n_h),
      ("3", n_y, n_h),
  ]

  parameters = {}
  for suffix, n_out, n_in in layer_shapes:
    parameters["W" + suffix] = tf.get_variable(
        "W" + suffix, [n_out, n_in],
        initializer = tf.contrib.layers.xavier_initializer())
    parameters["b" + suffix] = tf.get_variable(
        "b" + suffix, [n_out, 1],
        initializer = tf.zeros_initializer())

  return parameters

### Forward Propagation

Fully Connected 3 Layer Neural Network <br>
Using ReLU activation for the hidden layers (the output-layer ReLU is applied afterwards, in the cost and clean functions)

In [0]:
def forward_propagation(X, parameters):
  """Run the 3-layer fully connected network up to its linear output.

  Computes W3 @ relu(W2 @ relu(W1 @ X + b1) + b2) + b3. No activation is
  applied to the last layer here; callers apply ReLU to the result.

  Args:
    X: input with one example per column (features x examples).
    parameters: mapping holding "W1", "b1", "W2", "b2", "W3", "b3".

  Returns:
    Z3, the pre-activation output of the third layer.
  """
  activation = X
  # The two hidden layers share the same affine + ReLU structure.
  for suffix in ("1", "2"):
    weights = parameters['W' + suffix]
    bias = parameters['b' + suffix]
    activation = tf.nn.relu(tf.matmul(weights, activation) + bias)

  # Output layer: affine only.
  return tf.matmul(parameters['W3'], activation) + parameters['b3']
  

### Compute Cost
Loss function -> MSE (Mean Squared Error)

Cost -> Mean of the MSE of each prediction - label pair

In [0]:
def compute_cost(Z3, Y):
  """Mean squared error between the rectified network output and the target.

  Args:
    Z3: linear output of the last layer (see forward_propagation).
    Y: target magnitudes, same shape as Z3.

  Returns:
    Scalar tensor holding the MSE between relu(Z3) and Y.
  """
  # ReLU keeps the prediction non-negative, like the magnitude targets.
  predictions = tf.nn.relu(Z3)

  # tf.losses.mean_squared_error takes (labels, predictions); pass them by
  # keyword so the roles are not swapped. With the default reduction the op
  # already returns a scalar, so no further reduce_mean is needed.
  cost = tf.losses.mean_squared_error(labels = Y, predictions = predictions)

  return cost

### Initialize the Model for Training
Running the model for 100 epochs  
learning rate = 1e-3  
Using the Adam Optimizer  

In [0]:
def model(X_mag, S_mag, learning_rate = 0.001, epochs = 100, print_cost = True):
  """Train the denoising network with full-batch Adam and return its weights.

  Resets the default graph, builds placeholders / parameters / cost, then
  runs `epochs` optimisation steps, each one feeding the whole of X_mag and
  S_mag at once.

  Args:
    X_mag: 2-D array of noisy magnitudes, features x frames.
    S_mag: 2-D array of clean magnitudes, features x frames.
    learning_rate: Adam learning rate.
    epochs: number of full-batch optimisation steps.
    print_cost: when truthy, print the cost every 10 epochs.

  Returns:
    Dict with the same keys as initialize_parameters() but with the trained
    values evaluated to numpy arrays, so it can be reused after the session
    and graph are gone.
  """
  tf.reset_default_graph()
  n_x, _ = X_mag.shape  # unpacking also enforces that X_mag is 2-D
  n_y = S_mag.shape[0]

  X, Y = create_placeholders(n_x, n_y)
  parameters = initialize_parameters()
  print(parameters)
  Z3 = forward_propagation(X, parameters)
  cost = compute_cost(Z3, Y)

  train_op = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
  init = tf.global_variables_initializer()

  # The context manager closes the session on exit; no explicit close needed.
  with tf.Session() as sess:
    sess.run(init)

    for epoch in range(epochs):
      _, epoch_cost = sess.run([train_op, cost], feed_dict = {X: X_mag, Y: S_mag})

      if print_cost and epoch % 10 == 0:
        print("Cost after epoch %i: %f" %(epoch, epoch_cost))

    # Evaluate the variables to plain arrays before the session goes away.
    parameters = sess.run(parameters)
    print ("Parameters have been trained!")

  return parameters

### Run the model

In [11]:
# Train on the noisy/clean magnitude pair with the default hyper-parameters.
parameters = model(X_mag=X_mag, S_mag=S_mag)

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

{'W1': <tf.Variable 'W1:0' shape=(1024, 513) dtype=float32_ref>, 'b1': <tf.Variable 'b1:0' shape=(1024, 1) dtype=float32_ref>, 'W2': <tf.Variable 'W2:0' shape=(1024, 1024) dtype=float32_ref>, 'b2': <tf.Variable 'b2:0' shape=(1024, 1) dtype=float32_ref>, 'W3': <tf.Variable 'W3:0' shape=(513, 1024) dtype=float32_ref>, 'b3': <tf.Variable 'b3:0' shape=(513, 1) dtype=float32_ref>}
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Cost after epoch 0: 0.098789
Cost after epoch 10: 0.026831
Cost after epoch 20: 0.013331
Cost after epoch 30: 0.009327
Cost after epoch 40: 0.007324
Cost after epo

### Clean Function
Forward propagation on trained network  
input data - Noisy Signal  
output data - Clean Signal  

In [0]:
def clean(X_mag, parameters):
  """Denoise a magnitude spectrogram with the trained network.

  Resets the default graph, rebuilds the forward pass around `parameters`
  and evaluates relu(forward_propagation(X)) on X_mag.

  Args:
    X_mag: 2-D array of noisy magnitudes, features x frames.
    parameters: trained weights/biases as returned by model(), i.e. already
      evaluated values rather than variables of a previous graph.

  Returns:
    Array of estimated clean magnitudes (non-negative because of the final
    ReLU), one column per input frame.
  """
  tf.reset_default_graph()

  n_x, _ = X_mag.shape  # unpacking also enforces that X_mag is 2-D

  X = tf.placeholder(tf.float32, [n_x, None])
  # The output-layer ReLU is applied here, mirroring compute_cost.
  S_hat = tf.nn.relu(forward_propagation(X, parameters))

  # The graph was just reset and only a placeholder and ops were added, so
  # it holds no variables and needs no initializer. The context manager
  # closes the session on exit.
  with tf.Session() as sess:
    return sess.run(S_hat, feed_dict = {X: X_mag})
    

Clean the mixture X_mag

In [0]:
# Denoised magnitude estimate for the training mixture.
c1 = clean(X_mag=X_mag, parameters=parameters)

### Recover Complex Valued Speech Spectrogram 
And applying inverse STFT to convert from frequency domain to time domain

In [0]:
# Re-attach the noisy signal's phase to the denoised magnitudes.
# exp(1j * angle(X)) equals X / |X| wherever |X| > 0, but stays finite
# (unit phase) in zero-magnitude bins, where the division would produce
# NaN (0/0) and poison the reconstructed waveform.
S = np.multiply(np.exp(1j * np.angle(X_dirty)), c1)
sh_test = librosa.istft(S, win_length=1024, hop_length=512)

### Saving the Cleaned signal and downloading it

In [0]:
# Write the reconstructed waveform to disk using `sr` (the rate returned by
# the most recent librosa.load call), then hand the file to the browser.
# NOTE(review): librosa.output.write_wav was removed in librosa 0.8 — confirm
# the runtime's librosa version still provides it.
librosa.output.write_wav('test_clean_male.wav', sh_test, sr)
files.download('test_clean_male.wav')

## For test_x_01.wav
Load the file -> transform to feature domain using stft and take magnitude

In [0]:
# First test recording: native sample rate, same STFT settings as training,
# then keep the complex spectrogram (for its phase) and its magnitude.
test1, sr = librosa.load('test_x_01.wav', sr=None)
t1_dirty = librosa.stft(test1, hop_length=512, n_fft=1024)
t1_mag = np.absolute(t1_dirty)

Clean the signal using forward propagation

In [0]:
# Denoised magnitude estimate for test_x_01.
clean_t1 = clean(X_mag=t1_mag, parameters=parameters)

Recover Complex Valued Speech Spectrogram  
And apply inverse STFT to convert from frequency domain to time domain

In [0]:
# Re-attach the noisy phase to the denoised magnitudes.
# exp(1j * angle(X)) equals X / |X| wherever |X| > 0, but stays finite
# (unit phase) in zero-magnitude bins, where the division would produce
# NaN (0/0) and poison the reconstructed waveform.
T1 = np.multiply(np.exp(1j * np.angle(t1_dirty)), clean_t1)
test_01 = librosa.istft(T1, win_length=1024, hop_length=512)

Save the cleaned signal and download it

In [0]:
# Write the reconstructed waveform to disk using `sr` (the rate returned by
# the most recent librosa.load call), then hand the file to the browser.
# NOTE(review): librosa.output.write_wav was removed in librosa 0.8 — confirm
# the runtime's librosa version still provides it.
librosa.output.write_wav('test_s_01_recons.wav', test_01, sr)
files.download('test_s_01_recons.wav')

## For test_x_02.wav
Load the file -> transform to feature domain using stft and take magnitude

In [0]:
# Second test recording: native sample rate, same STFT settings as training,
# then keep the complex spectrogram (for its phase) and its magnitude.
test2, sr = librosa.load('test_x_02.wav', sr=None)
t2_dirty = librosa.stft(test2, hop_length=512, n_fft=1024)
t2_mag = np.absolute(t2_dirty)

Clean the signal using forward propagation

In [0]:
# Denoised magnitude estimate for test_x_02.
clean_t2 = clean(X_mag=t2_mag, parameters=parameters)

Recover Complex Valued Speech Spectrogram  
And apply inverse STFT to convert from frequency domain to time domain

In [0]:
# Re-attach the noisy phase to the denoised magnitudes.
# exp(1j * angle(X)) equals X / |X| wherever |X| > 0, but stays finite
# (unit phase) in zero-magnitude bins, where the division would produce
# NaN (0/0) and poison the reconstructed waveform.
T2 = np.multiply(np.exp(1j * np.angle(t2_dirty)), clean_t2)
test_02 = librosa.istft(T2, win_length=1024, hop_length=512)

Save the cleaned signal and download it

In [0]:
# Write the reconstructed waveform to disk using `sr` (the rate returned by
# the most recent librosa.load call), then hand the file to the browser.
# NOTE(review): librosa.output.write_wav was removed in librosa 0.8 — confirm
# the runtime's librosa version still provides it.
librosa.output.write_wav('test_s_02_recons.wav', test_02, sr)
files.download('test_s_02_recons.wav')