In [2]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

import librosa
import math

%matplotlib inline

In [3]:
s, sr_s=librosa.load('data/train_clean_male.wav', sr=None)
S=librosa.stft(s, n_fft=1024, hop_length=512)
sn, sr_x=librosa.load('data/train_dirty_male.wav', sr=None)
X=librosa.stft(sn, n_fft=1024, hop_length=512)
print(S.shape)
print(X.shape)
S_abs = np.abs(S)
X_abs = np.abs(X)

(513, 2459)
(513, 2459)


In [100]:
x = tf.placeholder(tf.float32, [None, 513, 1])
y_ = tf.placeholder(tf.float32, [None, 513])
LEARNING_RATE = 10e-4

is_training = True

conv1 = tf.layers.conv1d(
    x,            
    filters=64,
    kernel_size=3,
    activation=tf.nn.relu)

conv2 = tf.layers.conv1d(
    conv1,            
    filters=32,
    kernel_size=3,
    activation=tf.nn.relu)

conv3 = tf.layers.conv1d(
    conv2,            
    filters=16,
    kernel_size=3,
    activation=tf.nn.relu)

flat = tf.reshape(conv3, [-1, 16*507])
l1 = tf.layers.dense(flat, units=512,  activation=tf.nn.relu)
l2 = tf.layers.dense(l1, units=512,  activation=tf.nn.relu)
dropout1 = tf.layers.dropout(l2, rate=0.2, training=is_training)
y = tf.layers.dense(dropout1, units=513, activation=tf.nn.relu)

loss = tf.losses.mean_squared_error(labels=y_, predictions=y)
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

sess=tf.Session()
tf.global_variables_initializer().run(session=sess)

train_data = tf.data.Dataset.from_tensor_slices(tf.constant(X_abs.T.reshape(-1, 513, 1)))
label_data = tf.data.Dataset.from_tensor_slices(tf.constant(S_abs.T))
batch_data = tf.data.Dataset.zip((train_data, label_data)).repeat().batch(BATCH_SIZE)

iterator = batch_data.make_one_shot_iterator()
next_batch = iterator.get_next()

# https://www.tensorflow.org/guide/saved_model#models
saver = tf.train.Saver()



In [107]:
CHECK_POINT_FILE_NAME = "./hw2_1.ckpt"
TRAIN_STEPS = 4000

try:
    saver.restore(sess, CHECK_POINT_FILE_NAME)
except:
    pass

for i in range(TRAIN_STEPS+1):    
    batch = sess.run(next_batch)
    _, loss_value = sess.run((train_step, loss), feed_dict={x: batch[0], y_: batch[1]})
    if i% 500 == 0:
        print('Training Step:' + str(i) + '  Loss =  ' + str(loss_value))
        
# save model
save_path = saver.save(sess, CHECK_POINT_FILE_NAME)


INFO:tensorflow:Restoring parameters from ./hw2_1.ckpt
Training Step:0  Loss =  0.0062892335
Training Step:500  Loss =  0.024055108
Training Step:1000  Loss =  0.0032384258
Training Step:1500  Loss =  0.018285573
Training Step:2000  Loss =  0.004243582
Training Step:2500  Loss =  0.0016892772
Training Step:3000  Loss =  0.003568292
Training Step:3500  Loss =  0.0021236362
Training Step:4000  Loss =  0.005993988


In [108]:
def denoise_sound_conv1(input_file_name, output_file_name):
    sn, sr=librosa.load(input_file_name, sr=None)
    testX=librosa.stft(sn, n_fft=1024, hop_length=512)
    testX_abs = np.abs(testX)
    is_training = False
    S_test_abs = sess.run(y, feed_dict={x: testX_abs.T.reshape(-1, 513, 1) }).T
    is_training = True
    print(S_test_abs.shape)
    ratio = (testX / testX_abs)
    Sh = np.multiply(ratio, S_test_abs)
    librosa.output.write_wav(output_file_name, librosa.istft(Sh, hop_length=512), sr)
    

In [114]:
denoise_sound_conv1('data/test_x_01.wav', 'recover_01_d1x.wav')
denoise_sound_conv1('data/test_x_02.wav', 'recover_02_d2x.wav')

(513, 142)


In [104]:
# Calculate SNR 
def calculateSNR(st, st_h):
    st_sum = np.sum(np.abs(st))
    diff_sum = np.sum(np.abs(st-st_h))
#     print(st_sum, diff_sum)
    return 10*math.log(st_sum**2/diff_sum**2)

In [110]:
# Calculate SNR by training data
S_test_abs = sess.run(y, feed_dict={x: X_abs.T.reshape(-1, 513, 1) }).T
ratio = (X / X_abs)
s_t = np.multiply(ratio, S_test_abs)
print(calculateSNR(S, s_t))

15.526661721097629


### CNN 2D


In [115]:
# Transform a spectra to image like data form with dim 20x513
def imageize_spec(X):
    raw = []
    for t in range(len(X)-19):
        raw.append(X[t: t+20])
    X_abs_2d = np.array(raw).reshape(-1, 20, 513, 1)
    return X_abs_2d
    

In [116]:
def padding_frames(X):
    testX_pad = X
    # padding
    for i in range(19):
        testX_pad = np.hstack((testX_pad, np.zeros((513, 1))))
    return testX_pad


In [129]:
x = tf.placeholder(tf.float32, [None, 20, 513, 1])
y_ = tf.placeholder(tf.float32, [None, 513])
LEARNING_RATE = 10e-4

is_training = True

#  18 * 511 * 16 
conv1 = tf.layers.conv2d(
    x,            
    filters=16,
    kernel_size=[3, 3],
    activation=tf.nn.relu)

# 9 * 255 * 16
pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)

# 7 * 253 * 32
conv2 = tf.layers.conv2d(
    pool1,            
    filters=32,
    kernel_size=[3, 3],
    activation=tf.nn.relu)

# 3 * 126 * 32
pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)

# 1 * 124 * 64
conv3 = tf.layers.conv2d(
    pool2,            
    filters=64,
    kernel_size=[3, 3],
    activation=tf.nn.relu)

flat = tf.reshape(conv3, [-1, 124*64])

l1 = tf.layers.dense(flat, units=512,  activation=tf.nn.relu)
l2 = tf.layers.dense(l1, units=512,  activation=tf.nn.relu)
dropout1 = tf.layers.dropout(l2, rate=0.2, training=is_training)
y = tf.layers.dense(dropout1, units=513, activation=tf.nn.relu)

loss = tf.losses.mean_squared_error(labels=y_, predictions=y)
train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

sess=tf.Session()
tf.global_variables_initializer().run(session=sess)



In [130]:
# "imageize" data for 2D CNN
S_abs_T = S_abs.T
X_abs_T = np.abs(padding_frames(X)).T
S_abs_2d = S_abs_T
X_abs_2d = imageize_spec(X_abs_T)

print(X_abs_2d.shape)
print(S_abs_T.shape)



(2459, 20, 513, 1)
(2459, 513)


In [131]:
# Modify data for training
train_data = tf.data.Dataset.from_tensor_slices(tf.constant(X_abs_2d))
label_data = tf.data.Dataset.from_tensor_slices(tf.constant(S_abs.T))
batch_data = tf.data.Dataset.zip((train_data, label_data)).repeat().batch(BATCH_SIZE)

iterator = batch_data.make_one_shot_iterator()
next_batch = iterator.get_next()

saver = tf.train.Saver()


In [139]:
CHECK_POINT_FILE_NAME = "./hw2_2.ckpt"
TRAIN_STEPS = 4000

try:
    saver.restore(sess, CHECK_POINT_FILE_NAME)
except:
    pass

for i in range(TRAIN_STEPS+1):    
    batch = sess.run(next_batch)
    _, loss_value = sess.run((train_step, loss), feed_dict={x: batch[0], y_: batch[1]})
    if i% 500 == 0:
        print('Training Step:' + str(i) + '  Loss =  ' + str(loss_value))
        
# save model
save_path = saver.save(sess, CHECK_POINT_FILE_NAME)


INFO:tensorflow:Restoring parameters from ./hw2_2.ckpt
Training Step:0  Loss =  0.0024328316
Training Step:500  Loss =  0.060028516
Training Step:1000  Loss =  0.01041532
Training Step:1500  Loss =  0.0148081565
Training Step:2000  Loss =  0.0039612055
Training Step:2500  Loss =  0.010793383
Training Step:3000  Loss =  0.009091403
Training Step:3500  Loss =  0.0030028434
Training Step:4000  Loss =  0.0040838136


In [140]:
def denoise_sound_conv2(input_file_name, output_file_name):
    sn, sr=librosa.load(input_file_name, sr=None)
    testX=librosa.stft(sn, n_fft=1024, hop_length=512)
    testX_abs = np.abs(testX)
    testX_abs_pad = np.abs(padding_frames(testX))
    X_abs_2d = imageize_spec(testX_abs_pad.T)
    is_training = False
    S_test_abs = sess.run(y, feed_dict={x: X_abs_2d }).T
    is_training = True
    print(S_test_abs.shape)
    ratio = (testX / testX_abs)
    Sh = np.multiply(ratio, S_test_abs)
    librosa.output.write_wav(output_file_name, librosa.istft(Sh, hop_length=512), sr)
    

In [141]:
denoise_sound_conv2('data/test_x_01.wav', 'recover_01_d2x.wav')
denoise_sound_conv2('data/test_x_02.wav', 'recover_02_d2x.wav')

(513, 142)
(513, 380)


In [138]:
# Calculate SNR by training data
testX_abs = X_abs
testX_pad = X_abs

# padding
for i in range(19):
    testX_pad = np.hstack((testX_pad, np.zeros((513, 1))))
testX_abs_pad = np.abs(testX_pad)

X_abs_2d = imageize_spec(testX_abs_pad.T)
is_training = False
S_test_abs = sess.run(y, feed_dict={x: X_abs_2d }).T
is_training = True

ratio = (X / X_abs)
s_t = np.multiply(ratio, S_test_abs)
print(calculateSNR(S, s_t))


12.166169347936753
