In [241]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math
import librosa

%matplotlib inline

In [242]:
# Calculate SNR 
def calculateSNR(st, st_h):
    st_sum = np.sum(np.abs(st))
    diff_sum = np.sum(np.abs(st-st_h))
    return 10*math.log(st_sum**2/diff_sum**2)

In [243]:
# Read data

def read_data(path, prefix, files_to_load):
    signal = np.zeros((513, 0))
    noise = np.zeros((513, 0))
    mixture = np.zeros((513, 0))
    for i in range(files_to_load):
        s, _=librosa.load('{}/{}s{:04d}.wav'.format(path, prefix, i), sr=None)
        S=librosa.stft(s, n_fft=1024, hop_length=512)
        signal = np.hstack((signal, S))
        s, _=librosa.load('{}/{}n{:04d}.wav'.format(path, prefix, i), sr=None)
        S=librosa.stft(s, n_fft=1024, hop_length=512)
        noise = np.hstack((noise, S))
        s, _=librosa.load('{}/{}x{:04d}.wav'.format(path, prefix, i), sr=None)
        S=librosa.stft(s, n_fft=1024, hop_length=512)
        mixture = np.hstack((mixture , S))

    return signal, noise, mixture



In [244]:
TRAIN_DATA_LEN = 600
# TRAIN_DATA_LEN = 1200
signal, noise, mixture = read_data('timit-homework/tr', 'tr', TRAIN_DATA_LEN)


In [297]:
v_signal, v_noise, v_mixture = read_data('timit-homework/v', 'v', 200)

In [544]:
print(signal.shape)
print(v_signal.shape)
# Try to output~
sr = 16000
# librosa.output.write_wav("./test_concat.wav", librosa.istft(signal, hop_length=512), sr)
librosa.output.write_wav("./v_test_concat.wav", librosa.istft(v_mixture, hop_length=512), sr)

(513, 58900)
(513, 18610)


In [247]:
# Preprocess data
signal_abs = np.abs(signal)
noise_abs = np.abs(noise)
mixture_abs = np.abs(mixture)

signal_T = signal.T
noise_T = noise.T
mixture_T = mixture.T
signal_abs_T = signal_abs.T
noise_abs_T = noise_abs.T
mixture_abs_T = mixture_abs.T

In [298]:
# For validation data 
# Preprocess Data
v_signal_abs = np.abs(v_signal)
v_noise_abs = np.abs(v_noise)
v_mixture_abs = np.abs(v_mixture)

v_signal_T = v_signal.T
v_noise_T = v_noise.T
v_mixture_T = v_mixture.T
v_signal_abs_T = v_signal_abs.T
v_noise_abs_T = v_noise_abs.T
v_mixture_abs_T = v_mixture_abs.T

In [249]:
# Calculate IBM 
def get_M(signal, noise):
    M = np.zeros_like(signal)
    (ht, wd) = M.shape
    for i in range(ht):
        for j in range(wd):
            if signal_abs_T[i][j] > noise_abs_T[i][j]:
                M[i][j] = 1
    return M

In [250]:
train_M = get_M(signal_abs_T, signal_abs_T)

In [299]:
v_M = get_M(v_signal_abs_T, v_signal_abs_T)

In [300]:
# Sanity check the shape of X(mixture) and M
print(mixture_abs_T.shape, train_M.shape)
print(v_mixture_abs_T.shape, v_M.shape)

(58900, 513) (58900, 513)
(18610, 513) (18610, 513)


In [542]:
# Testing M 
signal_recover = train_M * mixture_T
v_signal_recover = v_M * v_mixture_T
sr = 16000
# librosa.output.write_wav("./v_before_recover.wav", librosa.istft(mixture, hop_length=512), sr)
# librosa.output.write_wav("./v_recover.wav", librosa.istft(v_signal_recover.T, hop_length=512), sr)


In [302]:
# Test SNR based on v_signal_recover
print(calculateSNR(signal, signal_recover.T))
print(calculateSNR(v_signal, v_signal_recover.T))


24.732813943612374
12.992788440628296


In [573]:
# Training Parameters
learning_rate = 0.001
batch_size = 64

# Hyper Parameter
time_steps = 8   #
num_input = 513   # total data input (513 channel)
num_classes = 513 # total classes (513 channel)
num_hidden = 128  # hidden layer num of features
keep_prob = 0.8


is_training = False

In [574]:
tf.reset_default_graph()

# Create batch data for training
train_data = tf.data.Dataset.from_tensor_slices(tf.constant(mixture_abs_T))
label_data = tf.data.Dataset.from_tensor_slices(tf.constant(M.reshape(-1, num_classes)))
batch_data = tf.data.Dataset.zip((train_data, label_data)).repeat().batch(batch_size)

iterator = batch_data.make_one_shot_iterator()
next_batch = iterator.get_next()

In [575]:

with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    X = tf.placeholder("float", [None, time_steps, num_input])
    Y = tf.placeholder("float", [None, num_classes])
    weight = tf.Variable(tf.random_normal([num_hidden, num_classes]))
    bias = tf.Variable(tf.random_normal([num_classes]))


# batch_size*128*513
# weights = [tf.Variable(tf.random_normal([num_hidden, num_classes])) for i in range(time_steps)]
# biases = [tf.Variable(tf.random_normal([num_classes])) for i in range(time_steps)]



In [578]:
def RNN(x, weights, biases):    
    # Define a lstm cell with tensorflow
    cell1 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell2 = tf.contrib.rnn.DropoutWrapper(tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell(num_hidden), output_keep_prob=keep_prob)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])    
    # Try tf.contrib.cudnn_rnn.CudnnCompatibleLSTMCell
    
    outputs, states = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)    
#     print(outputs.shape, tf.reshape(outputs, (-1, time_steps*num_hidden)).shape, weight.shape)
    ret = tf.matmul(tf.reshape(outputs, (-1, num_hidden)), weight) + bias
    print(ret.shape)
    return ret

#     stack_outputs = tf.stack(outputs)    
#     print(outputs.shape)

#     ret_outputs = []
#     for i in range(len(outputs)):        
#         ret_outputs.append(tf.matmul(outputs[i], weights[i]) + biases[i])
#     stack_outputs = tf.stack(ret_outputs)
#     return tf.reshape(stack_outputs, (-1, num_classes))


In [579]:
# Calculate loss, train_op
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    Y_pred = RNN(X, weights, biases)
    loss = tf.losses.mean_squared_error(labels=Y, predictions=Y_pred)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

(?, 513)


In [555]:
# print(Y_pred.shape, Y.shape)

In [580]:
# Add Padding, to 
def fit_RNN_input_dim(data):
    (row, _) = data.shape
    count = 0
    while row % time_steps != 0:
        data = np.vstack((data, np.zeros(num_input)))
        row += 1
        count += 1
    
    return data, count

In [581]:
print(v_mixture_abs_T.shape)

(18610, 513)


In [582]:
(v_mixture_abs_T_fit, _) = fit_RNN_input_dim(v_mixture_abs_T)
(v_M_fit, _) = fit_RNN_input_dim(v_M)
print(v_mixture_abs_T_fit.shape, v_M_fit.shape)

(18616, 513) (18616, 513)


In [583]:
# Initial session
sess=tf.Session()
tf.global_variables_initializer().run(session=sess)

In [584]:
# Create checkpoint
saver = tf.train.Saver()
CHECK_POINT_FILE_NAME = "./hw3_2.ckpt"

# try:
#     saver.restore(sess, CHECK_POINT_FILE_NAME)
# except:
#     pass

In [591]:
# Training
training_steps = 10000
for i in range(training_steps+1):
    (batch_x, batch_y) = sess.run(next_batch)
    batch_x = batch_x.reshape(-1, time_steps, num_input)
#     print(batch_x.shape, batch_y.shape)
    _, loss_value = sess.run((train_op, loss), feed_dict={X: batch_x, Y: batch_y})
    if i % 1000 == 0:
        is_training = False
        print('Training Step:' + str(i) + '  Validation Loss =  ' + 
              str(sess.run(loss, feed_dict={X: v_mixture_abs_T_fit.reshape(-1, time_steps, num_input), Y: v_M_fit})) + 
              '  Train Loss =  ' + str(loss_value))
        is_training = True        

Training Step:0  Validation Loss =  0.2508672  Train Loss =  0.10139629
Training Step:1000  Validation Loss =  0.24971285  Train Loss =  0.06732198
Training Step:2000  Validation Loss =  0.25109106  Train Loss =  0.07073683
Training Step:3000  Validation Loss =  0.24781749  Train Loss =  0.07903202
Training Step:4000  Validation Loss =  0.24766901  Train Loss =  0.09114778
Training Step:5000  Validation Loss =  0.2498993  Train Loss =  0.1290096
Training Step:6000  Validation Loss =  0.24618882  Train Loss =  0.08853917
Training Step:7000  Validation Loss =  0.2513773  Train Loss =  0.0706407
Training Step:8000  Validation Loss =  0.24566773  Train Loss =  0.05474038
Training Step:9000  Validation Loss =  0.2476668  Train Loss =  0.07958381
Training Step:10000  Validation Loss =  0.24634673  Train Loss =  0.074462295



#### 1 GRUCell(128) / Timesteps 8  / dynamic_rnn
Training Step:0  Validation Loss =  3.9349077  Train Loss =  4.484976
Training Step:1000  Validation Loss =  0.2597179  Train Loss =  0.23647253
Training Step:2000  Validation Loss =  0.2569354  Train Loss =  0.17934279
Training Step:3000  Validation Loss =  0.25998798  Train Loss =  0.14066795
Training Step:4000  Validation Loss =  0.26135945  Train Loss =  0.14986697
Training Step:5000  Validation Loss =  0.2674995  Train Loss =  0.15539052
Training Step:6000  Validation Loss =  0.2602887  Train Loss =  0.14658244
Training Step:7000  Validation Loss =  0.28413093  Train Loss =  0.0959926
Training Step:8000  Validation Loss =  0.26343125  Train Loss =  0.14114633
Training Step:9000  Validation Loss =  0.2696621  Train Loss =  0.11511127
Training Step:10000  Validation Loss =  0.26768386  Train Loss =  0.10903315


Train SNR: 3.251641059081583
Validation SNR: 5.827979878762203


#### 2 CudnnCompatibleLSTMCell(128) / Timesteps 8  / dynamic_rnn
Training Step:0  Validation Loss =  1.3731027  Train Loss =  1.2348267
Training Step:1000  Validation Loss =  0.27806786  Train Loss =  0.26656747
Training Step:2000  Validation Loss =  0.26507422  Train Loss =  0.16778608
Training Step:3000  Validation Loss =  0.26399598  Train Loss =  0.12274204
Training Step:4000  Validation Loss =  0.2550358  Train Loss =  0.16121742
Training Step:5000  Validation Loss =  0.24873348  Train Loss =  0.14020343
Training Step:6000  Validation Loss =  0.2562572  Train Loss =  0.1109365
Training Step:7000  Validation Loss =  0.24956793  Train Loss =  0.09588985
Training Step:8000  Validation Loss =  0.2560808  Train Loss =  0.09700974
Training Step:9000  Validation Loss =  0.24594867  Train Loss =  0.08684269
Training Step:10000  Validation Loss =  0.2511251  Train Loss =  0.10253517

Train SNR: 5.780295033543666
Validation SNR: 9.500670758379465


In [586]:
saver.save(sess, CHECK_POINT_FILE_NAME)

'./hw3_2.ckpt'

In [587]:
# Get M for traiing data
def recover_train(input_mixture, mixture_abs_T):
    (mixture_fit, diff) = fit_RNN_input_dim(mixture_abs_T)
    input_x = mixture_fit.reshape(-1, time_steps, num_input)
    train_M = sess.run(Y_pred, feed_dict={X: input_x})
    print(train_M.shape)
    return train_M[:-diff] * input_mixture

In [592]:
recover_signal = recover_train(mixture_T, mixture_abs_T)
# librosa.output.write_wav("./train_recover.wav", librosa.istft(recover_signal.T, hop_length=512), sr)

(58904, 513)


In [593]:
# Recover Validaion track!
v_recover_signal = recover_train(v_mixture_T, v_mixture_abs_T)
librosa.output.write_wav("./v_recover_rnn.wav", librosa.istft(v_recover_signal.T, hop_length=512), sr)

(18616, 513)


In [594]:
# Calculate SNR for training data and validation data
print(calculateSNR(signal.T, recover_signal))
print(calculateSNR(v_signal.T, v_recover_signal))

5.780295033543666
9.500670758379465
