In [241]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
import math
import librosa

%matplotlib inline

In [242]:
# Calculate SNR 
def calculateSNR(st, st_h):
    st_sum = np.sum(np.abs(st))
    diff_sum = np.sum(np.abs(st-st_h))
    return 10*math.log(st_sum**2/diff_sum**2)

In [243]:
# Read data

def read_data(path, prefix, files_to_load):
    signal = np.zeros((513, 0))
    noise = np.zeros((513, 0))
    mixture = np.zeros((513, 0))
    for i in range(files_to_load):
        s, _=librosa.load('{}/{}s{:04d}.wav'.format(path, prefix, i), sr=None)
        S=librosa.stft(s, n_fft=1024, hop_length=512)
        signal = np.hstack((signal, S))
        s, _=librosa.load('{}/{}n{:04d}.wav'.format(path, prefix, i), sr=None)
        S=librosa.stft(s, n_fft=1024, hop_length=512)
        noise = np.hstack((noise, S))
        s, _=librosa.load('{}/{}x{:04d}.wav'.format(path, prefix, i), sr=None)
        S=librosa.stft(s, n_fft=1024, hop_length=512)
        mixture = np.hstack((mixture , S))

    return signal, noise, mixture



In [244]:
TRAIN_DATA_LEN = 600
# TRAIN_DATA_LEN = 1200
signal, noise, mixture = read_data('timit-homework/tr', 'tr', TRAIN_DATA_LEN)


In [297]:
v_signal, v_noise, v_mixture = read_data('timit-homework/v', 'v', 200)

In [246]:
print(signal.shape)
print(v_signal.shape)
# Try to output~
sr = 16000
# librosa.output.write_wav("./test_concat.wav", librosa.istft(signal, hop_length=512), sr)
# librosa.output.write_wav("./v_test_concat.wav", librosa.istft(v_mixture, hop_length=512), sr)

(513, 58900)
(513, 4310)


In [247]:
# Preprocess data
signal_abs = np.abs(signal)
noise_abs = np.abs(noise)
mixture_abs = np.abs(mixture)

signal_T = signal.T
noise_T = noise.T
mixture_T = mixture.T
signal_abs_T = signal_abs.T
noise_abs_T = noise_abs.T
mixture_abs_T = mixture_abs.T

In [298]:
# For validation data 
# Preprocess Data
v_signal_abs = np.abs(v_signal)
v_noise_abs = np.abs(v_noise)
v_mixture_abs = np.abs(v_mixture)

v_signal_T = v_signal.T
v_noise_T = v_noise.T
v_mixture_T = v_mixture.T
v_signal_abs_T = v_signal_abs.T
v_noise_abs_T = v_noise_abs.T
v_mixture_abs_T = v_mixture_abs.T

In [249]:
# Calculate IBM 
def get_M(signal, noise):
    M = np.zeros_like(signal)
    (ht, wd) = M.shape
    for i in range(ht):
        for j in range(wd):
            if signal_abs_T[i][j] > noise_abs_T[i][j]:
                M[i][j] = 1
    return M

In [250]:
train_M = get_M(signal_abs_T, signal_abs_T)

In [299]:
v_M = get_M(v_signal_abs_T, v_signal_abs_T)

In [300]:
# Sanity check the shape of X(mixture) and M
print(mixture_abs_T.shape, train_M.shape)
print(v_mixture_abs_T.shape, v_M.shape)

(58900, 513) (58900, 513)
(18610, 513) (18610, 513)


In [301]:
# Testing M 
signal_recover = train_M * mixture_T
v_signal_recover = v_M * v_mixture_T
sr = 16000
# librosa.output.write_wav("./v_before_recover.wav", librosa.istft(mixture, hop_length=512), sr)
# librosa.output.write_wav("./v_recover.wav", librosa.istft(v_signal_recover.T, hop_length=512), sr)


In [302]:
# Test SNR based on v_signal_recover
print(calculateSNR(signal, signal_recover.T))
print(calculateSNR(v_signal, v_signal_recover.T))


24.732813943612374
12.992788440628296


In [287]:

# Training Parameters
learning_rate = 0.001
batch_size = 128

# Network Parameters
num_input = 513   # total data input (513 channel)
time_steps = 8   #
num_hidden = 128  # hidden layer num of features
num_classes = 513 # total classes (513 channel)

is_training = False

In [290]:
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    X = tf.placeholder("float", [None, time_steps, num_input])
    Y = tf.placeholder("float", [None, num_classes])

# batch_size*128*513
weights = [tf.Variable(tf.random_normal([num_hidden, num_classes])) for i in range(time_steps)]

biases = [tf.Variable(tf.random_normal([num_classes])) for i in range(time_steps)]


In [313]:
# tf.reset_default_graph()

def RNN(x, weights, biases):    
    # Prepare data shape to match `rnn` function requirements
    # Current data input shape: (batch_size, timesteps, n_input)
    # Required shape: 'timesteps' tensors list of shape (batch_size, n_input)
    # Unstack to get a list of 'timesteps' tensors of shape (batch_size, n_input)
    x = tf.unstack(x, axis=1)

    # Define a lstm cell with tensorflow
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden)
    # Get lstm cell output
    outputs, states = tf.nn.static_rnn(lstm_cell, x, dtype=tf.float32)    
    ret_outputs = []
    for i in range(len(outputs)):        
        ret_outputs.append(tf.matmul(outputs[i], weights[i]) + biases[i])
    stack_outputs = tf.stack(ret_outputs)

    return tf.reshape(stack_outputs, (-1, num_classes))


In [None]:
# Calculate loss, train_op
with tf.variable_scope("rnn_model", reuse=tf.AUTO_REUSE):
    Y_pred = RNN(X, weights, biases)
    loss = tf.losses.mean_squared_error(labels=Y, predictions=Y_pred)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

In [293]:
print(Y_pred.shape, Y.shape)

(?, 513) (?, 513)


In [294]:
# Create batch data for training
train_data = tf.data.Dataset.from_tensor_slices(tf.constant(mixture_abs_T))
label_data = tf.data.Dataset.from_tensor_slices(tf.constant(M.reshape(-1, num_classes)))
batch_data = tf.data.Dataset.zip((train_data, label_data)).repeat().batch(batch_size)

iterator = batch_data.make_one_shot_iterator()
next_batch = iterator.get_next()

In [261]:
# Add Padding, to 
def fit_RNN_input_dim(data):
    (row, _) = data.shape
    count = 0
    while row % time_steps != 0:
        data = np.vstack((data, np.zeros(num_input)))
        row += 1
        count += 1
    
    return data, count

In [262]:
(v_mixture_abs_T_fit, _) = fit_RNN_input_dim(v_mixture_abs_T)
(v_M_fit, _) = fit_RNN_input_dim(v_M)
print(v_mixture_abs_T_fit.shape, v_M_fit.shape)

(4312, 513) (4312, 513)


In [295]:
# Initial session
sess=tf.Session()
tf.global_variables_initializer().run(session=sess)

In [264]:
# Create checkpoint
saver = tf.train.Saver()
CHECK_POINT_FILE_NAME = "./hw3_2.ckpt"

try:
    saver.restore(sess, CHECK_POINT_FILE_NAME)
except:
    pass

In [310]:
# Training
training_steps = 10000
for i in range(training_steps+1):
    (batch_x, batch_y) = sess.run(next_batch)
    batch_x = batch_x.reshape(-1, time_steps, num_input)
#     print(batch_x.shape, batch_y.shape)
    _, loss_value = sess.run((train_op, loss), feed_dict={X: batch_x, Y: batch_y})
    if i % 1000 == 0:
        is_training = False
        print('Training Step:' + str(i) + '  Validation Loss =  ' + 
              str(sess.run(loss, feed_dict={X: v_mixture_abs_T_fit.reshape(-1, time_steps, num_input), Y: v_M_fit})) + 
              'Train Loss =  ' + str(loss_value))
        is_training = True

        

Training Step:0  Validation Loss =  0.23965575Train Loss =  0.19815406
Training Step:1000  Validation Loss =  0.24122183Train Loss =  0.23180456
Training Step:2000  Validation Loss =  0.25111005Train Loss =  0.16151479
Training Step:3000  Validation Loss =  0.243449Train Loss =  0.18703413
Training Step:4000  Validation Loss =  0.24112882Train Loss =  0.20896038
Training Step:5000  Validation Loss =  0.24442476Train Loss =  0.21313599
Training Step:6000  Validation Loss =  0.24611773Train Loss =  0.16962238
Training Step:7000  Validation Loss =  0.24323905Train Loss =  0.21467513
Training Step:8000  Validation Loss =  0.2444713Train Loss =  0.17785412
Training Step:9000  Validation Loss =  0.2453906Train Loss =  0.20531043
Training Step:10000  Validation Loss =  0.24703872Train Loss =  0.19101194


#### Using 1 BasicLSTMCell
Training Step:0  Validation Loss =  0.23965575Train Loss =  0.19815406
Training Step:1000  Validation Loss =  0.24122183Train Loss =  0.23180456
Training Step:2000  Validation Loss =  0.25111005Train Loss =  0.16151479
Training Step:3000  Validation Loss =  0.243449Train Loss =  0.18703413
Training Step:4000  Validation Loss =  0.24112882Train Loss =  0.20896038
Training Step:5000  Validation Loss =  0.24442476Train Loss =  0.21313599
Training Step:6000  Validation Loss =  0.24611773Train Loss =  0.16962238
Training Step:7000  Validation Loss =  0.24323905Train Loss =  0.21467513
Training Step:8000  Validation Loss =  0.2444713Train Loss =  0.17785412
Training Step:9000  Validation Loss =  0.2453906Train Loss =  0.20531043
Training Step:10000  Validation Loss =  0.24703872Train Loss =  0.19101194

In [266]:
saver.save(sess, CHECK_POINT_FILE_NAME)

'./hw3_2.ckpt'

In [272]:
# Get M for traiing data
def recover_train(input_mixture, mixture_abs_T):
    (mixture_fit, diff) = fit_RNN_input_dim(mixture_abs_T)
    input_x = mixture_fit.reshape(-1, time_steps, num_input)
    train_M = sess.run(Y_pred, feed_dict={X: input_x})
    print(train_M.shape)
    return train_M[:-diff] * input_mixture

In [304]:
recover_signal = recover_train(mixture_T, mixture_abs_T)
# librosa.output.write_wav("./train_recover.wav", librosa.istft(recover_signal.T, hop_length=512), sr)

(58904, 513)


In [307]:
# Recover Validaion track!
v_recover_signal = recover_train(v_mixture_T, v_mixture_abs_T)
librosa.output.write_wav("./v_recover.wav", librosa.istft(v_recover_signal.T, hop_length=512), sr)

(18616, 513)


In [306]:
# Calculate SNR for training data and validation data
print(calculateSNR(signal.T, recover_signal))
print(calculateSNR(v_signal.T, v_recover_signal))

-0.14415701616086762
-0.08665901355862538


In [309]:
librosa.output.write_wav("./v_mixture.wav", librosa.istft(v_mixture, hop_length=512), sr)