# ENGR-E 533: Deep Learning Systems
## Homework 1

### Khandokar Md. Nayem (knayem@iu.edu)
### Mar 7, 2018

### Import necessary files and set environment parameters
My assigned Node is `r-006` and GPU `1`.

In [98]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"

import tensorflow as tf
import numpy as np
import librosa

### Loading the speech dataset (noisy input and clean target)

In [99]:
# Noisy -> Input Data
# sr=None asks librosa to keep the file's own sampling rate.
sn, sr=librosa.load('train_dirty_male.wav', sr=None)
# Complex STFT; with n_fft=1024 each frame (column) has 513 frequency bins,
# which is the feature size the network placeholders below expect.
X=librosa.stft(sn, n_fft=1024, hop_length=512)
# The network is trained on magnitudes only; the phase is discarded here.
X_mag = np.abs(X)

# Clean -> Label
# NOTE: `sr` is overwritten by this second load.
s, sr=librosa.load('train_clean_male.wav', sr=None)
S=librosa.stft(s, n_fft=1024, hop_length=512)
S_mag = np.abs(S)

In [100]:
def next_batch(X, Y, batch_size):
    """Draw a random mini-batch of paired rows from X and Y.

    Row indices are drawn independently with `np.random.randint`, so the
    same row may appear more than once in a batch.

    Args:
        X: 2-D array whose rows are the input samples.
        Y: array indexed by the same row numbers as X (the targets).
        batch_size: number of rows to draw.

    Returns:
        Tuple `(X_batch, Y_batch)` holding the selected rows of X and Y.
    """
    row_count, _ = X.shape

    picked_rows = np.random.randint(row_count, size=batch_size)
    batch_inputs = X[picked_rows]
    batch_targets = Y[picked_rows]
    return batch_inputs, batch_targets
    
    
def next_batch_2(X,Y, batch_size):
    """Return one random contiguous window of `batch_size` rows from X and Y.

    A single start row is drawn uniformly at random and the same slice is
    taken from both arrays, so the rows of the batch are consecutive and the
    inputs stay aligned with their targets.

    Args:
        X: 2-D array whose rows are the input samples.
        Y: array with the same row layout as X (the targets).
        batch_size: number of consecutive rows to return.

    Returns:
        Tuple `(X_batch, Y_batch)` of the selected row windows.

    Raises:
        ValueError: if `batch_size` is larger than the number of rows in X.
    """
    num_samples, _ = X.shape
    if batch_size > num_samples:
        raise ValueError(
            "batch_size (%d) exceeds the number of samples (%d)"
            % (batch_size, num_samples))

    # randint's upper bound is exclusive, so +1 is required for the last
    # valid start (num_samples - batch_size) to be selectable; it also makes
    # batch_size == num_samples work instead of raising.
    start = np.random.randint(num_samples - batch_size + 1)
    stop = start + batch_size
    return X[start:stop], Y[start:stop]

In [114]:
# Debug check of the batch sampler (left disabled):
# x,y = next_batch_2(X_mag.T, S_mag.T, 128)
# print(x.shape, y.shape)

# Number of optimizer steps run by each training loop in this notebook.
NUM_ITERATION = 1000
# Number of spectrogram frames (rows) fed to the network per training step.
BATCH_SIZE = 64
# Small epsilon value for the BN transform; read as a global by
# batch_norm_wrapper and passed to tf.nn.batch_normalization.
epsilon = 1e-3

### Xavier Initialization of Weights
These are the weight and bias initialization functions used when defining the models.

In [102]:
def weight_variable (shape):
    """Create a trainable weight tensor with a size-scaled random init.

    Values are drawn from a truncated normal distribution whose standard
    deviation is sqrt(2 / sum(shape)); for a 2-D `[fan_in, fan_out]` shape
    that is sqrt(2 / (fan_in + fan_out)).

    Args:
        shape: list of ints giving the tensor dimensions.

    Returns:
        A `tf.Variable` holding the initialized weights.
    """
    fan_total = sum(shape)
    init_stddev = np.sqrt(2.0 / fan_total)
    return tf.Variable(tf.truncated_normal(shape, stddev=init_stddev))

def bias_variable (shape):
    """Create a trainable bias tensor with a small random init.

    Values are drawn from a truncated normal distribution whose standard
    deviation is sqrt(1 / sum(shape)).

    Args:
        shape: list of ints giving the tensor dimensions.

    Returns:
        A `tf.Variable` holding the initialized biases.
    """
    size_total = sum(shape)
    init_stddev = np.sqrt(1.0 / size_total)
    return tf.Variable(tf.truncated_normal(shape, stddev=init_stddev))

### Create the fully connected model 

In [50]:
# Network input: one noisy magnitude spectrum (513 frequency bins) per row.
x = tf.placeholder(tf.float32, [None, 513]) 

# Four hidden layers of 1024 units each, followed by a 513-unit output layer.
W_1 = weight_variable([513, 1024])
b_1 = bias_variable([1024])

W_2 = weight_variable([1024, 1024])
b_2 = bias_variable([1024])

W_3 = weight_variable([1024, 1024])
b_3 = bias_variable([1024])

W_4 = weight_variable([1024, 1024])
b_4 = bias_variable([1024])

W_5 = weight_variable([1024, 513])
b_5 = bias_variable([513])

# Training target: the clean magnitude spectrum for the same frames.
y_ = tf.placeholder(tf.float32, [None, 513]) # original


# Layer connections and Activation functions
y_1 = tf.nn.relu(tf.matmul(x, W_1) + b_1)
y_2 = tf.nn.relu(tf.matmul(y_1, W_2) + b_2)
y_3 = tf.nn.relu(tf.matmul(y_2, W_3) + b_3)
y_4 = tf.nn.relu(tf.matmul(y_3, W_4) + b_4)
# ReLU on the output keeps predictions non-negative, like the np.abs targets.
y =  tf.nn.relu(tf.matmul(y_4, W_5) + b_5) # predicted


# Define loss and optimizer
# Mean squared error between clean and predicted magnitudes.
# NOTE(review): tf.losses.mean_squared_error should already return a scalar
# with its default reduction, making the outer reduce_sum a no-op — confirm.
mse = tf.reduce_sum( tf.losses.mean_squared_error(labels=y_, predictions=y) )
# Earlier cross-entropy experiment, kept for reference:
# mse = -tf.reduce_sum(y_*tf.log(y))
# Adam with its default hyper-parameters.
train_step = tf.train.AdamOptimizer().minimize(mse)

### Run the model
If we increase the number of iterations (`NUM_ITERATION`, currently 1000), the model's accuracy increases, but training takes longer.

In [51]:
# Configuration to control GPU use
config = tf.ConfigProto()
# Allocate GPU memory on demand rather than reserving it all up front.
config.gpu_options.allow_growth = True
# Cap this process at roughly a third of the GPU's memory.
config.gpu_options.per_process_gpu_memory_fraction = 0.33
# InteractiveSession installs itself as the default session, which is what
# lets the initializer below call .run() without an explicit session.
sess = tf.InteractiveSession(config=config)

tf.global_variables_initializer().run()


# Train Model
# Each step trains on one contiguous window of BATCH_SIZE frames; the
# spectrograms are transposed so that rows are frames and columns are bins.
for _ in range(NUM_ITERATION):
    # Alternative sampler (independent random rows), left disabled:
#     batch_xs, batch_ys = next_batch(X_mag.T,S_mag.T, BATCH_SIZE)
#     sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    batch_xs, batch_ys = next_batch_2(X_mag.T,S_mag.T, BATCH_SIZE)
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    # Full-batch alternative (all frames at once), left disabled:
#     sess.run(train_step, feed_dict={x: X_mag.T, y_: S_mag.T})

In [52]:
# Load Test data-1
# The complex STFT is kept as well as its magnitude: the magnitude is the
# network input and the complex values supply the phase for reconstruction.
sn, sr=librosa.load('test_x_01.wav', sr=None)
X_test_01=librosa.stft(sn, n_fft=1024, hop_length=512)
X_mag_test_01 = np.abs(X_test_01)

# Load Test data-2
# NOTE: `sn` and `sr` are overwritten here, so after this cell `sr` holds the
# sampling rate of test_x_02.wav only.
sn, sr=librosa.load('test_x_02.wav', sr=None)
X_test_02=librosa.stft(sn, n_fft=1024, hop_length=512)
X_mag_test_02 = np.abs(X_test_02)

# Sanity check: prints (frequency bins, frames) for the first test file.
print(X_mag_test_01.shape)

(513, 142)


In [53]:
# Test model-1
# Predict the clean magnitude for every frame (the network takes rows =
# frames, hence the transpose on the way in and out).
S_hat_mag_test_01=sess.run(y, feed_dict={x: X_mag_test_01.T})
# Re-attach the noisy signal's phase to the predicted magnitude.  The unit
# phasor is built from the angle instead of X/|X| so that STFT bins with zero
# magnitude (e.g. digital silence) do not produce 0/0 = NaN in the output.
S_hat_test_01=np.exp(1j*np.angle(X_test_01))*S_hat_mag_test_01.T
S_hat_01=librosa.istft(S_hat_test_01, hop_length=512)
# NOTE: `sr` here is whatever the most recent librosa.load returned.
librosa.output.write_wav('test_s_01_recons.wav', S_hat_01, sr)

# Test model-2
S_hat_mag_test_02=sess.run(y, feed_dict={x: X_mag_test_02.T})
# Same zero-safe phase reconstruction as for test file 1.
S_hat_test_02=np.exp(1j*np.angle(X_test_02))*S_hat_mag_test_02.T
S_hat_02=librosa.istft(S_hat_test_02, hop_length=512)
librosa.output.write_wav('test_s_02_recons.wav', S_hat_02, sr)

### Batch Norm 

In [103]:
def batch_norm_wrapper(inputs, is_training, decay = 0.999):
    """Apply batch normalization over the first axis of `inputs`.

    Four variables sized by the last dimension of `inputs` are created: a
    trainable scale and offset, and non-trainable running estimates of the
    mean and variance.

    Args:
        inputs: tensor to normalize; statistics are taken over axis 0.
        is_training: Python truth value chosen at graph-construction time.
            When true, the batch statistics normalize the input and the
            running estimates are updated as a dependency of the output.
            When false, the stored running estimates are used instead.
        decay: weight kept on the previous running estimate at each update.

    Returns:
        The normalized tensor, with the same shape as `inputs`.
    """
    feature_dim = inputs.get_shape()[-1]

    # Learnable affine parameters, followed by the running statistics.
    scale = tf.Variable(tf.ones([feature_dim]))
    beta = tf.Variable(tf.zeros([feature_dim]))
    pop_mean = tf.Variable(tf.zeros([feature_dim]), trainable=False)
    pop_var = tf.Variable(tf.ones([feature_dim]), trainable=False)

    # Inference: normalize with the stored population statistics.
    if not is_training:
        return tf.nn.batch_normalization(
            inputs, pop_mean, pop_var, beta, scale, epsilon)

    # Training: normalize with this batch's statistics and fold them into
    # the running averages.
    batch_mean, batch_var = tf.nn.moments(inputs, [0])
    update_mean = tf.assign(
        pop_mean, pop_mean * decay + batch_mean * (1 - decay))
    update_var = tf.assign(
        pop_var, pop_var * decay + batch_var * (1 - decay))

    # The control dependency makes both assignments run whenever the
    # normalized output is evaluated.
    with tf.control_dependencies([update_mean, update_var]):
        return tf.nn.batch_normalization(
            inputs, batch_mean, batch_var, beta, scale, epsilon)

In [108]:
def build_graph(is_training):
    """Build the batch-normalized denoising network in the default graph.

    The network has two hidden layers (513 -> 1024 -> 1024), each a matmul
    followed by batch normalization and ReLU, and a 513-unit ReLU output
    layer with a bias.  It is trained with Adam (learning rate 0.01) on the
    mean squared error between prediction and target.

    Args:
        is_training: forwarded to `batch_norm_wrapper`; selects batch
            statistics (true) or stored population statistics (false).

    Returns:
        Tuple `((x, y_), train_step, y, saver)`: the input and target
        placeholders, the optimizer op, the prediction tensor, and a
        `tf.train.Saver` for the graph's variables.
    """
    # Placeholders: noisy input spectra and clean target spectra.
    noisy_in = tf.placeholder(tf.float32, shape=[None, 513])
    clean_target = tf.placeholder(tf.float32, shape=[None, 513])

    # Hidden layers: no bias term, batch norm supplies the offset.
    # (A third 1024-unit hidden layer was tried and is disabled.)
    hidden = noisy_in
    for fan_in, fan_out in ((513, 1024), (1024, 1024)):
        weights = weight_variable([fan_in, fan_out])
        pre_activation = tf.matmul(hidden, weights)
        normalized = batch_norm_wrapper(pre_activation, is_training)
        hidden = tf.nn.relu(normalized)

    # Output layer: ReLU keeps the predicted magnitudes non-negative.
    out_weights = weight_variable([1024, 513])
    out_bias = bias_variable([513])
    prediction = tf.nn.relu(tf.matmul(hidden, out_weights) + out_bias)

    # Loss and optimizer.
    loss = tf.reduce_sum(
        tf.losses.mean_squared_error(labels=clean_target,
                                     predictions=prediction))
    train_step = tf.train.AdamOptimizer(0.01).minimize(loss)

    # Created last so that it sees every variable defined above.
    saver = tf.train.Saver()
    return (noisy_in, clean_target), train_step, prediction, saver

In [115]:
# Drop the previous session and graph so the batch-norm model starts clean.
sess.close()
tf.reset_default_graph()

# Training-mode graph: activations are normalized with batch statistics and
# the running population statistics are updated on every step.
(x, y_), train_step, y_hat,saver = build_graph(is_training=True)

# Configuration to control GPU use
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.33
sess = tf.InteractiveSession(config=config)

tf.global_variables_initializer().run()

# Train Model
# Each step uses one contiguous window of BATCH_SIZE frames.
for _ in range(NUM_ITERATION):
    batch_xs, batch_ys = next_batch_2(X_mag.T,S_mag.T, BATCH_SIZE)
#     train_step.run(feed_dict={x: batch_xs, y_: batch_ys})
    sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})
    
# Checkpoint the trained variables so the inference-mode graph built in the
# next cell can restore them.
saved_model = saver.save(sess, './temp-bn-save') 
    
    
    

In [116]:
# Rebuild the network in inference mode (batch norm uses the stored
# population statistics) and load the trained variables from the checkpoint.
sess.close()
tf.reset_default_graph()

(x, y_), train_step, y_hat, saver = build_graph(is_training=False)

# Configuration to control GPU use
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.33
sess = tf.InteractiveSession(config=config)

tf.global_variables_initializer().run()
saver.restore(sess, './temp-bn-save')


# Load Test data-1
sn, sr=librosa.load('test_x_01.wav', sr=None)
# Remember this file's sampling rate: `sr` is overwritten by the next load.
sr_test_01 = sr
X_test_01=librosa.stft(sn, n_fft=1024, hop_length=512)
X_mag_test_01 = np.abs(X_test_01)

# Load Test data-2
sn, sr=librosa.load('test_x_02.wav', sr=None)
sr_test_02 = sr
X_test_02=librosa.stft(sn, n_fft=1024, hop_length=512)
X_mag_test_02 = np.abs(X_test_02)

# Test model-1
# Predict the clean magnitude per frame (rows = frames, hence the transposes).
S_hat_mag_test_01=sess.run(y_hat, feed_dict={x: X_mag_test_01.T})
# Re-attach the noisy signal's phase.  The unit phasor is built from the
# angle instead of X/|X| so that STFT bins with zero magnitude do not produce
# 0/0 = NaN in the output.
S_hat_test_01=np.exp(1j*np.angle(X_test_01))*S_hat_mag_test_01.T
S_hat_01=librosa.istft(S_hat_test_01, hop_length=512)
# Write with the sampling rate of test file 1, not that of the last load.
librosa.output.write_wav('test_s_01_recons.wav', S_hat_01, sr_test_01)

# Test model-2
S_hat_mag_test_02=sess.run(y_hat, feed_dict={x: X_mag_test_02.T})
# Same zero-safe phase reconstruction as for test file 1.
S_hat_test_02=np.exp(1j*np.angle(X_test_02))*S_hat_mag_test_02.T
S_hat_02=librosa.istft(S_hat_test_02, hop_length=512)
librosa.output.write_wav('test_s_02_recons.wav', S_hat_02, sr_test_02)

INFO:tensorflow:Restoring parameters from ./temp-bn-save
