# Constants

In [2]:
inputSize = 128

In [3]:
import numpy as np
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import SGD,Adam,RMSprop
from keras.models import Model
from keras.layers import Input, merge
from keras.layers import Conv1D,Conv2D,Conv2DTranspose,Reshape
from scipy import io
from keras.layers import LeakyReLU
from keras.layers.advanced_activations import PReLU
from keras.layers import BatchNormalization
from keras import backend as K  
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [4]:
def make_trainable(model, trainable):
    model.trainable = trainable
    for l in model.layers:
        l.trainable = trainable

In [5]:
def D_loss(y_true,y_pred):
    return 0.5*K.mean((y_pred-y_true)**2,axis = -1)

In [6]:
def G_loss(fake_output,true_input):
    def lossfun(y_true, y_pred):
        return 1*K.mean(K.abs(fake_output-true_input)) 
    return lossfun

In [7]:
def GAN_loss(fake_output,true_input):
    def lossfun(y_true, y_pred):
        return 0.5*K.mean((((y_pred-y_true)**2)),axis = -1)+100*K.mean(K.abs(fake_output-true_input))
    return lossfun

In [8]:
optim =RMSprop(lr=0.0002)

# Generator Model 

In [9]:
#input is noise and input1 is clean
inputs=Input(shape =(inputSize,1,1))
inputs1=Input(shape =(inputSize,1))

In [10]:
inputSize = 128

## Encoder is starting here

In [11]:
#64 kernels of size 31 and strides of 4
cov1=(Conv2D(64, 31, strides = 4,padding='same'))(inputs)
cov1=(PReLU())(cov1)  
print(cov1.shape)
#128 kernels of size 31 and strides of 4
cov2=(Conv2D(128, 31, strides=4,padding='same'))(cov1)
cov2=(PReLU())(cov2)
print(cov2.shape)
#256 kernels of size 31 and strides of 4
cov3=(Conv2D(256, 31, strides=4,padding='same'))(cov2)
cov3=(PReLU())(cov3)
print(cov3.shape)


(?, 32, 1, 64)
(?, 8, 1, 128)
(?, 2, 1, 256)


## Decoder is starting here

In [12]:
cov4=(Conv2DTranspose(256,31, strides=(1,1),padding='same'))(cov3)
cov4=(PReLU())(cov4)
z1 = merge([cov3,cov4], mode='sum')
cov5=(Conv2DTranspose(128,31, strides=(4,1),padding='same'))(z1)
cov5=(PReLU())(cov5)
z2=merge([cov2,cov5], mode='sum')
cov6=(Conv2DTranspose(64,31, strides=(4,1),padding='same'))(z2)
cov6=(PReLU())(cov6)
z3=merge([cov1,cov6], mode='sum')
cov7=(Conv2DTranspose(16,31, strides=(4,1),padding='same'))(z3)
cov7=(PReLU())(cov7)
cov8=(Conv2DTranspose(1,31, strides=(1,1),activation='tanh',padding='same'))(cov7)
#cov8=(PReLU())(cov8)
cov8=(Reshape((inputSize,1)))(cov8)


  This is separate from the ipykernel package so we can avoid doing imports until
  name=name)
  
  if __name__ == '__main__':


In [13]:
G = Model([inputs,inputs1],output = cov8)

  """Entry point for launching an IPython kernel.


In [14]:
G.compile(loss=G_loss(cov8,inputs1),optimizer=optim)

In [15]:
G.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 128, 1, 1)     0                                            
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 32, 1, 64)     61568       input_1[0][0]                    
____________________________________________________________________________________________________
p_re_lu_1 (PReLU)                (None, 32, 1, 64)     2048        conv2d_1[0][0]                   
____________________________________________________________________________________________________
conv2d_2 (Conv2D)                (None, 8, 1, 128)     7872640     p_re_lu_1[0][0]                  
___________________________________________________________________________________________

# Discriminator model

In [16]:
#discriminator model
inputs=Input((inputSize,1))
# encoder
#model.add(Reshape((16384,1,1),input_shape=input_shape2))
d1=(Conv1D(64, 31, strides=4,padding='same'))
d_hidden1=d1(inputs)
d2=(BatchNormalization())
d_hidden2=d2(d_hidden1)
d3=(LeakyReLU(alpha=0.3))
d_hidden3=d3(d_hidden2)
d4=(Conv1D(128, 31, strides=4,padding='same'))
d_hidden4=d4(d_hidden3)
d5=(BatchNormalization())
d_hidden5=d5(d_hidden4)
d6=(LeakyReLU(alpha=0.3))
d_hidden6=d6(d_hidden5)
d7=(Conv1D(256, 31, strides=4,padding='same'))
d_hidden7=d7(d_hidden6)
d8=(BatchNormalization())
d_hidden8=d8(d_hidden7)
d9=(LeakyReLU(alpha=0.3))
d_hidden9=d9(d_hidden8)
d10=(Conv1D(1, 31, strides=1,padding='same'))
d_hidden10=d10(d_hidden9)
d11=(BatchNormalization())
d_hidden11=d11(d_hidden10)
d12=(LeakyReLU(alpha=0.3))
d_hidden12=d12(d_hidden11)
d13=(Flatten())
d_hidden13=d13(d_hidden12)
d14=Dense(16,activation='sigmoid')   
d_output =d14(d_hidden13)

D= Model(input = inputs,output=d_output)
D.compile(loss=D_loss, optimizer=optim)
#D.compile(loss='mse', optimizer=optim)
D.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 128, 1)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 32, 64)            2048      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 64)            256       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 32, 64)            0         
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 8, 128)            254080    
_________________________________________________________________
batch_normalization_2 (Batch (None, 8, 128)            512       
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 8, 128)            0         
__________



# GAN 

In [17]:
make_trainable(D,False)
inputs=Input(shape = (inputSize,1,1))
inputs1=Input(shape = (inputSize,1))
input=([inputs,inputs1])
g_output = G(input)
gan_hidden = d1(g_output)
gan_hidden = d2(gan_hidden)
gan_hidden = d3(gan_hidden)
gan_hidden = d4(gan_hidden)
gan_hidden = d5(gan_hidden)
gan_hidden = d6(gan_hidden)
gan_hidden = d7(gan_hidden)
gan_hidden = d8(gan_hidden)
gan_hidden = d9(gan_hidden)
gan_hidden = d10(gan_hidden)
gan_hidden = d11(gan_hidden)
gan_hidden = d12(gan_hidden)
gan_hidden = d13(gan_hidden)
gan_output = d14(gan_hidden)

GAN =Model([inputs,inputs1],output=gan_output)
GAN.compile(loss=GAN_loss(g_output,inputs1), optimizer=optim) 
GAN.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_4 (InputLayer)             (None, 128, 1, 1)     0                                            
____________________________________________________________________________________________________
input_5 (InputLayer)             (None, 128, 1)        0                                            
____________________________________________________________________________________________________
model_1 (Model)                  (None, 128, 1)        142776289   input_4[0][0]                    
                                                                   input_5[0][0]                    
____________________________________________________________________________________________________
conv1d_1 (Conv1D)                (None, 32, 64)        2048        model_1[1][0]           



## Load data

In [32]:
# # --------------------load data--------------------
import librosa
i = 0
import os
cleanSignal = []
noisySignal = []
inputSizeSelect = 4096
dataset_dir = "musicDataset"
noise, sr = librosa.load("noise.wav", duration=2.0)
for filename in os.listdir(dataset_dir):
     y, sr = librosa.load(dataset_dir+"//" + filename, duration=2.0)
     yArr = np.array(y[0:inputSizeSelect])
     ySplit = np.split(yArr, len(yArr)/128)
#      print(noise)
     y1 = y + noise
#      print(len(y1))
     y1Arr = np.array(y1[0:inputSizeSelect])
     y1Split = np.split(y1Arr, len(y1Arr)/128)
     cleanSignal = cleanSignal + ySplit
     noisySignal = noisySignal + y1Split
#      print(y.shape)
#      noisySignal.append(y1[0:inputSize])
cleanSignal = np.array(cleanSignal)
noisySignal = np.array(noisySignal)

print(cleanSignal.shape)
print(noisySignal.shape)

clean = cleanSignal
noisy = noisySignal
#noisy=clean+1.5*noise



(32000, 128)
(32000, 128)


In [37]:
# # --------------------Main Code--------------------
batch_size=128
n_epochs = 20
n_minibatches = int(noisy.shape[0]/batch_size)

#----------------------  load data ------------


# Start Training

In [38]:
for i in range(n_epochs):
        print ('Epoch:', i+1)
        for index in range(n_minibatches):
            if(index%32 ==0):
                print("minibatch:" , index)
            noisy_batch = noisy[index*batch_size:(index+1)*batch_size]
            real_batch = clean[index*batch_size:(index+1)*batch_size]
            
            noisy_batch=np.reshape(noisy_batch,(batch_size,inputSize,1,1))
            real_batch=np.reshape(real_batch,(batch_size,inputSize,1))
            
            combined_G_batch=([noisy_batch,real_batch])
            
            fake_batch = G.predict(combined_G_batch)
            
            fake_batch=np.reshape(fake_batch,(batch_size,inputSize))
            real_batch=np.reshape(real_batch,(batch_size,inputSize))
            
            combined_X_batch = np.concatenate((real_batch, fake_batch))
            one_label=np.ones([batch_size, 16])
            zero_label=np.zeros([batch_size, 16])
            combined_y_batch =np.vstack((one_label,zero_label))

            make_trainable(D,True)
            combined_X_batch=np.reshape(combined_X_batch,(2*batch_size,inputSize,1))
            d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
            
            
            make_trainable(D,False)
            g_loss = GAN.train_on_batch(combined_G_batch,one_label)
            
#         print('--------------------enhanced speech Generated!--------------')
        print('--------------------Discriminator trained!------------------')
        print(d_loss)
        print('--------------------GAN trained!----------------------------')
        print(g_loss)
        
        if(i%10 ==0):
            G.save_weights('cnn_generator_weights_' + str(n_epochs)+'_.h5')

('Epoch:', 1)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:', 96)
('minibatch:', 128)
('minibatch:', 160)
('minibatch:', 192)
('minibatch:', 224)
--------------------Discriminator trained!------------------
0.0420586
--------------------GAN trained!----------------------------
1.07937
('Epoch:', 2)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:', 96)
('minibatch:', 128)
('minibatch:', 160)
('minibatch:', 192)
('minibatch:', 224)
--------------------Discriminator trained!------------------
0.0657632
--------------------GAN trained!----------------------------
1.0181
('Epoch:', 3)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:', 96)
('minibatch:', 128)
('minibatch:', 160)
('minibatch:', 192)
('minibatch:', 224)
--------------------Discriminator trained!------------------
0.0990078
--------------------GAN trained!----------------------------
0.919006
('Epoch:', 4)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('m

# Sanity check in the values : by comparing the difference between fake and real vs noise and real

In [39]:
fake_batch = G.predict(combined_G_batch)

In [40]:
a = np.sum(abs(fake_batch[3] - real_batch[3]))

In [47]:
b = np.sum(abs(noise[0:128]))

In [48]:
print(a/batch_size)

8.7193813324


In [49]:
print(b)

4.20816


# Test time

In [51]:
ytest, srtest = librosa.load(dataset_dir + "//blues.00000.au", duration=8.0)

In [61]:
i = 0
step = 128
inp = []
y2 = []
b = noise[128:256]
b = np.array(b)
u = int(len(y)/128)
while i<=u-1:
    inp.append(ytest[i*step: i*step +step])
    c = np.array(inp[i])
    #c = np.array(inp[i]) + b
    y2.append(c + b)
    i+=1
print(len(c))
len(c.shape)

128


1

In [62]:
print(np.array(y2).shape[0])

344


In [63]:
size = np.array(y2).shape[0]
#fake_batch = G.predict(combined_G_batch)

y2=np.reshape(np.array(y2),(size,inputSize,1,1))
inp=np.reshape(np.array(inp),(size,inputSize,1))

x=([y2,inp])

fake_batch = G.predict(x)

In [64]:
noise_test = np.concatenate(y2, axis = 0)

# for y in y2:
#      np.hstack((noise_test, y)
noise_test.shape
# librosa.output.write_wav('file_trim_2s.wav', y2, sr)

(44032, 1, 1)

In [65]:
truth_Test = np.concatenate(fake_batch, axis = 0)

In [66]:
size_test = truth_Test.shape[0]

In [67]:
test_n=np.reshape(np.array(noise_test),(size_test,1))
test_t=np.reshape(np.array(truth_Test),(size_test,1))

In [68]:
librosa.output.write_wav('DiffNoiseGanBefore.wav', test_n , sr)
librosa.output.write_wav('DiffNoiseGanAfter.wav', test_t , sr)