# Constants

In [165]:
inputSize = 1024

In [2]:
import numpy as np
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import SGD,Adam,RMSprop
from keras.models import Model
from keras.layers import Input, merge
from keras.layers import Conv1D,Conv2D,Conv2DTranspose,Reshape
from scipy import io
from keras.layers import LeakyReLU
from keras.layers.advanced_activations import PReLU
from keras.layers import BatchNormalization
from keras import backend as K  
import matplotlib.pyplot as plt

Using TensorFlow backend.


In [23]:
def make_trainable(model, trainable):
    model.trainable = trainable
    for l in model.layers:
        l.trainable = trainable

In [24]:
def D_loss(y_true,y_pred):
    return 0.5*K.mean((y_pred-y_true)**2,axis = -1)

In [25]:
def G_loss(fake_output,true_input):
    def lossfun(y_true, y_pred):
        return 1*K.mean(K.abs(fake_output-true_input)) 
    return lossfun

In [26]:
def GAN_loss(fake_output,true_input):
    def lossfun(y_true, y_pred):
        return 0.5*K.mean((((y_pred-y_true)**2)),axis = -1)+100*K.mean(K.abs(fake_output-true_input))
    return lossfun

In [27]:
optim =RMSprop(lr=0.00045)

# Generator Model 

In [28]:
#input is noise and input1 is clean
inputs=Input(shape =(inputSize,1,1))
inputs1=Input(shape =(inputSize,1))

In [29]:
inputSize = 1024

## Encoder is starting here

In [30]:
#64 kernels of size 31 and strides of 4
cov1=(Conv2D(64, 31, strides = 4,padding='same'))(inputs)
cov1=(PReLU())(cov1)  
print(cov1.shape)
#128 kernels of size 31 and strides of 4
cov2=(Conv2D(128, 31, strides=4,padding='same'))(cov1)
cov2=(PReLU())(cov2)
print(cov2.shape)
#256 kernels of size 31 and strides of 4
cov3=(Conv2D(256, 31, strides=4,padding='same'))(cov2)
cov3=(PReLU())(cov3)
print(cov3.shape)


(?, 256, 1, 64)
(?, 64, 1, 128)
(?, 16, 1, 256)


## Decoder is starting here

In [31]:
cov4=(Conv2DTranspose(256,31, strides=(1,1),padding='same'))(cov3)
cov4=(PReLU())(cov4)
z1 = merge([cov3,cov4], mode='sum')
cov5=(Conv2DTranspose(128,31, strides=(4,1),padding='same'))(z1)
cov5=(PReLU())(cov5)
z2=merge([cov2,cov5], mode='sum')
cov6=(Conv2DTranspose(64,31, strides=(4,1),padding='same'))(z2)
cov6=(PReLU())(cov6)
z3=merge([cov1,cov6], mode='sum')
cov7=(Conv2DTranspose(16,31, strides=(4,1),padding='same'))(z3)
cov7=(PReLU())(cov7)
cov8=(Conv2DTranspose(1,31, strides=(1,1),activation='tanh',padding='same'))(cov7)
#cov8=(PReLU())(cov8)
cov8=(Reshape((inputSize,1)))(cov8)


  This is separate from the ipykernel package so we can avoid doing imports until
  
  if __name__ == '__main__':


In [32]:
G = Model([inputs,inputs1],output = cov8)

  """Entry point for launching an IPython kernel.


In [33]:
G.compile(loss=G_loss(cov8,inputs1),optimizer=optim)

In [34]:
G.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_8 (InputLayer)             (None, 1024, 1, 1)    0                                            
____________________________________________________________________________________________________
conv2d_4 (Conv2D)                (None, 256, 1, 64)    61568       input_8[0][0]                    
____________________________________________________________________________________________________
p_re_lu_8 (PReLU)                (None, 256, 1, 64)    16384       conv2d_4[0][0]                   
____________________________________________________________________________________________________
conv2d_5 (Conv2D)                (None, 64, 1, 128)    7872640     p_re_lu_8[0][0]                  
___________________________________________________________________________________________

# Discriminator model

In [35]:
#discriminator model
inputs=Input((inputSize,1))
# encoder
#model.add(Reshape((16384,1,1),input_shape=input_shape2))
d1=(Conv1D(64, 31, strides=4,padding='same'))
d_hidden1=d1(inputs)
d2=(BatchNormalization())
d_hidden2=d2(d_hidden1)
d3=(LeakyReLU(alpha=0.3))
d_hidden3=d3(d_hidden2)
d4=(Conv1D(128, 31, strides=4,padding='same'))
d_hidden4=d4(d_hidden3)
d5=(BatchNormalization())
d_hidden5=d5(d_hidden4)
d6=(LeakyReLU(alpha=0.3))
d_hidden6=d6(d_hidden5)
d7=(Conv1D(256, 31, strides=4,padding='same'))
d_hidden7=d7(d_hidden6)
d8=(BatchNormalization())
d_hidden8=d8(d_hidden7)
d9=(LeakyReLU(alpha=0.3))
d_hidden9=d9(d_hidden8)
d10=(Conv1D(1, 31, strides=1,padding='same'))
d_hidden10=d10(d_hidden9)
d11=(BatchNormalization())
d_hidden11=d11(d_hidden10)
d12=(LeakyReLU(alpha=0.3))
d_hidden12=d12(d_hidden11)
d13=(Flatten())
d_hidden13=d13(d_hidden12)
d14=Dense(16,activation='sigmoid')   
d_output =d14(d_hidden13)

D= Model(input = inputs,output=d_output)
D.compile(loss=D_loss, optimizer=optim)
#D.compile(loss='mse', optimizer=optim)
D.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_10 (InputLayer)        (None, 1024, 1)           0         
_________________________________________________________________
conv1d_5 (Conv1D)            (None, 256, 64)           2048      
_________________________________________________________________
batch_normalization_5 (Batch (None, 256, 64)           256       
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 256, 64)           0         
_________________________________________________________________
conv1d_6 (Conv1D)            (None, 64, 128)           254080    
_________________________________________________________________
batch_normalization_6 (Batch (None, 64, 128)           512       
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 64, 128)           0         
__________



# GAN 

In [36]:
make_trainable(D,False)
inputs=Input(shape = (inputSize,1,1))
inputs1=Input(shape = (inputSize,1))
input=([inputs,inputs1])
g_output = G(input)
gan_hidden = d1(g_output)
gan_hidden = d2(gan_hidden)
gan_hidden = d3(gan_hidden)
gan_hidden = d4(gan_hidden)
gan_hidden = d5(gan_hidden)
gan_hidden = d6(gan_hidden)
gan_hidden = d7(gan_hidden)
gan_hidden = d8(gan_hidden)
gan_hidden = d9(gan_hidden)
gan_hidden = d10(gan_hidden)
gan_hidden = d11(gan_hidden)
gan_hidden = d12(gan_hidden)
gan_hidden = d13(gan_hidden)
gan_output = d14(gan_hidden)

GAN =Model([inputs,inputs1],output=gan_output)
GAN.compile(loss=GAN_loss(g_output,inputs1), optimizer=optim) 
GAN.summary()


____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_11 (InputLayer)            (None, 1024, 1, 1)    0                                            
____________________________________________________________________________________________________
input_12 (InputLayer)            (None, 1024, 1)       0                                            
____________________________________________________________________________________________________
model_5 (Model)                  (None, 1024, 1)       142840801   input_11[0][0]                   
                                                                   input_12[0][0]                   
____________________________________________________________________________________________________
conv1d_5 (Conv1D)                (None, 256, 64)       2048        model_5[1][0]           



## Load data

In [292]:
# # --------------------load data--------------------
import librosa
i = 0
import os
cleanSignal = []
noisySignal = []
inputSizeSelect = 65536
dataset_dir = "musicDataset"
noise, sr = librosa.load("concertSound.wav", duration=3)
# noise_test = noise[-22050*0.5:]
# print(noiseSplit[0])
# import random
# random.shuffle(noiseSplit)
# print(noiseSplit[0])
noise_permut = noise[0:65536]
noise_permut = np.split(noise_permut, len(noise)/1024)
print(noise_permut[0])
print(np.array(noise_permut).shape)
t = np.sum(noise_permut[0])
#from random import shuffle
#shuffle(noise_permut)
# noise_permut = np.concatenate((noise[22050:44100], noise[0:22050]), axis = 0)
# print(len(noise_permut))
#noisebig,sr = librosa.load("concertSound.wav", duration=4.0)

for filename in os.listdir(dataset_dir):
     if "classical" not in filename: 
            if "jazz" not in filename:
                continue
     y, sr = librosa.load(dataset_dir+"//" + filename, duration=3)
     
     yArr = np.array(y[0:inputSizeSelect])
     ySplit = np.split(yArr, len(yArr)/1024)
     
#      shuffle(noise_permut)
     noise = np.concatenate(noise_permut)
#      print(noise)
     y = y[0:65536]
     y1 = y + (noise)*0.32
#      print(len(y1))
     y1Arr = np.array(y1[0:inputSizeSelect])
     y1Split = np.split(y1Arr, len(y1Arr)/1024)
     cleanSignal = cleanSignal + ySplit
     noisySignal = noisySignal + y1Split
#      print(y.shape)
#      noisySignal.append(y1[0:inputSize])
cleanSignal = np.array(cleanSignal)
noisySignal = np.array(noisySignal)

print(cleanSignal.shape)
print(noisySignal.shape)

clean = cleanSignal
noisy = noisySignal
#noisy=clean+1.5*noise



[  2.04263620e-06   1.34013926e-05   1.37722247e-06 ...,  -5.29842218e-03
  -4.95509384e-03  -4.66532260e-03]
(64, 1024)
(12800, 1024)
(12800, 1024)


In [248]:
# # --------------------Main Code--------------------
batch_size=128
n_epochs = 200
n_minibatches = int(noisy.shape[0]/batch_size)

#----------------------  load data ------------


In [94]:
for i in range(n_epochs):
        print ('Epoch:', i+1)
        for index in range(n_minibatches):
            if(index%32 ==0):
                print("minibatch:" , index)
            noisy_batch = noisy[index*batch_size:(index+1)*batch_size]
            real_batch = clean[index*batch_size:(index+1)*batch_size]
            
            noisy_batch=np.reshape(noisy_batch,(batch_size,inputSize,1,1))
            real_batch=np.reshape(real_batch,(batch_size,inputSize,1))
            
            combined_G_batch=([noisy_batch,real_batch])
            
            fake_batch = G.predict(combined_G_batch)
            
            fake_batch=np.reshape(fake_batch,(batch_size,inputSize))
            real_batch=np.reshape(real_batch,(batch_size,inputSize))
            
            combined_X_batch = np.concatenate((real_batch, fake_batch))
            one_label=np.ones([batch_size, 16])
            zero_label=np.zeros([batch_size, 16])
            combined_y_batch =np.vstack((one_label,zero_label))

            make_trainable(D,True)
            combined_X_batch=np.reshape(combined_X_batch,(2*batch_size,inputSize,1))
            d_loss = D.train_on_batch(combined_X_batch, combined_y_batch)
            
            
            make_trainable(D,False)
            g_loss = GAN.train_on_batch(combined_G_batch,one_label)
            
#         print('--------------------enhanced speech Generated!--------------')
        print('--------------------Discriminator trained!------------------')
        print(d_loss)
        print('--------------------GAN trained!----------------------------')
        print(g_loss)
        if n_epochs%10 == 0 : 
            G.save_weights('cnn_generator_weights_latest_.h5')
        
        
G.save_weights('cnn_generator_weights_2' + str(n_epochs)+'_.h5')

('Epoch:', 1)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:', 96)
--------------------Discriminator trained!------------------
0.125445
--------------------GAN trained!----------------------------
0.324499
('Epoch:', 2)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:', 96)
--------------------Discriminator trained!------------------
0.125071
--------------------GAN trained!----------------------------
0.307626
('Epoch:', 3)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:', 96)
--------------------Discriminator trained!------------------
0.125044
--------------------GAN trained!----------------------------
0.292135
('Epoch:', 4)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:', 96)
--------------------Discriminator trained!------------------
0.125018
--------------------GAN trained!----------------------------
0.289821
('Epoch:', 5)
('minibatch:', 0)
('minibatch:', 32)
('minibatch:', 64)
('minibatch:',

KeyboardInterrupt: 

# Start Training

##### Sanity check in the values : by comparing the difference between fake and real vs noise and real

In [249]:
fake_batch = G.predict(combined_G_batch)

In [250]:
a = np.sum(abs(fake_batch[3] - real_batch[3]))

In [251]:
b = np.sum(abs(fake_batch[3] - noisy_batch[3]))

In [252]:
print(a)

66686.7


In [253]:
print(b)

76573.3


# Test time

In [294]:
# G.load_weights("./cnn_generator_weights_latest_.h5")

In [295]:
ytest, srtest = librosa.load(dataset_dir + "//classical.00089.au", duration=3.0)

In [312]:
i = 0
step = 1024
inp = []
y2 = []
noise_test1, sr = librosa.load('concertSound.wav', duration=6.0)
noise = noise_test1[3*22050:]
b = noise*0.32
#b = np.concatenate((b[33075:66150],b[0:33075]), 0)
b = np.array(b)
print(len(b))

u = int(len(y)/step)
while i<=u-1:
    inp.append(ytest[i*step: i*step +step])
    c = np.array(inp[i])
    #c = np.array(inp[i]) + b
    y2.append(c + b[i*step: i*step +step])
    #y2.append(c + b[10241: 10241+1024])
    i+=1
print(len(c))
len(c.shape)

66150
1024


1

In [313]:
print(np.array(y2).shape[0])

64


In [314]:
size = np.array(y2).shape[0]
#fake_batch = G.predict(combined_G_batch)
y2=np.reshape(np.array(y2),(size,inputSize,1,1))
inp=np.reshape(np.array(inp),(size,inputSize,1))

x=([y2,inp])

fake_batch = G.predict(x)

In [315]:
noise_test = np.concatenate(y2, axis = 0)

# for y in y2:
#      np.hstack((noise_test, y)
noise_test.shape
# librosa.output.write_wav('file_trim_2s.wav', y2, sr)

(65536, 1, 1)

In [316]:
truth_Test = np.concatenate(fake_batch, axis = 0)

In [317]:
size_test = truth_Test.shape[0]

In [318]:
test_n=np.reshape(np.array(noise_test),(size_test,1))
test_t=np.reshape(np.array(truth_Test),(size_test,1))
test_t1=np.reshape(np.array(truth_Test),(size_test,1))
test_or=np.reshape(np.array(ytest[:65536]),(size_test,1))
#print(test_n)
#print(test_t)
m = []
c = 0
#print(len(test_t))
index = 0
for i,j,k in zip(test_t,test_n,test_or):
    index+=1
    if(abs(i-k) > 0.08):
        print("Predicted " + str(i))
        print("Noisy " + str(j))
        print("original " + str(k))
        print(index)
        print("====")
    if((i-j) < 0):
        c+=1
    m.append(abs(i-j))
m.sort()
#print(m[-10: -1])
#print(c)

Predicted [-0.16411793]
Noisy [-0.16362399]
original [-0.04223633]
1978
====
Predicted [-0.14354229]
Noisy [-0.14281788]
original [-0.0607605]
1979
====
Predicted [-0.1755217]
Noisy [-0.1834335]
original [-0.08685303]
2112
====
Predicted [-0.19022392]
Noisy [-0.19979031]
original [-0.09289551]
2113
====
Predicted [-0.09527602]
Noisy [-0.0971425]
original [-0.00869751]
2810
====
Predicted [-0.12967873]
Noisy [-0.13175106]
original [-0.04058838]
2811
====
Predicted [-0.14871959]
Noisy [-0.16488639]
original [-0.06466675]
3378
====
Predicted [-0.11616827]
Noisy [-0.12944907]
original [-0.03518677]
3379
====
Predicted [-0.15509935]
Noisy [-0.15549316]
original [-0.06890869]
3599
====
Predicted [-0.15742457]
Noisy [-0.15738091]
original [-0.06280518]
3600
====
Predicted [-0.13951778]
Noisy [-0.13974342]
original [-0.04956055]
3601
====
Predicted [ 0.09522328]
Noisy [ 0.09265465]
original [ 0.01373291]
3833
====
Predicted [-0.08079483]
Noisy [-0.08122978]
original [ 0.00418091]
4431
====
Pre

26918
====
Predicted [-0.1316037]
Noisy [-0.14924906]
original [-0.03115845]
26919
====
Predicted [-0.20513135]
Noisy [-0.22351828]
original [-0.1003418]
26920
====
Predicted [-0.25679505]
Noisy [-0.27717838]
original [-0.1491394]
26921
====
Predicted [-0.24244155]
Noisy [-0.26291692]
original [-0.13330078]
26922
====
Predicted [-0.21706864]
Noisy [-0.23268352]
original [-0.10287476]
26923
====
Predicted [-0.20432112]
Noisy [-0.21855479]
original [-0.09197998]
26924
====
Predicted [-0.16359617]
Noisy [-0.17852223]
original [-0.05975342]
26925
====
Predicted [-0.10837749]
Noisy [-0.12126592]
original [-0.01345825]
26926
====
Predicted [-0.05751099]
Noisy [-0.06919018]
original [ 0.02420044]
26927
====
Predicted [-0.01468418]
Noisy [-0.02675666]
original [ 0.06835938]
26937
====
Predicted [-0.03204634]
Noisy [-0.04497521]
original [ 0.05700684]
26938
====
Predicted [-0.05035821]
Noisy [-0.06368348]
original [ 0.04364014]
26939
====
Predicted [-0.09729689]
Noisy [-0.11141121]
original [-0

original [-0.00390625]
34027
====
Predicted [ 0.12917545]
Noisy [ 0.13099283]
original [ 0.03356934]
39265
====
Predicted [-0.14063394]
Noisy [-0.14271635]
original [-0.04629517]
44687
====


In [319]:
librosa.output.write_wav('DiffNoiseGanBefore40.wav', test_n , sr)
librosa.output.write_wav('DiffNoiseGanAfter40.wav', test_t , sr)

#test_t1 = test_t + [-0.002] *  len(test_t)
print(len(test_n))
np.sum(test_n) - np.sum(test_t) 

65536


-111.03288

In [320]:
test_t1 = test_t
test_t1 = test_t1-0.00321


In [321]:
#test_t1 = test_t +k
np.sum(test_or)

-73.763367

In [322]:
np.sum(np.array(test_t1))

-185.24202

In [323]:
#print(len(test_t1))
librosa.output.write_wav('DiffNoiseGanAfter40.wav', np.array(test_t) , sr)

import IPython.display as ipd

In [324]:
print(len(test_or))
#test_or = test_or[0:32000]
ipd.Audio(test_or[:,0], rate=sr)

65536


In [325]:
#test_n = test_n[0:32000]
ipd.Audio(test_n[:,0], rate=sr)

In [326]:
#test_t = test_t[0:32000]
ipd.Audio(test_t[:,0], rate=sr)

In [327]:
ipd.Audio(noise, rate=sr)