In [None]:
import os
from pathlib import Path
import numpy as np

# Load the three waveform arrays from .npy files in the working directory.
# Judging by the file names these are the original training signals and the
# reverberated train/test signals -- confirm against the data preparation step.
X_train_org = np.load('X_train_original.npy')

X_train_reverb = np.load('X_train_reverb_random.npy')

X_test_reverb = np.load('X_test_reverb_random.npy')


In [None]:
def normalize_data(data):
    """Min-max scale ``data`` into the range [0, 1].

    The minimum and maximum are taken over the whole array (all axes), so
    every element is scaled by the same global statistics.

    Args:
        data: array-like of numbers; must contain at least one element.

    Returns:
        Array with the same shape as ``data``. A constant input (max == min)
        maps to all zeros instead of dividing by zero and producing NaN.
    """
    data = np.asarray(data)
    data_min = np.min(data)
    data_max = np.max(data)
    data_range = data_max - data_min
    if data_range == 0:
        # Constant signal: divide by 1 so the result is all zeros, not NaN.
        data_range = 1
    data_norm = (data - data_min) / data_range
    return data_norm

# Scale each array into [0, 1] independently: every array is normalised with
# its own global min/max, not with shared statistics.
X_train_org_norm, X_train_reverb_norm, X_test_reverb_norm = map(
    normalize_data, (X_train_org, X_train_reverb, X_test_reverb))


In [None]:
import matplotlib.pyplot as plt

# Visual sanity check: plot the first normalised example of the original set.
plt.plot(X_train_org_norm[0])
plt.show()

# Report the array dimensions of the normalised original set.
print(X_train_org_norm.shape)

In [None]:
import tensorflow as tf
from tensorflow.keras import optimizers, initializers, regularizers
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model

# List the GPUs visible to TensorFlow; the block below is skipped when none are found.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to only allocate 5GB of memory on the first GPU
    # (memory_limit is expressed in MB, hence 5*1024).
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5*1024)])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)

def prepare_vnet_encoder(config, inputs, trainable=True):
    """Build the contracting (encoder) path of the 1-D V-Net.

    Every stage stacks ``n_convs`` width-100 convolutions, each followed by
    ReLU and batch normalisation. From the second stage on, the stage input is
    added to the last convolution before its activation (residual skip). All
    stages except the last end with a stride-2 convolution that halves the
    length and doubles the channel count.

    Args:
        config: sequence of ``[filters, n_convs]`` entries, one per stage.
        inputs: input tensor of the network.
        trainable: forwarded to the ``trainable`` flag of the conv layers.

    Returns:
        ``(bottleneck, skips)``: the output tensor of the last stage, and the
        list of tensors captured just before each down-convolution (in stage
        order) for use as long skip connections.
    """
    l2_penalty = regularizers.l2(0.001)
    skips = []
    x = inputs
    last_stage = len(config) - 1

    for i, stage in enumerate(config):
        filters, n_convs = stage[0], stage[1]
        stage_input = x

        for j in range(n_convs):
            conv = layers.Conv1D(filters, 100,
                                 padding='same',
                                 kernel_initializer='he_normal',
                                 kernel_regularizer=l2_penalty,
                                 trainable=trainable,
                                 name='dw_c{}_{}'.format(i, j))
            c = conv(x)

            # Residual skip on the final conv of every stage but the first.
            closes_stage = (j == n_convs - 1)
            if closes_stage and i != 0:
                c = layers.Add()([c, stage_input])

            c = layers.Activation('relu')(c)
            c = layers.BatchNormalization()(c)
            x = c

        if i == last_stage:
            continue

        # Remember the full-resolution tensor, then downsample by 2.
        # The layer name reuses the last inner-loop index ``j``.
        skips.append(c)
        down = layers.Conv1D(filters * 2, 2,
                             strides=2,
                             padding='same',
                             activation='elu',
                             kernel_initializer='he_normal',
                             kernel_regularizer=l2_penalty,
                             trainable=trainable,
                             name='p_cd{}_{}'.format(i, j))
        c = down(c)
        c = layers.BatchNormalization()(c)
        x = c

    return x, skips
        
def prepare_vnet_decoder(config, inputs, skip_layers, trainable=True):
    """Build the expanding (decoder) path of the 1-D V-Net.

    Every stage upsamples by 2 with a transposed convolution, concatenates the
    matching encoder tensor (long skip), runs ``n_convs`` width-100
    convolutions chained one after another, and adds the upsampled tensor to
    the last convolution before its activation (short residual skip).

    Args:
        config: sequence of ``[filters, n_convs]`` entries, one per stage, in
            decoding order.
        inputs: tensor produced by the encoder's last stage.
        skip_layers: encoder tensors in encoding order; stage ``i`` consumes
            ``skip_layers[-(i + 1)]``.
        trainable: forwarded to the ``trainable`` flag of the conv layers.

    Returns:
        Output tensor of the last decoder stage.
    """
    prev_output = inputs
    regularizer = regularizers.l2(0.001)

    for i in range(len(config)):
        filters, n_convs = config[i][0], config[i][1]

        # up deconvolution
        u = layers.Conv1DTranspose(filters, 2, strides=2, activation='elu',
                                   kernel_initializer='he_normal', trainable=trainable,
                                   padding='same', name='up_u{}_1'.format(i))(prev_output)
        u = tf.keras.layers.BatchNormalization()(u)
        layer_prev_output = u
        prev_output = u

        for j in range(n_convs):
            # long skip connection: only the first conv of a stage sees the encoder tensor
            if j == 0:
                c = layers.concatenate([prev_output, skip_layers[-(i + 1)]])
            else:
                c = prev_output

            c = layers.Conv1D(filters, 100, kernel_initializer='he_normal', trainable=trainable,
                              kernel_regularizer=regularizer, padding='same',
                              name='up_cd{}_{}'.format(i, j))(c)

            # short skip connection
            if j == n_convs - 1:
                c = layers.Add()([c, layer_prev_output])

            c = layers.Activation('elu')(c)
            c = tf.keras.layers.BatchNormalization()(c)

            # Chain the convolutions. If this only ran after the loop, every
            # conv after the first would restart from the upsampled tensor and
            # the earlier convs (including the skip concatenation) would be
            # disconnected from the output.
            prev_output = c

    return prev_output
        
def prepare_vnet(config, weight_path=None):
    """Assemble the 1-D V-Net and return it uncompiled.

    The network is a five-stage encoder and a four-stage decoder followed by a
    width-1 convolution with a single output channel. The encoder halves the
    length four times, so the input length should be divisible by 16 for the
    skip connections to line up.

    Args:
        config: dict; only ``config['input_size']`` (the per-example input
            shape passed to ``Input``) is used here. Other keys such as
            ``'output_size'`` and ``'loss_function'`` are accepted but ignored.
        weight_path: optional path to saved weights. When given, the weights
            are loaded into the freshly built model.

    Returns:
        An uncompiled ``keras.Model``; the caller is responsible for
        ``compile``.
    """
    input_shape = config['input_size']

    # Build the model
    inputs = tf.keras.layers.Input(input_shape)

    encoder_config = [[16, 1], [32, 2], [64, 3], [128, 3], [256, 3]]
    encoder, skip_layers = prepare_vnet_encoder(encoder_config, inputs)

    decoder_config = [[128, 3], [64, 3], [32, 2], [16, 1]]
    decoder = prepare_vnet_decoder(decoder_config, encoder, skip_layers)

    outputs = tf.keras.layers.Conv1D(1, 1)(decoder)
    full_model = keras.Model(inputs=inputs,
                             outputs=outputs)

    # Honour weight_path instead of silently ignoring it.
    if weight_path is not None:
        full_model.load_weights(weight_path)

    return full_model

# Number of samples per example fed to the network.
size = 16000
test_config = {
    "input_size": (size, 1),
    "output_size": (size, 1),
    "loss_function": 'mse'
}

# Build an untrained model just to inspect the architecture.
model = prepare_vnet(test_config)
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None").
model.summary()

In [None]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Input
from tensorflow.keras import Model

def dice_coef(y_true, y_pred, smooth=2e-126):
    """Dice coefficient between two tensors, with squared terms in the denominator.

    Computes ``(2 * sum(t * p) + s) / (sum(t**2) + sum(p**2) + s)`` over all
    elements of the flattened inputs.

    Args:
        y_true: target tensor.
        y_pred: predicted tensor, with the same number of elements.
        smooth: Python float added to numerator and denominator to avoid 0/0.
            Values below the backend epsilon are raised to ``K.epsilon()``.

    Returns:
        Scalar tensor with the Dice coefficient.
    """
    # The default 2e-126 is smaller than the smallest float32 subnormal, so in
    # float32 (the Keras default) it rounds to 0 and would leave 0/0 = NaN for
    # all-zero inputs. Clamp it to a value that is actually representable.
    smooth = max(smooth, K.epsilon())

    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(K.square(y_true_f)) + K.sum(K.square(y_pred_f)) + smooth)

def dice_coef_loss(y_true, y_pred):
    """Loss form of the Dice coefficient: one minus ``dice_coef``."""
    overlap = dice_coef(y_true, y_pred)
    return 1 - overlap


def train_model(X_train, y_train, epoch, batch_size):
    """Build a fresh V-Net, compile it with the Dice loss and fit it.

    Args:
        X_train: network inputs passed to ``fit``.
        y_train: training targets passed to ``fit``.
        epoch: number of training epochs.
        batch_size: mini-batch size.

    Returns:
        The fitted Keras model (Adam optimiser, ``dice_coef_loss``, MAE metric).
    """
    size = 16000
    test_config = {
        "input_size": (size, 1),
        "output_size": (size, 1),
        # Not used for compilation: the model is compiled with dice_coef_loss below.
        "loss_function": 'mse'
    }

    model = prepare_vnet(test_config)
    # Use keyword arguments: the positional order of compile() is not stable
    # across Keras versions (in Keras 3 the third positional parameter is
    # loss_weights, not metrics).
    model.compile(optimizer='Adam', loss=dice_coef_loss, metrics=['mae'])

    model.fit(X_train, y_train, batch_size=batch_size, epochs=epoch)
    return model

In [None]:
# Train for 20 epochs with batch size 32 (reverberated input -> original target).
# The first 100 examples are left out of training.
model = train_model(X_train_reverb_norm[100:], X_train_org_norm[100:], 20, 32)

In [None]:
from scipy.signal import butter, lfilter
from scipy.signal import freqs
from scipy import signal

def butter_bandpass(lowcut, highcut, fs, order=5):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: lower cut-off frequency, in the same unit as ``fs``.
        highcut: upper cut-off frequency, in the same unit as ``fs``.
        fs: sampling frequency.
        order: filter order handed to ``scipy.signal.butter``.

    Returns:
        ``(b, a)`` numerator and denominator coefficient arrays.
    """
    # butter() expects the band edges as fractions of the Nyquist frequency.
    nyquist = fs / 2.0
    band_edges = [lowcut / nyquist, highcut / nyquist]
    return butter(order, band_edges, btype='band')


def butter_bandpass_filter(data, lowcut, highcut, fs, order=5):
    """Band-pass ``data`` with a Butterworth filter.

    The filter is designed by ``butter_bandpass`` and applied with
    ``scipy.signal.lfilter``, which filters along the last axis of ``data``.

    Args:
        data: signal to filter.
        lowcut: lower cut-off frequency.
        highcut: upper cut-off frequency.
        fs: sampling frequency.
        order: Butterworth filter order.

    Returns:
        The filtered signal, same shape as ``data``.
    """
    coefficients = butter_bandpass(lowcut, highcut, fs, order=order)
    return lfilter(*coefficients, data)

# Filter a single example and remove artifacts with a band-pass filter

In [None]:
# Take example 25 from both sets and give it the (batch, samples, channels)
# layout the network expects, then de-reverberate the reverberated version.
test_org_data = X_train_org_norm[25].reshape((1, 16000, 1))
test_reverb_data = X_train_reverb_norm[25].reshape((1, 16000, 1))
y_predict = model.predict(test_reverb_data)

In [None]:
import librosa
def denormalize_data(data, desired_min=-0.5, desired_max=0.5):
    """Flatten ``data`` and linearly rescale it to ``[desired_min, desired_max]``.

    The scaling uses the minimum and maximum of ``data`` itself, so the output
    always spans the full requested range. The resulting minimum and maximum
    are printed.

    Args:
        data: array-like of numbers; must contain at least one element.
        desired_min: value the smallest input element is mapped to.
        desired_max: value the largest input element is mapped to.

    Returns:
        1-D array with one entry per element of ``data``. A constant input
        (max == min) is mapped to the midpoint of the requested range instead
        of dividing by zero and producing NaN.
    """
    flat_data = np.asarray(data).flatten()
    data_min = np.min(flat_data)
    data_max = np.max(flat_data)
    data_range = data_max - data_min
    desired_range = desired_max - desired_min

    if data_range == 0:
        # Constant signal: there is no spread to stretch, so park it mid-range.
        x = np.full(flat_data.shape, desired_min + desired_range / 2.0)
    else:
        # Vectorised form of the per-element rescaling.
        x = (flat_data - data_min) / data_range * desired_range + desired_min

    print(np.min(x), np.max(x))
    return x
    
# Rescale the prediction to [-0.5, 0.5] (the helper's defaults), then band-pass
# it between 20 Hz and 4 kHz at a 16 kHz sampling rate.
y = butter_bandpass_filter(denormalize_data(y_predict), 20, 4000, 16000)

# Magnitude spectrogram in dB relative to its peak value.
D = librosa.amplitude_to_db(
    np.abs(librosa.stft(y.flatten())),
    ref=np.max,
)

In [None]:
import librosa

import librosa.display
import matplotlib.pyplot as plt

# savefig() does not create missing directories, so make sure 'img' exists.
Path('img').mkdir(parents=True, exist_ok=True)

plt.figure(figsize=(6, 4))
# Pass the real sampling rate (16 kHz); specshow otherwise assumes 22050 Hz
# and labels the frequency axis incorrectly.
librosa.display.specshow(D, sr=16000, y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('Vnet')
plt.savefig('img/vnet_fix.png')
plt.show()

In [None]:
import scipy.io.wavfile as wav

# Rescale the filtered signal to [-16384, 16383] (half of the int16 range)
# before casting to 16-bit integers.
y_int16 = denormalize_data(y, desired_min=-16384, desired_max=16383)

# wav.write() does not create missing directories, so make sure './audio' exists.
Path('./audio').mkdir(parents=True, exist_ok=True)
wav.write('./audio/vnet_fix.wav', 16000, y_int16.astype(np.int16))

# Filter all data

In [None]:
from tqdm.notebook import tnrange

# De-reverberate and band-pass every training example.
# The result has one flat, filtered signal per row.
train_data = []
for i in tnrange(len(X_train_reverb_norm)):
    # The network expects (batch, samples, channels); a single example has no
    # batch axis, so add it here. The prediction is flattened to 1-D so the
    # band-pass below runs along time (lfilter filters the last axis, which
    # would otherwise be the length-1 channel axis).
    denoised_signal = model.predict(
        np.reshape(X_train_reverb_norm[i], (1, -1, 1)), verbose=0).flatten()
    y = butter_bandpass_filter(denoised_signal, 20, 4000, 16000)
    train_data.append(y)

train_data = np.asarray(train_data)

In [None]:
save_dir = Path('./E2E_data')
# np.save() does not create missing directories.
save_dir.mkdir(parents=True, exist_ok=True)
np.save(save_dir / "Vnet_train_reverb.npy", train_data)

In [None]:
# De-reverberate and band-pass every test example.
# The result has one flat, filtered signal per row.
test_data = []
for i in tnrange(len(X_test_reverb_norm)):
    # The network expects (batch, samples, channels); a single example has no
    # batch axis, so add it here. The prediction is flattened to 1-D so the
    # band-pass below runs along time (lfilter filters the last axis, which
    # would otherwise be the length-1 channel axis).
    denoised_signal = model.predict(
        np.reshape(X_test_reverb_norm[i], (1, -1, 1)), verbose=0).flatten()
    y = butter_bandpass_filter(denoised_signal, 20, 4000, 16000)
    test_data.append(y)

test_data = np.asarray(test_data)

In [None]:
save_dir = Path('./E2E_data')
# np.save() does not create missing directories.
save_dir.mkdir(parents=True, exist_ok=True)
# Write the test output to its own file so it does not overwrite
# "Vnet_train_reverb.npy".
np.save(save_dir / "Vnet_test_reverb.npy", test_data)