In [1]:
import librosa
import os
import random
import time
import gc
import pickle
import numpy as np
import tensorflow as tf
from typing import List
from numpy import save
from sklearn.preprocessing import MinMaxScaler

# When True, the path-configuration cell below uses the Colab/Drive dataset locations
# and derives all working directories from WORKDIR.
COLAB=True
#WORKDIR="drive/My Drive/Colab Notebooks/data"
# Root directory for generated data (clean/noisy audio, .npy samples, checkpoints, logs).
WORKDIR="./data"
# Passed to model.fit as initial_epoch; presumably raised when resuming from a saved checkpoint.
INITIAL_EPOCH=0

In [0]:
def make_colab_env():
  """Create the working sub-directories of WORKDIR used by this notebook.

  Creates ``clean``, ``noisy``, ``checkpoints``, ``logs``, ``noisy-samplified``
  and ``clean-samplified`` under WORKDIR. Safe to call repeatedly: existing
  directories are left untouched, and WORKDIR itself is created if missing.
  """
  subdirectories = ("clean", "noisy", "checkpoints", "logs",
                    "noisy-samplified", "clean-samplified")
  for subdirectory in subdirectories:
    # makedirs(exist_ok=True) instead of mkdir: mkdir raises when the directory
    # already exists (notebook re-run) or when WORKDIR has not been created yet.
    os.makedirs(os.path.join(WORKDIR, subdirectory), exist_ok=True)

In [0]:
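# Pin librosa to 0.7.2 on Colab: create_audio_samples uses librosa.output.write_wav, which newer librosa releases no longer provide.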
!pip install librosa==0.7.2

In [0]:
def read_noise(audio_files: List[str], sampling: int, frame_length: int, hop_length: float = 0.4) -> np.ndarray:
    """Load noise recordings and slice them into overlapping fixed-length frames.

    Args:
        audio_files: Paths (or path-like entries) of the noise recordings.
        sampling: Sample rate passed to ``librosa.load`` as ``sr``.
        frame_length: Number of samples in each noise frame.
        hop_length: Step between consecutive frames, expressed as a fraction
            of ``frame_length``.

    Returns:
        The frames of all usable files stacked row-wise with ``np.vstack``
        (one frame of ``frame_length`` samples per row). Files are stacked in
        random order.

    Raises:
        ValueError: If no file yields at least one full frame.
    """
    # A tiny frame_length * hop_length would truncate to a hop of 0, which is
    # not a valid step; never go below one sample.
    hop = max(1, int(frame_length * hop_length))
    audio_stacks = []
    total = len(audio_files)
    for i, f in enumerate(audio_files):
        y, _ = librosa.load(f, sr=sampling)
        if len(y) < frame_length:
            # Framing a clip shorter than one frame fails; drop it the same way
            # create_audio_samples drops short speech files instead of aborting.
            print("Dropping {0} because is shorter than frame_length".format(f))
        else:
            audio_stacks.append(librosa.util.frame(y, frame_length=frame_length, hop_length=hop, axis=0))
        print("Reading noise [{0} / {1}]".format(i+1, total))
    if not audio_stacks:
        # np.vstack([]) would raise a ValueError with an unhelpful message.
        raise ValueError("No noise frames could be read from the given audio files")
    random.shuffle(audio_stacks)
    return np.vstack(audio_stacks)

# Blend speech with noise
def create_audio_samples(noise_dir: str, speech_dir: str, noisy_dir: str, clean_dir: str,
           frame_length: int, sampling: int, noise_frame_hop: float = 0.4,
           max_noise_files: int = 10) -> int:
    """Write paired clean/noisy speech files built from speech and noise corpora.

    Every speech file is cut into non-overlapping frames of ``frame_length``
    samples (the trailing partial frame is discarded) and written to
    ``clean_dir``. Each frame then gets two randomly chosen half-length noise
    frames added, scaled by a random factor in [0.2, 0.5], and the result is
    written to ``noisy_dir`` under the same file name.

    Args:
        noise_dir: Directory scanned for noise recordings.
        speech_dir: Directory scanned for speech recordings.
        noisy_dir: Output directory for the noise-blended files.
        clean_dir: Output directory for the trimmed clean files.
        frame_length: Samples per speech frame; must be even because each
            frame is covered by two noise frames of half that length.
        sampling: Sample rate used for loading and writing audio.
        noise_frame_hop: Hop between noise frames as a fraction of the noise
            frame length (forwarded to ``read_noise``).
        max_noise_files: How many randomly selected noise files are loaded
            into memory; ``None`` loads all of them.

    Returns:
        Total number of speech frames written across all kept files.

    Raises:
        ValueError: If ``frame_length`` is odd.
    """
    if frame_length % 2:
        # Fail before any file is written; otherwise the first noise addition
        # fails with a shape mismatch after the clean file already exists.
        raise ValueError("frame_length must be even, got {0}".format(frame_length))

    # Context managers close the directory iterators deterministically.
    with os.scandir(noise_dir) as entries:
        noise_files = list(entries)
    with os.scandir(speech_dir) as entries:
        speech_files = list(entries)

    random.shuffle(noise_files)
    random.shuffle(speech_files)

    noise_files = noise_files[:max_noise_files]

    print("Reading noise into memory")
    noise_frames = read_noise(noise_files, sampling,
                              frame_length // 2, noise_frame_hop)

    samples_count = 0
    total = len(speech_files)
    for idx, sample_file in enumerate(speech_files):
        y, _ = librosa.load(sample_file, sr=sampling)
        if len(y) < frame_length:
            print("Dropping {0} because is shorter than frame_length".format(sample_file))
            continue
        y = librosa.util.frame(y, frame_length=frame_length, hop_length=frame_length, axis=0)
        samples_count += y.shape[0]
        filename = os.path.splitext(os.path.basename(sample_file))[0] + ".wav"
        # The clean file must be written before the noise is added: the frames
        # are modified in place below.
        librosa.output.write_wav(os.path.join(clean_dir, filename), y.reshape(1, -1)[0], sr=sampling)
        for i in range(y.shape[0]):
            y[i, :] += random.uniform(0.2, 0.5) * np.concatenate((random.choice(noise_frames),random.choice(noise_frames)))
        librosa.output.write_wav(os.path.join(noisy_dir, filename), y.reshape(1, -1)[0], sr=sampling)
        # 1-based progress, matching the other progress messages in this notebook.
        print("Blending noise with sample speech [{0} / {1}]".format(idx+1, total))

    print("Possible {0} samples".format(samples_count))
    return samples_count

In [2]:
# Only the raw dataset locations and the data root differ between environments.
# Every working path is derived from the data root afterwards, so both
# environments define the same set of names (checkpoints_path and logs_dir were
# previously missing outside Colab, which broke the training cell).
if not COLAB:
    noise_directory = "/home/michal/ESC-50/ESC-50-master/audio"
    speech_directory = "/home/michal/OpenSLR/LibriSpeech/dev-clean/"
    data_root = "/home/michal/Documents/Speech-enhancement/data"
else:
    noise_directory = "drive/My Drive/Colab Notebooks/ESC-50-master/audio"
    speech_directory = "drive/My Drive/Colab Notebooks/LibriSpeech/dev-clean/"
    data_root = WORKDIR

noisy_directory = os.path.join(data_root, "noisy")
clean_directory = os.path.join(data_root, "clean")
noisy_samples_directory = os.path.join(data_root, "noisy-samplified")
clean_samples_directory = os.path.join(data_root, "clean-samplified")
# NOTE(review): ".plk" looks like a typo for ".pkl"; kept so existing scaler files are still found.
scaler_path = os.path.join(data_root, "scaler.plk")
checkpoints_path = os.path.join(data_root, "checkpoints")
logs_dir = os.path.join(data_root, "logs")

# Number of frames samplify accumulates before it flushes a .npy file.
npy_samples_count = 100000
# Samples per frame; matches the default input length of the U-Net built by get_unet.
frame_length = 16384
# Sample rate (Hz) used when loading and writing audio.
sampling = 16000

In [0]:
#create_audio_samples(noise_directory,
#                    speech_directory,
#                    noisy_directory,
#                    clean_directory,
#                    frame_length,
#                    sampling)

In [0]:
def samplify(audio_files: List[str], output_path: str,
             frame_length: int, sampling: int, npy_samples_count: int,
             hop_ratio: float = 0.3):
    """Cut audio files into overlapping frames and store them as numbered .npy chunks.

    Frames are buffered file by file; as soon as the buffer holds more than
    ``npy_samples_count`` frames it is stacked and written to
    ``<output_path>/<k>.npy`` (k = 0, 1, ...). Whatever remains after the last
    file is written as a final chunk.

    Files are processed in the order given. When this is run separately for
    noisy and clean recordings that must stay paired row by row, both calls
    need the same file order (``os.scandir`` yields entries in arbitrary order).

    Args:
        audio_files: Paths (or path-like entries) of the recordings.
        output_path: Directory receiving the .npy chunks.
        frame_length: Samples per frame.
        sampling: Sample rate passed to ``librosa.load`` as ``sr``.
        npy_samples_count: Buffer size (in frames) that triggers a flush.
        hop_ratio: Hop between frames as a fraction of ``frame_length``.
    """
    def _write_chunk(buffered_frames, chunk_idx):
        """Stack the buffered frames and save them as ``<chunk_idx>.npy``."""
        output = os.path.join(output_path, "{}.npy".format(chunk_idx))
        v = np.vstack(buffered_frames)
        print("\nWriting {0} into {1}".format(v.shape, output))
        save(output, v)

    hop = int(frame_length * hop_ratio)
    total = len(audio_files)
    npy_frames = []
    npy_idx = 0
    samples_in_npy_frames = 0
    for audio_idx, audio_file in enumerate(audio_files):
        y, _ = librosa.load(audio_file, sr=sampling)
        frames = librosa.util.frame(
            y, frame_length=frame_length, hop_length=hop, axis=0)
        samples_in_npy_frames += frames.shape[0]
        npy_frames.append(frames)
        if samples_in_npy_frames > npy_samples_count:
            _write_chunk(npy_frames, npy_idx)
            npy_idx += 1
            npy_frames = []
            samples_in_npy_frames = 0
        print("Samplifying [{0} / {1}]".format(audio_idx+1, total))
    if npy_frames:
        # Flush the partially filled buffer left over after the last file.
        _write_chunk(npy_frames, npy_idx)

In [0]:
def fit_scaler(samples_npy: List[str], scaler_save_path: str):
    """Fit a MinMaxScaler incrementally over .npy sample files and pickle it.

    Each file is loaded, flattened to ``(n_samples, -1)`` and fed to
    ``partial_fit`` so only one file is held in memory at a time.

    Args:
        samples_npy: Paths of the .npy sample files to fit on.
        scaler_save_path: Destination file for the pickled scaler.
    """
    scaler = MinMaxScaler()
    total = len(samples_npy)
    for sample_idx, samples_path in enumerate(samples_npy):
        print("Fitting scaler [{0} / {1}]".format(sample_idx+1, total))
        # The sample files hold plain numeric arrays, so pickle support is not
        # needed; leaving it off (the default) avoids executing code embedded
        # in a tampered file and matches how scale_it loads the same files.
        samples = np.load(samples_path)
        scaler.partial_fit(samples.reshape(samples.shape[0], -1))
    print("Saving scaler to %s" % scaler_save_path)
    # Write-only binary mode is sufficient; the file is never read back here.
    with open(scaler_save_path, 'wb') as f:
        pickle.dump(scaler, f)
        

def scale_it(samples_npy: List[str], scaler_path: str):
    """Apply a previously fitted scaler to .npy sample files, rewriting them in place.

    Each file is loaded, flattened to ``(n_samples, -1)``, transformed,
    reshaped back to its original shape and written to a temporary file next
    to the original, which then replaces the original.

    Args:
        samples_npy: Paths of the .npy sample files to rescale.
        scaler_path: Pickle file holding the fitted scaler (see fit_scaler).
    """
    # SECURITY: unpickling executes arbitrary code from the file; only load
    # scaler files produced by this notebook.
    with open(scaler_path, 'rb') as f:
        scaler = pickle.load(f)
    total = len(samples_npy)
    for sample_idx, samples_path in enumerate(samples_npy):
        print("Scaling samples [{0} / {1}]".format(sample_idx+1, total))
        samples = np.load(samples_path)
        shape = samples.shape
        samples = scaler.transform(samples.reshape(shape[0], -1)).reshape(shape)
        # splitext strips only the final extension. Splitting on the first '.'
        # breaks for paths such as "./data/x/0.npy" (empty prefix, so the
        # temporary file lands in the current directory) or directories whose
        # names contain a dot.
        temp_output_file = os.path.splitext(samples_path)[0] + "_tmp.npy"
        save(temp_output_file, samples)
        # os.replace overwrites the destination on every platform.
        os.replace(temp_output_file, samples_path)

In [0]:
#samplify(list(os.scandir(noisy_directory)), noisy_samples_directory, frame_length, sampling, npy_samples_count)
#samplify(list(os.scandir(clean_directory)), clean_samples_directory, frame_length, sampling, npy_samples_count)
#noise_npy_samples = [p.path for p in os.scandir(noisy_samples_directory)] 
#clean_npy_samples = [p.path for p in os.scandir(clean_samples_directory)]
#fit_scaler(noise_npy_samples+clean_npy_samples, scaler_path)
#scale_it(noise_npy_samples+clean_npy_samples, scaler_path)

In [0]:
class Generator(tf.keras.utils.Sequence):
    """Keras Sequence that streams paired (x, y) batches from lists of .npy files.

    The files are read lazily, one after another, and samples are handed out
    in storage order. ``x_npy_files[k]`` and ``y_npy_files[k]`` are expected to
    contain matching samples row for row.

    Limitation: batches are produced by advancing two internal iterators, so
    ``__getitem__`` ignores ``index`` except that ``index == 0`` restarts the
    stream. Batches must therefore be requested sequentially starting at 0
    (no index shuffling, no parallel workers).
    """

    def __init__(self, x_npy_files: List[str], y_npy_files: List[str], batch_size: int, shuffle: bool = False):
        """Count the available samples and prepare the first pass.

        Args:
            x_npy_files: .npy files with the input samples.
            y_npy_files: .npy files with the target samples, in matching order.
            batch_size: Number of samples per batch.
            shuffle: If True, samples are permuted within each batch.
        """
        self.batch_size = batch_size
        self.shuffle = shuffle

        # Only the shape is needed here; memory-mapping reads the header
        # without pulling each array into memory.
        self.sample_count = 0
        for npy_file in x_npy_files:
            self.sample_count += np.load(npy_file, mmap_mode="r").shape[0]

        self.x_npy_files = x_npy_files
        self.y_npy_files = y_npy_files

        self._on_each_epoch()

    def __len__(self) -> int:
        """Number of full batches per pass (a trailing partial batch is dropped)."""
        return self.sample_count // self.batch_size

    def __getitem__(self, index):
        """Return the next batch as ``(x, y)`` arrays; see the class note on ``index``."""
        if index == 0:
            # Start of a new pass: rewind both streams.
            self._on_each_epoch()
        x_batch = []
        y_batch = []
        for _ in range(0, self.batch_size):
            x_batch.append(next(self.x_generator))
            y_batch.append(next(self.y_generator))
        if self.shuffle:
            # One uniform permutation applied to both lists keeps pairs aligned.
            # (Swapping position i with perm[i] for every i is not a uniform
            # shuffle; e.g. a batch of two always ends up unshuffled.)
            order = random.sample(range(len(x_batch)), len(x_batch))
            x_batch = [x_batch[k] for k in order]
            y_batch = [y_batch[k] for k in order]
        return np.array(x_batch), np.array(y_batch)

    def _on_each_epoch(self):
        """(Re)create the lazy sample streams over the x and y files."""
        def add_channel_axis(m):
            # (n, length) -> (n, length, 1)
            return m.reshape(m.shape[0], m.shape[1], 1)

        def lazy_numpy_vstack(matrices):
            # Yield rows of each matrix in turn; map() keeps loading lazy, so a
            # file is only read when the stream reaches it.
            for matrix in matrices:
                yield from matrix

        self.x_samples_list = map(add_channel_axis, map(np.load, self.x_npy_files))
        self.y_samples_list = map(add_channel_axis, map(np.load, self.y_npy_files))

        self.x_generator = lazy_numpy_vstack(self.x_samples_list)
        self.y_generator = lazy_numpy_vstack(self.y_samples_list)

In [0]:
#G = Generator(list(os.scandir(noisy_samples_directory)), list(os.scandir(clean_samples_directory)), 64)

In [3]:
def LeakyReLU(x, alpha=0.3):
    """Element-wise maximum of ``x`` and ``alpha * x``.

    For ``0 <= alpha <= 1`` this is the leaky ReLU: positive inputs pass
    through, negative inputs are scaled by ``alpha``.

    Note: a later cell imports ``LeakyReLU`` from ``tensorflow.keras.layers``,
    which rebinds this name once that cell has run.
    """
    scaled = alpha*x
    return tf.maximum(scaled, x)

In [3]:
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Conv1D, Reshape, UpSampling1D, concatenate, BatchNormalization, Activation, LeakyReLU
def get_unet(input_length: int = 16384):
    """Build and compile a 1-D U-Net mapping a waveform frame to a waveform frame.

    The encoder has four Conv1D/BatchNorm/LeakyReLU stages (24, 48, 96, 192
    filters, kernel 15), each followed by decimation by 2. A bottleneck stage
    with 384 filters follows. The decoder mirrors the encoder: upsample by 2,
    concatenate the matching encoder output, then Conv1D/BatchNorm/LeakyReLU
    (kernel 5). A final single-filter Conv1D with tanh produces the output.

    The model is compiled with Adam (learning rate 1e-4) and MSE loss, and its
    summary is printed.

    Args:
        input_length: Number of samples per input frame. Must be a positive
            multiple of 16 so that every decimate/upsample pair restores the
            length needed for the skip connections.

    Returns:
        The compiled ``tf.keras`` model with input shape ``(input_length, 1)``.

    Raises:
        ValueError: If ``input_length`` is not a positive multiple of 16.
    """
    initial_filters=24
    kernel_downsampling=15
    kernel_upsampling=5
    padding='same'
    layer_factors = [1, 2, 4, 8, 16]

    # Each encoder stage halves the length with x[:, ::2, :] and each decoder
    # stage doubles it; the lengths only line up for the concatenation when
    # the input is divisible by 2 ** (number of stages).
    required_multiple = 2 ** (len(layer_factors) - 1)
    if input_length <= 0 or input_length % required_multiple != 0:
        raise ValueError(
            "input_length must be a positive multiple of {0}, got {1}".format(
                required_multiple, input_length))

    encoder_layers = []

    inputs = Input((input_length, 1))
    x = inputs
    for i in layer_factors[:-1]:
        x = Conv1D(initial_filters * i, kernel_downsampling, strides=1, padding=padding)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.2)(x)
        # Keep the full-resolution activation for the skip connection.
        encoder_layers.append(x)
        # Decimate: keep every second time step.
        x = x[:,::2,:]

    # Bottleneck.
    x = Conv1D(initial_filters * layer_factors[-1], kernel_downsampling, strides=1, padding=padding)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.2)(x)

    for i, el in zip(reversed(layer_factors[:-1]), reversed(encoder_layers)):
        x = UpSampling1D()(x)
        x = concatenate([x, el])
        x = Conv1D(initial_filters * i, kernel_upsampling, strides=1, padding=padding)(x)
        x = BatchNormalization()(x)
        x = LeakyReLU(alpha=0.2)(x)

    # tanh bounds the predicted waveform to [-1, 1].
    x = Conv1D(1, kernel_upsampling, strides=1, activation=tf.tanh, padding=padding)(x)

    outputs=x

    model = Model(inputs, outputs=outputs, name="example")
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999), loss='mse')
    model.summary()
    return model

In [4]:
# Create a TF1-compat interactive session with allow_growth enabled on the GPU options,
# so GPU memory is allocated as needed rather than all at once.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.InteractiveSession(config=config)

# Build a fresh model. The commented-out lines below instead load a saved
# checkpoint from WORKDIR/checkpoints (presumably to resume training together with INITIAL_EPOCH).
model = get_unet()
#model = tf.keras.models.load_model(os.path.join(WORKDIR, "checkpoints", "model-cp-epoch_0201.h5"), 
#                                   custom_objects={"LeakyReLU" : LeakyReLU})
# NOTE: get_unet() already prints the summary once; this second call matters when the model is loaded instead.
model.summary()

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Model: "example"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 16384, 1)]   0                                            
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 16384, 24)    384         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 16384, 24)    96          conv1d[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 16384, 24)    0           batch_normalization[0][0]        
___________

In [5]:
# Load the first noisy sample file and append a trailing axis of size 1
# (the model input shape is (length, 1)).
X = np.load(os.path.join(noisy_samples_directory, "0.npy"))
X = X.reshape(X.shape[0],X.shape[1],1)
# Run a garbage-collection pass; its return value is what the cell displays.
gc.collect()

3

In [6]:
# Load the first clean sample file (the training targets) and append a trailing axis of size 1.
Y = np.load(os.path.join(clean_samples_directory, "0.npy"))
Y = Y.reshape(Y.shape[0],Y.shape[1],1)
# Run a garbage-collection pass; its return value is what the cell displays.
gc.collect()

3

In [7]:
# Shuffle X and Y in place with the SAME permutation so noisy/clean pairs stay aligned.
#
# The previous tuple swap (X[i], X[j] = X[j], X[i]) does not exchange rows of a
# NumPy array: X[i] and X[j] are views, so row i is overwritten with row j and
# then row j is assigned its own data back. Row i's content is lost and
# samples end up duplicated.
assert X.shape[0] == Y.shape[0], "X and Y must contain the same number of samples"
rng_state = np.random.get_state()
np.random.shuffle(X)
# Restoring the generator state makes the second shuffle draw the identical permutation.
np.random.set_state(rng_state)
np.random.shuffle(Y)

In [8]:
# Callbacks are built here but only take effect once appended to this list;
# both appends are currently commented out, so nothing is saved or logged.
callbacks = []

# Per-epoch checkpoint, one file per epoch number.
# save_freq='epoch' replaces the deprecated period=1 argument (same behaviour).
# With save_best_only=False the monitored metric is not used to decide whether to save.
name = "model-cp-epoch_{epoch:04d}.h5"
path = os.path.join(checkpoints_path, name)
checkpoint = tf.keras.callbacks.ModelCheckpoint(path, verbose=1, monitor='val_loss', save_best_only=False, mode='auto', save_freq='epoch')
#callbacks.append(checkpoint)

# One TensorBoard run directory per launch, keyed by the start timestamp.
log_dir = os.path.join(logs_dir, "model-{0}".format(int(time.time())))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, update_freq='epoch', write_graph=True, profile_batch=0)
#callbacks.append(tensorboard_callback)

# initial_epoch lets a resumed run continue the epoch numbering.
model.fit(x=X, y=Y, batch_size=16, epochs=1000, verbose=1, callbacks=callbacks, shuffle=True, initial_epoch=INITIAL_EPOCH)

Train on 51403 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
 3632/51403 [=>............................] - ETA: 7:56 - loss: 1.5731e-04

KeyboardInterrupt: 

# 