In [None]:
#!pip install --upgrade tables
#!pip install tqdm
!pip install --upgrade tensorflow-addons
#!pip install --upgrade conda 
#!pip install --upgrade tensorflow_probability==0.11.1

In [None]:
import sys, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras

import librosa
import librosa.display

import scipy
#import scipy.signal
#from scipy.io import wavfile as wav

from tqdm import tqdm

import IPython.display as ipd

print("Tensorflow Version")
print(tf.__version__)
import keras

In [None]:
# Audio / feature-extraction configuration shared by the cells below.
sample_rate = 16000  # Hz

# Frame Length & Stride (sec)
frame_length = 0.032
frame_stride = 0.016

# Mel filter bands for VNR
mel_band_counts = 32

# N-FFT & N-Hop (samples) & N-Mels (bands)
NFFT = int(frame_length * sample_rate)
NHOP = int(frame_stride * sample_rate)
NMEL = int(mel_band_counts)

print('N-FFT:', NFFT)
print('N-HOP:', NHOP)
print('N-Mel:', NMEL)

# `import scipy` alone does not guarantee that the `scipy.signal` submodule is
# loaded; import it explicitly instead of relying on another package having
# imported it as a side effect.
import scipy.signal

# Window Function Decay (exponential) 0.3sec/60dB
# (0.001 is the amplitude ratio that corresponds to -60 dB)
TAU = -((0.3*sample_rate)-1) / np.log(0.001)
WINDOW_FN = scipy.signal.get_window(('exponential', 0, TAU), NFFT)
WINDOW_LENGTH_TIME = 0.2 # sec
WINDOW_LENGTH = int(WINDOW_LENGTH_TIME * sample_rate)
print(WINDOW_LENGTH)
print('Exponential Decay - Tau:', TAU)

# Bandpass filter freq range (Hz): [low, high]
BANDPASS_RANGE = [150, 5000]

In [None]:
# Sub-directories of the dataset root, one per signal kind. The same names are
# used as lookup labels for the file paths inside DatasetFiles.
TARGET_DIR = ['clean', 'noise', 'noisy']

class DatasetFiles:
    """One clean / noise / noisy file triple and the features derived from it.

    ``files`` and ``labels`` are zipped into a ``label -> path`` mapping; the
    labels ``'clean'``, ``'noise'`` and ``'noisy'`` must be present because the
    path properties look them up by name.

    Targets (from the clean and noise recordings):
      * VAD - per-frame 0/1 voice-activity flag.
      * VNR - per-frame voice-to-noise ratio, mapped from ``db_range`` to [0, 1].
    Input (from the noisy recording): a dB-scaled mel spectrogram.

    NOTE(review): ``window_fn`` and ``win_length`` are stored but never used;
    ``_stft`` falls back to its own defaults ('hann', full ``n_fft`` window).
    """

    def __init__(self, files, labels, 
                 sample_rate=None, n_fft=NFFT, n_hop=NHOP, 
                 window_fn=WINDOW_FN, win_length = WINDOW_LENGTH,
                 bandpass_range=BANDPASS_RANGE, stft_center=True, **kwargs):
        self._data = dict(zip(labels, files))
        self.n_fft = n_fft
        self.n_hop = n_hop
        self.sample_rate = sample_rate
        self.window_fn = window_fn
        self.win_length = win_length
        self.bandpass_range = bandpass_range
        self.center = stft_center

    def process_to_list(self):
        """Return ``(vad, vnr, inputs)`` as plain Python lists."""
        vad, vnr = self.process_targets()
        inputs = self.process_input()
        return vad.tolist(), vnr.tolist(), inputs.tolist()

    def process_targets(self):
        """Compute the ``(vad, vnr)`` target arrays from the clean and noise files."""
        x = self._stft(self.clean)
        v = self._stft(self.noise)

        vad = self._vad(x)
        vnr = self._vnr(x, v)

        return vad, vnr

    def process_input(self, n_mels=64):
        """Return the noisy-signal mel features transposed to ``(frames, n_mels)``."""
        return self._input(n_mels).T

    def _input(self, n_mels=64):
        """dB-scaled mel spectrogram of the noisy signal, shape ``(n_mels, frames)``."""
        # Keyword arguments only: librosa >= 0.10 rejects positional `y`.
        mel = librosa.feature.melspectrogram(y=self.noisy, sr=self.sr, n_mels=n_mels,
                                             n_fft=self.n_fft, hop_length=self.n_hop)
        return librosa.power_to_db(mel)

    def _vad(self, x_spectrogram):
        """Flag frames whose band-limited mean power exceeds 1% of the loudest frame.

        Returns a float array of 0.0 / 1.0 with one entry per frame.
        """
        t_wx = self._bandpass_stft(x_spectrogram)  # (frames, bins)

        power = np.abs(t_wx) ** 2
        mean_frame = power.mean(axis=1)
        max_frame = mean_frame.max()

        return np.array(mean_frame > (max_frame*0.01)).astype(float)

    def _vnr(self, x_spec, v_spec, db_range=(-15, 40)):
        """Per-frame voice-to-noise ratio in dB, clipped to ``db_range`` and scaled to [0, 1].

        Special cases:
          * noise power == 0            -> top of ``db_range`` (ratio is unbounded).
          * voice power == 0, noise > 0 -> bottom of ``db_range`` (ratio is -inf dB).
            The previous implementation mapped this case to the top of the
            range, labelling voice-free frames as maximally voiced.
        """
        melx = self._mel(x_spec, power=1)
        melv = self._mel(v_spec, power=1)

        x = (np.abs(melx)**2).mean(axis=0)
        v = (np.abs(melv)**2).mean(axis=0)

        # Clean and noise recordings may differ in length; compare the common frames only.
        n_frames = min(len(x), len(v))
        x, v = x[:n_frames], v[:n_frames]

        low_db, high_db = db_range
        noise_present = v != 0
        ratio = np.divide(x, v, out=np.zeros_like(x), where=noise_present)
        voiced = noise_present & (ratio != 0)
        silent = noise_present & (ratio == 0)

        vnr_db = np.full(x.shape, float(high_db))
        vnr_db[voiced] = 10 * np.log10(ratio[voiced])
        vnr_db[silent] = low_db

        return np.interp(vnr_db.clip(low_db, high_db), db_range, (0, 1))

    def _stft(self, signal, window_fn='hann', window_length=None):
        """Short-time Fourier transform of ``signal``; returns ``(bins, frames)``.

        Centering follows ``self.center`` (the ``stft_center`` constructor argument).
        """
        spectrogram = librosa.stft(signal, n_fft=self.n_fft, hop_length=self.n_hop,
                                   window=window_fn,
                                   win_length=window_length,
                                   center=self.center)

        return spectrogram # (bins, frames)

    def _mel(self, spectrogram, n_mels=32, power=2):
        """Project ``spectrogram`` onto ``n_mels`` mel bands.

        NOTE(review): callers pass the complex STFT as ``S``; as far as I can
        tell librosa applies ``power`` only when it computes the spectrogram
        itself from ``y`` - confirm that filtering the complex spectrum is intended.
        """
        mels = librosa.feature.melspectrogram(S=spectrogram, sr=self.sr, power=power, 
                                              n_fft=self.n_fft, n_mels=n_mels)
        return mels

    def _bandpass_stft(self, spectrogram, clip=False):
        """Restrict a ``(bins, frames)`` spectrogram to ``self.bandpass_range``.

        Returns a ``(frames, bins)`` array. With ``clip=False`` out-of-band bins
        are zeroed; with ``clip=True`` they are removed, so the second axis
        shrinks to the in-band bins.
        """
        start, end = self.bandpass_range

        if start is None:
            start = 0
        if end is None:
            end = self.sr // 2

        # Keyword arguments only: librosa >= 0.10 rejects positional sr / n_fft.
        frequency_map = librosa.fft_frequencies(sr=self.sr, n_fft=self.n_fft)

        bandpass_filter = np.logical_and(start < frequency_map, frequency_map < end)

        if clip:
            # Select along the frequency axis (axis 0 of the input); the old
            # code indexed the frame axis with bin indices.
            return spectrogram[bandpass_filter, :].T
        return bandpass_filter * spectrogram.T

    # --- audio accessors: each access re-reads the file from disk ---

    @property
    def clean(self):
        return librosa.load(self.clean_path, sr=self.sr)[0]

    @property
    def noise(self):
        return librosa.load(self.noise_path, sr=self.sr)[0]

    @property
    def noisy(self):
        return librosa.load(self.noisy_path, sr=self.sr)[0]

    # --- path accessors ---

    @property
    def clean_path(self):
        return self._data['clean']

    @property
    def noise_path(self):
        return self._data['noise']

    @property
    def noisy_path(self):
        return self._data['noisy']

    @property
    def sr(self):
        """Alias of ``sample_rate``."""
        return self.sample_rate
    
class Dataset:
    """Collection of DatasetFiles built from parallel label directories.

    ``root_dir`` must contain one sub-directory per entry of ``labels``. Files
    are matched across directories by the integer that follows the last ``_``
    in their name (e.g. ``clean_fileid_12.wav`` -> 12).

    Args:
        root_dir: dataset root directory.
        labels: sub-directory names, also used as keys inside DatasetFiles.
        validate: when True (default) raise ValueError unless every directory
            holds exactly the same set of file ids.
        **kwargs: forwarded to every DatasetFiles instance.
    """

    def __init__(self, root_dir, labels, validate=True, **kwargs):
        self.rootdir = root_dir
        self.labels = labels
        self.validate = validate
        self.files = self._load_files(**kwargs)

    def _load_files(self, **kwargs):
        """Scan every label directory and group the files that share an id."""
        datafiles = []  # one {file_id: file_path} dict per label, sorted by id
        for label in self.labels:
            path = os.path.join(self.rootdir, label)
            by_id = self._scandir(path)
            datafiles.append(dict(sorted(by_id.items(), key=lambda kv: kv[0])))

        if self.validate:
            datafiles = self._validate(datafiles)

        # Every dict is sorted by id, so zipping their values aligns equal ids.
        return [DatasetFiles(files, self.labels, **kwargs)
                for files in zip(*(d.values() for d in datafiles))]

    def _scandir(self, path):
        """Return ``{file_id: file_path}`` for every entry in ``path``.

        Plain path strings are stored rather than ``os.DirEntry`` objects:
        DirEntry instances cannot be pickled, which breaks multiprocessing under
        the "spawn" start method. The scandir iterator is closed deterministically.
        """
        with os.scandir(path) as entries:
            return {self._fn_parser(entry.name)[0]: entry.path for entry in entries}

    @staticmethod
    def _fn_parser(fn):
        """Split ``fn`` into ``(file_id, fn)``; the id is the integer after the last ``_``.

        Raises ValueError when the name has no ``_`` or the suffix is not an integer.
        """
        name, ext = os.path.splitext(fn)
        name, file_id = name.rsplit('_', 1)
        return int(file_id), fn

    @staticmethod
    def _validate(datalists):
        """Check that every mapping in ``datalists`` has the same key set.

        Returns ``datalists`` unchanged; raises ValueError on the first mismatch.
        """
        _cache = None
        for idx, i in enumerate(datalists):
            if idx > 0 and _cache != set(i):
                raise ValueError('Dataset Unmatched!!')
            _cache = set(i)
        print('Validate Complete')
        return datalists
    
# Root folder that holds one sub-directory per TARGET_DIR label.
dataset_root = "./DNS-Challenge-ds/dataset-50h"

# Use the configured `sample_rate` rather than repeating the literal, so the
# loader cannot drift from the NFFT / NHOP values derived from it.
dataset = Dataset(dataset_root, TARGET_DIR, sample_rate=sample_rate)

print("Total", len(dataset.files), "data loaded.")

In [None]:
# Example: visual sanity check on the first file triple - the clean waveform
# followed by the two training targets derived from it.
example = dataset.files[0]

targets = example.process_targets()  # (vad, vnr)
inputs = example.process_input()

fig, axs = plt.subplots(3, figsize=(15, 8))
fig.suptitle('Clean, VAD, VNR')

for ax, series in zip(axs, (example.clean, *targets)):
    ax.plot(series)
plt.show()

print(inputs.shape)


In [None]:
# Preprocessing VAD, VNR: extract targets and inputs for every file in parallel.

import time
import datetime
import multiprocessing
from multiprocessing import Lock

process_worker_count = 10

manager = multiprocessing.Manager()
result_list = manager.list()  # shared across processes; row order is not deterministic

data = dataset.files

lock = Lock()

def worker(l, data, results=None):
    """Process every item of ``data`` and append its (VAD, VNR, INPUT) row to ``results``.

    Args:
        l: lock that serialises the progress output.
        data: the DatasetFiles objects assigned to this worker.
        results: shared list receiving the rows; defaults to the module-level
            ``result_list`` for backward compatibility.
    """
    if results is None:
        results = result_list
    for idx, item in enumerate(data):
        results.append(item.process_to_list())
        with l:
            print("Processing file ", idx+1,"/",len(data))

# Strided slices (data[i::n]) give every file to exactly one worker. The former
# floor-division chunks silently dropped the trailing len(data) % n files, and
# every file when len(data) < n.
process_pool = [
    multiprocessing.Process(
        target=worker,
        args=[lock, data[i::process_worker_count], result_list],
    )
    for i in range(process_worker_count)
]

for process in process_pool:
    process.start()

for process in process_pool:
    process.join()

print(len(result_list))

# Copy out of the manager proxy before handing the rows to pandas.
df = pd.DataFrame(list(result_list), columns=['VAD', 'VNR', 'INPUT'])

df.head(10)

In [None]:
# Save Dataset
# Cache the preprocessed rows so the expensive extraction need not be repeated.
# (The former mid-cell `df.head(10)` was dropped: Jupyter only renders a cell's
# last expression, so its result was silently discarded.)
df.to_pickle('dataset_32.pk')

In [None]:
# Read Dataset
# Load the file written by the save cell ('dataset_32.pk'). The previous name
# 'dataset.pk' is not produced anywhere in this notebook, so Restart & Run All
# either failed or picked up a stale file.
# NOTE: unpickling can execute arbitrary code - only load files you created.
df = pd.read_pickle('dataset_32.pk')

print("Data Count:", len(df))

# Keep the preview as the last expression so Jupyter actually renders it.
df.head(10)

In [None]:
# Fractions of the data held out for testing and validation; the rest is training data.
test_ratio = 0.1

validation_ratio = 0.2

test_count  = int(len(df)*test_ratio)

valid_count = int(len(df)*validation_ratio)


# split dataframe into contiguous, non-overlapping [test | validation | train] ranges.
# The validation range must END at test_count + valid_count; ending it at
# valid_count (as before) yields only validation_ratio - test_ratio of the data.
valid_end = test_count + valid_count

df_test = df.iloc[:test_count, :]

df_valid = df.iloc[test_count:valid_end, :]

df_train = df.iloc[valid_end:, :]


def _columns_to_arrays(frame):
    """Return the first three columns of ``frame`` (VAD, VNR, INPUT) as NumPy arrays.

    Columns are taken by position, so this also works for an empty frame, where
    unpacking ``zip(*rows)`` would fail.
    """
    return [np.array(frame.iloc[:, col].tolist()) for col in range(3)]


# split dataset
vad_test , vnr_test , x_test  = _columns_to_arrays(df_test)

vad_valid, vnr_valid, x_valid = _columns_to_arrays(df_valid)

vad_train, vnr_train, x_train = _columns_to_arrays(df_train)

# print('Sample Shape is ', x_train[0].shape)

In [None]:
import tensorflow_probability as tfp

class AutoClipper:
    """Callable that clips gradients to a percentile of previously recorded gradient norms.

    An instance is called with a list of ``(gradient, variable)`` pairs and
    returns the same pairs with every gradient passed through ``tf.clip_by_norm``.
    In this notebook it is only referenced from a commented-out
    ``gradient_transformers`` argument of the optimizer.
    """
    def __init__(self, clip_percentile, history_size=10000):
        # clip_percentile: forwarded as `q` to tfp.stats.percentile.
        # history_size: number of slots in the norm history buffer.
        self.clip_percentile = clip_percentile
        # Non-trainable buffer of recorded total norms, initialised to zeros.
        self.grad_history = tf.Variable(tf.zeros(history_size), trainable=False)
        # Call counter; also used to derive the next write position.
        self.i = tf.Variable(0, trainable=False)
        self.history_size = history_size

    def __call__(self, grads_and_vars):
        """Record the current total gradient norm and clip every gradient.

        The clip threshold is the ``clip_percentile``-th percentile of
        ``grad_history[:i]``.
        """
        grad_norms = [self._get_grad_norm(g) for g, _ in grads_and_vars]
        total_norm = tf.norm(grad_norms)
        # Write position wraps around once `history_size` norms were stored.
        assign_idx = tf.math.mod(self.i, self.history_size)
        # NOTE(review): both attributes are rebound to the value returned by
        # assign()/assign_add() - confirm this behaves as intended in eager mode.
        self.grad_history = self.grad_history[assign_idx].assign(total_norm)
        self.i = self.i.assign_add(1)
        clip_value = tfp.stats.percentile(self.grad_history[: self.i], q=self.clip_percentile)
        return [(tf.clip_by_norm(g, clip_value), v) for g, v in grads_and_vars]

    def _get_grad_norm(self, t, axes=None, name=None):
        """Return the L2 norm of ``t`` (of ``t.values`` for ``tf.IndexedSlices``).

        ``axes`` is passed to ``tf.math.reduce_sum``; ``name`` is unused.
        """
        values = tf.convert_to_tensor(t.values if isinstance(t, tf.IndexedSlices) else t, name="t")

        # Sum of squares; its square root below is the L2 norm.
        l2sum = tf.math.reduce_sum(values * values, axes, keepdims=True)
        pred = l2sum > 0
        # Two-tap tf.where trick to bypass NaN gradients
        l2sum_safe = tf.where(pred, l2sum, tf.ones_like(l2sum))
        # Where the sum is not positive, return the sum itself instead of sqrt(1).
        return tf.squeeze(tf.where(pred, tf.math.sqrt(l2sum_safe), l2sum))


In [None]:
from tensorflow.keras.layers import *
from tensorflow.keras import Model
import tensorflow_addons as tfa

# Frame-count constant (not referenced by the other cells shown here).
_N_FRAMES = 1874

# AdamW hyper-parameters: learning rate and weight decay.
_ADAM_W_LR = 5e-05
_ADAM_W_WD = 1e-02

# Loss configuration: `alpha` is the weight of the second (MAE) term,
# the first (binary cross-entropy) term gets the remainder.
alpha = 0.2

lossVADVNR1 = ['binary_crossentropy', 'mae']
lossVADVNR1_weight = [1 - alpha, alpha]

def lossSUM2BCE(y_true, y_pred):
    """Sum of two binary cross-entropies, one per channel of the last axis.

    ``y_true`` and ``y_pred`` are each split along their last axis into two
    width-1 slices; the cross-entropy of the first pair and of the second pair
    are added together.
    """
    cross_entropy = tf.keras.losses.BinaryCrossentropy()

    true_first, true_second = tf.split(y_true, [1, 1], axis=-1)
    pred_first, pred_second = tf.split(y_pred, [1, 1], axis=-1)

    first_loss = cross_entropy(true_first, pred_first)
    second_loss = cross_entropy(true_second, pred_second)
    return first_loss + second_loss

# Zero-padding applied before every convolution in CRNModel:
# (1, 0) on the first spatial axis and (1, 1) on the second.
paddings = [[1, 0], [1, 1]]


def CRNModel(input_shape=(5, 64)):
    """Build the convolutional-recurrent model with a 2-unit sigmoid ``VAD_VNR`` output.

    Args:
        input_shape: ``(frames, bands)`` of one sample. The reshapes are derived
            from it, so shapes other than the default ``(5, 64)`` now build too
            (previously both reshapes were hard-coded to the default).

    Returns:
        An uncompiled ``Model`` mapping ``(batch, frames, bands)`` to ``(batch, 2)``.
    """
    n_frames, n_bands = input_shape

    x = Input(shape=input_shape, name='input')

    # Add a trailing channel axis for the 2-D convolutions.
    y = Reshape((n_frames, n_bands, 1))(x)

    # Four identical stages: pad, strided conv (stride 2 on the band axis), PReLU.
    # Padding one row on top with a height-2 kernel keeps the frame axis length.
    for filters in (16, 32, 64, 128):
        y = ZeroPadding2D(padding=paddings)(y)
        y = Conv2D(filters, kernel_size=(2, 3), strides=(1, 2), padding='valid')(y)
        y = PReLU()(y)

    # Flatten (bands, channels) per frame; -1 lets Keras infer the feature size
    # (512 for the default input shape).
    y = Reshape((n_frames, -1))(y)

    # NOTE(review): `activation` / `recurrent_activation` are the reverse of the
    # Keras defaults (tanh / sigmoid) - confirm this is intentional.
    y = GRU(512, activation="sigmoid", recurrent_activation="tanh")(y)

    y = Dense(256)(y)
    y = PReLU()(y)

    VAD_VNR = Dense(2, activation='sigmoid', name='VAD_VNR')(y)
    return Model(inputs=x, outputs=VAD_VNR)


# AdamW optimizer configured from the constants above.
adamW = tfa.optimizers.AdamW(learning_rate=_ADAM_W_LR, 
                             weight_decay=_ADAM_W_WD)
                             #gradient_transformers=[AutoClipper(10)])

#bce = tf.keras.losses.BinaryCrossentropy(reduction=tf.keras.losses.Reduction.SUM)


auc = tf.keras.metrics.AUC(name='AUC', summation_method='minoring')


model = CRNModel()
# `metrics` is documented as a list; newer Keras versions reject a bare Metric object.
model.compile(optimizer=adamW, 
              loss=lossSUM2BCE,
              metrics=[auc])
model.summary()

In [None]:
import math
from tensorflow.keras.utils import Sequence

class DataLoader(Sequence):
    """Keras ``Sequence`` that turns whole signals into sliding-window samples.

    Each batch takes ``signal_per_batch`` signals and emits, for every signal,
    one window of ``frame_size`` consecutive frames per valid start position;
    the label of a window is the target at its last frame.

    Args:
        x: per-signal feature sequences (all assumed to have the length of ``x[0]``).
        y: sequence of target arrays; they are stacked along a new last axis.
        signal_per_batch: number of signals drawn per batch.
        frame_size: frames per window.
        shuffle: reshuffle the signal order at the end of every epoch.
    """

    def __init__(self, x, y, signal_per_batch, frame_size=5, shuffle=False, *args, **kwargs):
        super().__init__(*args, **kwargs)
        stacked_targets = np.stack(y, axis=-1)
        self.x = np.array(x)
        self.y = stacked_targets
        self.signal_per_batch = signal_per_batch
        self.frame_size = frame_size
        self.shuffle = shuffle
        # Build the initial signal order.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch."""
        n_windows = self.data_per_signal * len(self.x)
        return math.ceil(n_windows / self.batch_size)

    def __getitem__(self, idx):
        """Return ``(windows, labels)`` for batch number ``idx``."""
        first = idx * self.signal_per_batch
        chosen = self.indices[first:first + self.signal_per_batch]

        windows = self._generate_window_x([self.x[k] for k in chosen])
        labels = self._generate_window_y([self.y[k] for k in chosen])
        return np.array(windows), np.array(labels)

    def on_epoch_end(self):
        """Reset the signal order and shuffle it when requested."""
        self.indices = np.arange(len(self.x))
        if self.shuffle == True:
            np.random.shuffle(self.indices)

    def _generate_window_x(self, batch):
        """Stack every ``frame_size``-long window of every signal in ``batch``."""
        width = self.frame_size
        starts = range(self.data_per_signal)
        return np.array([signal[s:s + width] for signal in batch for s in starts])

    def _generate_window_y(self, batch):
        """Stack the target at the last frame of every window of every signal."""
        last = self.frame_size - 1
        starts = range(self.data_per_signal)
        return np.array([signal[s + last] for signal in batch for s in starts])

    @property
    def batch_size(self):
        """Windows per full batch."""
        return self.data_per_signal * self.signal_per_batch

    @property
    def frames_per_signal(self):
        """Frame count of the first signal."""
        return len(self.x[0])

    @property
    def data_per_signal(self):
        """Number of windows that fit into one signal."""
        return self.frames_per_signal - self.frame_size + 1

In [None]:
# One loader per split; only the training loader reshuffles its signals each epoch.
train_loader = DataLoader(x_train, [vad_train, vnr_train], signal_per_batch=20, shuffle=True)

valid_loader = DataLoader(x_valid, [vad_valid, vnr_valid], signal_per_batch=20)

test_loader = DataLoader(x_test, [vad_test, vnr_test], signal_per_batch=10)

# Peek at the first training batch; the cell output is the shape of one sample.
x, y = train_loader[0]

x[0].shape


In [None]:
import math
from tensorflow.keras.utils import Sequence

# Train for 50 epochs, evaluating on the validation loader after each one.
model.fit(x=train_loader, validation_data=valid_loader, epochs=50)

# Visual separator after the training log.
print('\n\n', 50 * '=', '\n')


In [None]:
# evaluate() returns the loss followed by the compiled metrics; the only metric
# compiled for this model is AUC, so `test_acc` holds the test AUC.
test_loss, test_acc = model.evaluate(x=test_loader, verbose=2)