### Prologue
An attempt at implementing [Denoising Induction Motor Sounds](https://arxiv.org/pdf/2208.04462.pdf) for this use case.

### IMPORTS

In [1]:
from __future__ import print_function, division
%matplotlib inline

import sys, time, random, glob, os, pandas
import numpy as np
import librosa
import pydub
import json
from pydub import AudioSegment
from pydub import effects

import tensorflow as tf
import keras

from keras.utils import np_utils
from keras import optimizers
from keras.layers import Convolution2D, MaxPooling2D

from tensorflow.keras import layers, losses

import keras.models
from keras.losses import mse as kmse
import keras.backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Dropout, Add, Multiply, Lambda, UpSampling2D, Dot, Permute, RepeatVector
from keras.layers import BatchNormalization
from keras.layers import Conv2D
from keras.layers import Cropping2D
from keras.layers import Conv2DTranspose
from keras.layers import LeakyReLU
from keras.layers import Activation
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Reshape
from keras.layers import Input
from keras.models import Model

from tqdm.notebook import tqdm

from keras.callbacks import LambdaCallback, EarlyStopping, ModelCheckpoint
import noising
from common import *
from common import cal_midpoints, gen_mel_feature

2023-06-21 21:51:03.022391: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Build the pre-rendered noisy dataset once; the generators below read the
# resulting 'dataset.json' instead of synthesising noise on every batch.
INPUT_DIR = 'samples'
# glob.glob returns paths in arbitrary (filesystem-dependent) order. Sort them
# so the file list handed to the dataset builder is the same on every
# machine and every run.
files = sorted(glob.glob(f'{INPUT_DIR}/*.flac'))
# NOTE(review): the meaning of the positional arguments 3, 15 and 150 is defined
# by noising.save_overlaid_dataset, which is not visible here -- presumably
# samples-per-file, noise level and clip length; confirm against that helper.
noising.save_overlaid_dataset(files, 3, 15, 'dataset.json', 'overlaid', 150)

In [40]:
class FastGenerator(tf.keras.utils.Sequence):
    '''Keras Sequence serving pre-rendered (noisy, clean) waveform pairs.

    The dataset file is a JSON object mapping each clean audio path to a list
    of noisy audio paths. The keys are split, in file order, into three
    contiguous partitions: 'train' takes the first ``train_ratio`` fraction,
    'test' takes the first half of the remainder, and any other ``gen_type``
    (e.g. 'val') takes the rest.

    Args:
        dataset: path to the JSON mapping described above.
        train_ratio: fraction of the clean files assigned to the train split.
        batch_size: target number of samples per batch.
        gen_type: 'train', 'test', or anything else for the remaining split.
        shuffle: reshuffle the file list at the end of every epoch
            (never applied to the 'test' split).
        samples_per_file: number of noisy variants expected per clean file;
            together with ``batch_size`` it fixes how many clean files make up
            one batch (``batch_size // samples_per_file``, at least 1).
    '''
    def __init__(self, dataset : str = 'dataset.json', 
                train_ratio : float = 0.8,
                batch_size : int = 8, gen_type = 'train', shuffle : bool = True,
                samples_per_file : int = 3):
        super().__init__()
        self.shuffle = shuffle
        self.gen_type = gen_type
        self.train_ratio = train_ratio
        # Upper bound (as a fraction) of the 'test' slice: half of what is
        # left after the train split.
        self.test_ratio = self.train_ratio + (1-train_ratio)/2
        self.batch_size = batch_size
        self.epoch = 0
        # Previously read from an undefined name; now an explicit parameter.
        self.samples_per_file = samples_per_file
        self.noise_types = ['white', 'pink', 'blue', 'brown', 'violet']
        # Context manager so the file handle is closed deterministically.
        with open(dataset) as handle:
            self.map = json.load(handle)
        files = list(self.map.keys())
        idx = int(train_ratio*len(files))
        test_idx = int(self.test_ratio*len(files))
        if gen_type == 'train':
            self.files = files[0:idx]
        elif gen_type == 'test':
            self.files = files[idx:test_idx]
        else:
            self.files = files[test_idx:]
        print(f"{gen_type} loader created with {len(self.files)} samples")
        
        self.len = self.__len__()
        if self.gen_type != 'test':
            self.on_epoch_end()

    def _files_per_batch(self):
        '''Number of clean files consumed by one batch (never less than 1).'''
        return max(1, int(self.batch_size//self.samples_per_file))

    def __len__(self):
        '''Denotes the number of batches per epoch'''
        # Must agree with the slicing in __getitem__: every batch consumes
        # _files_per_batch() clean files, and a trailing partial batch counts.
        per_batch = self._files_per_batch()
        return (len(self.files) + per_batch - 1) // per_batch

    def on_epoch_end(self):
        '''Reshuffle the file order (non-test splits only) and bump the epoch counter.'''
        if self.shuffle and self.gen_type != 'test' :
            random.shuffle(self.files)
        self.epoch += 1

    def __getitem__(self, index):
        '''Return batch ``index`` as ``(noisy, clean)`` numpy arrays.

        Each clean file in the batch is paired with every noisy file listed
        for it in the dataset mapping; each pair is truncated to the shorter
        of the two recordings.
        '''
        #! every noisy rendering listed for a file becomes one sample; the
        #! clean recording is repeated as the target for each of them.
        inp = []
        out = []
        const = self._files_per_batch() #! const files make up a batch
        for path in self.files[index*const : (index+1)*const]:
            orig_sound = pydub.AudioSegment.from_file(path)
            orig_array = orig_sound.get_array_of_samples()
            for noisy_path in self.map[path]:
                noisy_sound = pydub.AudioSegment.from_file(noisy_path)
                noisy_array = noisy_sound.get_array_of_samples()
                min_len = min(len(noisy_array), len(orig_array))
                inp.append(noisy_array[:min_len])
                out.append(orig_array[:min_len])
        if inp:
            # Pairs may still differ in length from one another; trim the whole
            # batch to its shortest sample so np.array yields a rectangular
            # 2-D array instead of failing on a ragged list.
            batch_len = min(len(samples) for samples in inp)
            inp = [samples[:batch_len] for samples in inp]
            out = [samples[:batch_len] for samples in out]
        inp = np.array(inp)
        out = np.array(out)
        return inp, out

In [8]:
class Generator(tf.keras.utils.Sequence):
    '''Keras Sequence that overlays coloured noise on clean clips on the fly.

    The ``*.flac`` files found in ``directory`` are split, in sorted order,
    into three contiguous partitions: 'train' takes the first ``train_ratio``
    fraction, 'test' takes the first half of the remainder, and any other
    ``gen_type`` (e.g. 'val') takes the rest.

    Args:
        directory: folder containing the clean ``*.flac`` recordings.
        length: forwarded as the last argument of ``noising.overlay_noise``
            (stored as ``pad_to``).
        train_ratio: fraction of the files assigned to the train split.
        batch_size: target number of samples per batch.
        gen_type: 'train', 'test', or anything else for the remaining split.
        samples_per_file: number of noisy renderings generated per clean file.
        shuffle: reshuffle the file list at the end of every epoch
            (never applied to the 'test' split).
    '''
    # NOTE(review): the default ``directory`` is an absolute path on one
    # developer's machine; it is kept for backward compatibility, but callers
    # should pass ``directory`` explicitly.
    def __init__(self, directory : str = '/home/juggernautjha/Desktop/to_rahul/maruti/data/true_samples', 
                length : int = 200, train_ratio : float = 0.8,
                batch_size : int = 32, gen_type = 'train',
                samples_per_file : int = 3, shuffle : bool = True):
        super().__init__()
        self.shuffle = shuffle
        self.gen_type = gen_type
        self.train_ratio = train_ratio
        # Upper bound (as a fraction) of the 'test' slice: half of what is
        # left after the train split.
        self.test_ratio = self.train_ratio + (1-train_ratio)/2
        self.batch_size = batch_size
        self.epoch = 0
        self.pad_to = length
        self.samples_per_file = samples_per_file
        self.noise_types = ['white', 'pink', 'blue', 'brown', 'violet']
        # glob.glob returns paths in arbitrary order; sort so that the
        # train/test/val partitions contain the same files on every run
        # and never overlap between generator instances.
        files = sorted(glob.glob(f"{directory}/*.flac"))
        idx = int(train_ratio*len(files))
        test_idx = int(self.test_ratio*len(files))
        if gen_type == 'train':
            self.files = files[0:idx]
        elif gen_type == 'test':
            self.files = files[idx:test_idx]
        else:
            self.files = files[test_idx:]
        print(f"{gen_type} loader created with {len(self.files)} samples")

        self.len = self.__len__()
        if self.gen_type != 'test':
            self.on_epoch_end()

    def _files_per_batch(self):
        '''Number of clean files consumed by one batch (never less than 1).'''
        return max(1, int(self.batch_size//self.samples_per_file))

    def __len__(self):
        '''Denotes the number of batches per epoch'''
        # Must agree with the slicing in __getitem__: every batch consumes
        # _files_per_batch() clean files, and a trailing partial batch counts.
        per_batch = self._files_per_batch()
        return (len(self.files) + per_batch - 1) // per_batch

    def on_epoch_end(self):
        '''Reshuffle the file order (non-test splits only) and bump the epoch counter.'''
        if self.shuffle and self.gen_type != 'test' :
            random.shuffle(self.files)
        self.epoch += 1

    def __getitem__(self, index):
        '''Return batch ``index`` as ``(noisy, clean)`` numpy arrays.'''
        #! generates self.samples_per_file noisy samples per file, can be of the
        #! same kind of noise, or can be of different kinds of noises.
        inp = []
        out = []
        const = self._files_per_batch() #! const files make up a batch
        for path in self.files[index*const : (index+1)*const]:
            # Noise colours are drawn with replacement, so one file may get
            # the same colour more than once.
            colors = random.choices(self.noise_types, k =self.samples_per_file)
            for color in colors:
                noisy, clear = noising.overlay_noise(path, 15, color, self.pad_to)
                inp.append(noisy.get_array_of_samples())
                out.append(clear.get_array_of_samples())
        inp = np.array(inp)
        out = np.array(out)
        return inp, out
    

In [38]:
# Pass the split ratio by keyword: FastGenerator's second positional parameter
# is train_ratio and the third is batch_size, so the former positional call
# (..., 50, 0.7) bound train_ratio=50 and batch_size=0.7.
# batch_size is left at its default of 8.
train_generator = FastGenerator('dataset.json', train_ratio=0.7)
val_generator = FastGenerator('dataset.json', train_ratio=0.7, gen_type='val')

train loader created with 14 samples
val loader created with 3 samples


In [39]:
# Sanity check: number of audio samples in the second noisy clip of batch 1.
# Indexing the Sequence dispatches to __getitem__.
len(train_generator[1][0][1])

6239232

### MODEL: 1-D convolutional denoising autoencoder

In [32]:
class Denoise(Model):
  '''1-D convolutional autoencoder mapping a noisy waveform to a clean one.

  Args:
    input_size: number of audio samples per clip (length of the time axis).
    batch_size: fixed batch dimension declared on the encoder's Input layer.

  The encoder stacks four unpadded Conv1D layers (each shortens the sequence
  by 2), and the decoder stacks four unpadded Conv1DTranspose layers (each
  lengthens it by 2), so the sequence is back at ``input_size`` before the
  final projection.

  NOTE: every layer runs at full time resolution (no striding or pooling), so
  activation memory scales with ``input_size * filters``. The recorded run in
  this notebook hit an out-of-memory error in the first Conv1D when fed clips
  of 6239232 samples.
  '''
  def __init__(self, input_size, batch_size):
    super().__init__()
    hidden = dict(kernel_size=3, activation='relu', kernel_initializer='he_uniform')

    self.encoder = tf.keras.Sequential([
      layers.Input(shape = (input_size, 1), batch_size = batch_size),
      layers.Conv1D(128, **hidden),
      layers.Conv1D(32, **hidden),
      layers.Conv1D(16, **hidden),
      layers.Conv1D(8, **hidden)
      ])

    self.decoder = tf.keras.Sequential([
      layers.Conv1DTranspose(8, **hidden),
      layers.Conv1DTranspose(16, **hidden),
      layers.Conv1DTranspose(32, **hidden),
      layers.Conv1DTranspose(128, **hidden),
      # Output projection. Two fixes relative to the hidden layers:
      #  * padding='same' keeps the length at input_size; an unpadded fifth
      #    transpose would emit input_size + 2 samples, which cannot be
      #    compared against the target.
      #  * linear activation, because a ReLU output can never produce the
      #    negative values a signed audio waveform contains.
      layers.Conv1DTranspose(1, kernel_size=3, padding='same', activation='linear',
                             kernel_initializer='he_uniform')
      ])

  def call(self, x):
    '''Encode then decode ``x``; returns a tensor with the same time length as the input.'''
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded

In [35]:
# 6239232 is the per-clip sample count reported by the batch-length check
# above; 8 is the batch dimension declared on the model's Input layer.
denoiser = Denoise(input_size=6239232, batch_size=8)

2023-06-21 22:01:09.194531: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-06-21 22:01:09.286646: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [36]:
# Adam optimiser with a mean-squared-error reconstruction loss.
denoiser.compile(optimizer='adam', loss=tf.keras.losses.mean_squared_error)

In [37]:
# Train on the generator batches (a single epoch, Keras' default).
# NOTE: the recorded run of this cell aborted with ResourceExhaustedError --
# the first Conv1D tried to allocate a float tensor of shape
# [6, 1, 6239230, 128] on the CPU.
denoiser.fit(x=train_generator, validation_data=val_generator)

2023-06-21 22:01:57.550100: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 74870784 exceeds 10% of free system memory.
2023-06-21 22:01:57.560775: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 74870784 exceeds 10% of free system memory.
2023-06-21 22:01:57.629623: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 149741568 exceeds 10% of free system memory.
2023-06-21 22:01:57.686114: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 19166914560 exceeds 10% of free system memory.
2023-06-21 22:01:57.686158: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at conv_ops.cc:629 : RESOURCE_EXHAUSTED: OOM when allocating tensor with shape[6,1,6239230,128] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu


ResourceExhaustedError: Exception encountered when calling layer 'conv1d' (type Conv1D).

{{function_node __wrapped__Conv2D_device_/job:localhost/replica:0/task:0/device:CPU:0}} OOM when allocating tensor with shape[6,1,6239230,128] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu [Op:Conv2D]

Call arguments received by layer 'conv1d' (type Conv1D):
  • inputs=tf.Tensor(shape=(6, 6239232, 1), dtype=float32)