## Uses signal as input to the model. 

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

import librosa # audio processing
import math

from tqdm import tqdm

In [2]:
SAMPLE_RATE = 22050
SEGMENT_SIZE = 5000

In [3]:
# 1. get clean segment MFCCs
# 2. for every other directory get noise segment MFCCs and append them to training set associated with target clean segment MFCC extracted earlier

clean_segment = np.array([])
noise_segment = np.array([])
audios_segments = [] #2d array with each row mentioning what segments are in single audio file
X = np.array([])
y = np.array([])
for dirname, _, filenames in os.walk('/kaggle/input/noizeus/noizeus_corpora-master/NOIZEUS/clean_noizeus/wav'):
    for filename in sorted(filenames):
        file_path = str(os.path.join(dirname, filename))
        signal, sr = librosa.load(file_path, sr = SAMPLE_RATE)
        
        for index in range(0, len(signal), SEGMENT_SIZE):
            start_sample = index
            end_sample = index + SEGMENT_SIZE
            if(end_sample > len(signal) + 1 ): # Making sure the last unfilled segment is truncated
                continue
            if clean_segment.size == 0:
                clean_segment = signal[start_sample:end_sample]
            else:
                clean_segment = np.vstack((clean_segment, signal[start_sample:end_sample]))
        

X_index = 0
for noise_dir in tqdm(os.listdir('/kaggle/input/noizeus/noizeus_corpora-master/NOIZEUS/')):
    if(noise_dir == 'clean_noizeus'): 
        continue
    data = np.array([])
    noise_segment = np.array([]) # make it empty 
    for dirname, _, filenames in os.walk(f'/kaggle/input/noizeus/noizeus_corpora-master/NOIZEUS/{noise_dir}/wav'):
        for filename in sorted(filenames):
            audio_segments = []
            file_path = str(os.path.join(dirname, filename))
            signal, sr = librosa.load(file_path, sr = SAMPLE_RATE)

            for segment in range(0, len(signal), SEGMENT_SIZE):
                start_sample = segment
                end_sample = segment + SEGMENT_SIZE
                if(end_sample > len(signal) + 1 ): # Making sure the last unfilled segment is truncated
                    continue
                if noise_segment.size == 0:
                    noise_segment = signal[start_sample:end_sample]
                else:
                    noise_segment = np.vstack((noise_segment, signal[start_sample:end_sample]))
                audio_segments.append(X_index)
                X_index +=1
            audios_segments.append(audio_segments)
    if X.size == 0:
        X = noise_segment
    else:
        X = np.concatenate((X, noise_segment), axis = 0)
    if y.size == 0:
        y = clean_segment
    else:
        y = np.concatenate((y, clean_segment), axis = 0)

print(X.shape) # num of segments, samples per segment 
print(y.shape)

100%|██████████| 33/33 [00:16<00:00,  2.05it/s]

(10784, 5000)
(10784, 5000)





In [4]:
import tensorflow as tf

# Parameters for spectrogram conversion
N_FFT = 1024
HOP_LENGTH = 512

# # Show print statements when there is an issue with the code
# tf.config.run_functions_eagerly(True)


def compute_spectrogram(signal):
    # Compute the Short-Time Fourier Transform (STFT)
    stft = tf.signal.stft(signal, frame_length=N_FFT, frame_step=HOP_LENGTH)
    # Compute the magnitude of the STFT
    spectrogram = tf.abs(stft)
    return spectrogram

def hybrid_loss(y_true, y_pred):
    # Compute spectrograms
    y_true_spectrogram = compute_spectrogram(y_true)
    y_pred_spectrogram = compute_spectrogram(y_pred)

    # Calculate MSE loss in the spectrogram domain
    spectrogram_loss = tf.reduce_mean(tf.square(tf.abs(y_pred_spectrogram) - tf.abs(y_true_spectrogram)), axis=[1, 2])


    # Optionally, calculate MSE loss in the time domain (on the raw signals)
    time_domain_loss = tf.reduce_mean(tf.square(y_pred - y_true), axis=-1)

    # Combine spectrogram and time domain losses
    total_loss = spectrogram_loss + time_domain_loss
    return total_loss

2024-07-14 19:25:43.242591: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-14 19:25:43.242703: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-14 19:25:43.539504: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [5]:
#Used from https://www.kaggle.com/code/residentmario/autoencoders
from keras.layers import Input, Dense,  Conv2D, Conv2DTranspose, GaussianNoise
from tensorflow.keras.layers import MaxPooling2D, UpSampling2D
from keras.models import Model
from keras import regularizers
from keras.losses import MeanSquaredError

input_segment = Input(shape=X.shape[1:])
encoded = Dense(3000, activation='relu')(input_segment)
encoded = Dense(1000, activation='relu')(encoded)
encoded = Dense(128, activation='relu')(encoded)
encoded = Dense(32, activation='relu')(encoded) 
decoded = Dense(128, activation='relu')(encoded)
decoded = Dense(1000, activation='relu')(decoded)
decoded = Dense(3000, activation='relu')(decoded)
decoded = Dense(5000, activation='sigmoid')(decoded)

# this model maps an input to its reconstruction
autoencoder = Model(input_segment, decoded)

autoencoder.compile(optimizer='adadelta', loss=hybrid_loss)

In [6]:
autoencoder.summary()

In [7]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size= 0.3, random_state= 69)

In [8]:
autoencoder.fit(X_train, y_train,
                epochs=30,
                batch_size=256,
                validation_data=(X_test, y_test),
                verbose=1)

Epoch 1/30


I0000 00:00:1720985167.166395     121 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1720985167.184743     121 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m 7/30[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 160.6825

W0000 00:00:1720985167.482855     121 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 93ms/step - loss: 160.6607

W0000 00:00:1720985170.176356     118 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1720985170.179069     118 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1720985171.280792     118 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 163ms/step - loss: 160.6607 - val_loss: 160.6624
Epoch 2/30
[1m 7/30[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m0s[0m 22ms/step - loss: 160.6090

W0000 00:00:1720985172.213728     119 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.6382 - val_loss: 160.6482
Epoch 3/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.6285 - val_loss: 160.6334
Epoch 4/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.6112 - val_loss: 160.6178
Epoch 5/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.6080 - val_loss: 160.6012
Epoch 6/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.5905 - val_loss: 160.5832
Epoch 7/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.5548 - val_loss: 160.5636
Epoch 8/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.5578 - val_loss: 160.5418
Epoch 9/30
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - loss: 160.5294 - val_loss: 160.5172
Epoch 10/30
[1m30/30[0m 

<keras.src.callbacks.history.History at 0x7d2a704eab00>

In [9]:
#generate audio for whole audio that is noisy, clean and predicted.
#for 69th audio
segment_indices =audios_segments[34]

y_pred = autoencoder.predict(X[segment_indices[0]: (segment_indices[-1] + 1)])
y_pred.shape

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 543ms/step


W0000 00:00:1720985195.605617     120 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


(9, 5000)

In [10]:
import IPython.display as ipd

original_signal = np.hstack([segment for segment in X[segment_indices[0]: (segment_indices[-1] + 1)]])
predicted_signal = np.hstack([segment for segment in y_pred])
ipd.Audio(predicted_signal, rate = SAMPLE_RATE)

In [11]:
#where as original noisy audio is this (reconstructed one)
ipd.Audio(original_signal, rate = SAMPLE_RATE)

## Whaaat the heck was that? Should have named the notebook to 'Beats generator'