# U-Net Audio Denoising Model

This is the script used to train the U-Net denoising model.


In [10]:
import tensorflow as tf
import tensorflow.keras
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, Concatenate, ZeroPadding2D
from tensorflow.keras.models import Model
import os
import numpy as np


In [11]:
def load_spectrograms_into_matrix(directory, print_paths=False):
    # Resolve the absolute path of the directory to handle relative paths correctly
    abs_directory = os.path.abspath(directory)
    data_matrix = []
    file_paths = []  # Store file paths for verification

    # Walk through the directory structure
    for root, dirs, files in sorted(os.walk(abs_directory)):
        dirs.sort()  # Sort directories to maintain consistent order
        files.sort()  # Sort files to ensure consistent ordering within the same directory
        for file in files:
            if file.endswith('.npy'):
                # Generate the full path to the file
                file_path = os.path.join(root, file)
                # Optionally print file paths for verification
                if print_paths:
                    print(file_path)
                # Load the spectrogram
                spectrogram = np.load(file_path)
                # Append the spectrogram and file path to their respective lists
                data_matrix.append(spectrogram)
                file_paths.append(file_path)

    # Convert list to a numpy array for easier manipulation later
    data_matrix = np.array(data_matrix)
    return data_matrix, file_paths

def verify_alignment(noisy_paths, clean_paths):
    # Verify that each path in the noisy_paths corresponds correctly to the path in the clean_paths
    misalignments = []
    for noisy, clean in zip(noisy_paths, clean_paths):
        # Strip paths to just the file names minus the "_mixed" and extensions for comparison
        if os.path.splitext(noisy.split('/')[-1].replace('_mixed', ''))[0] != os.path.splitext(clean.split('/')[-1])[0]:
            misalignments.append((noisy, clean))

    if misalignments:
        print("Misaligned files:")
        for mis in misalignments:
            print(mis)
    else:
        print("All files are correctly aligned.")

# Define relative paths to the noisy and clean directories using the 'count' variable
count = 1  # User can change this as needed
noisy_directory = f'../../dataset/iteration-{count}/data/mixed/spectrogram-128-frames'
clean_directory = f'../../dataset/iteration-{count}/data/clean/spectrogram-128-frames'

# Load data and file paths, resolving paths absolutely
X_noisy, noisy_paths = load_spectrograms_into_matrix(noisy_directory, print_paths=False)
Y_clean, clean_paths = load_spectrograms_into_matrix(clean_directory, print_paths=False)

# Verify alignment of loaded data
verify_alignment(noisy_paths, clean_paths)

# Example of how to use the data
print(f"Shape of noisy data matrix: {X_noisy.shape}")
print(f"Shape of clean data matrix: {Y_clean.shape}")


All files are correctly aligned.
Shape of noisy data matrix: (5868, 1024, 128, 2)
Shape of clean data matrix: (5868, 1024, 128, 2)


In [12]:
def unet_model(input_size=(1024, 128, 2)):
    inputs = Input(input_size)

    # Encoder
    c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(inputs)
    c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(c1)
    p1 = MaxPooling2D((2, 2))(c1)

    c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(p1)
    c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(c2)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(p2)
    c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(c3)
    p3 = MaxPooling2D((2, 2))(c3)

    c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(p3)
    c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(c4)
    p4 = MaxPooling2D((2, 2))(c4)

    # Bottleneck
    c5 = Conv2D(256, (3, 3), activation='relu', padding='same')(p4)
    c5 = Conv2D(256, (3, 3), activation='relu', padding='same')(c5)

    # Decoder
    u4 = Conv2DTranspose(128, (3, 3), strides=(2, 2), padding='same')(c5)
    u4 = Concatenate()([u4, c4])

    u3 = Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(u4)
    u3 = Concatenate()([u3, c3])

    u2 = Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same')(u3)
    u2 = Concatenate()([u2, c2])

    u1 = Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same')(u2)
    u1 = Concatenate()([u1, c1])

    outputs = Conv2D(2, (1, 1), activation='sigmoid')(u1)
    model = Model(inputs=inputs, outputs=outputs)
    return model

model = unet_model()
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

# Print the model summary to check the architecture
model.summary()

In [13]:
# Assuming X_noisy and Y_clean are already numpy arrays properly shaped ##TODO rename X_clean
history = model.fit(X_noisy, Y_clean, batch_size=4, epochs=10, validation_split=0.2)

Epoch 1/10
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m560s[0m 474ms/step - loss: 1.4624 - mean_squared_error: 1.4624 - val_loss: 1.5170 - val_mean_squared_error: 1.5170
Epoch 2/10
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m541s[0m 461ms/step - loss: 1.4666 - mean_squared_error: 1.4666 - val_loss: 1.5170 - val_mean_squared_error: 1.5170
Epoch 3/10
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m576s[0m 490ms/step - loss: 1.4543 - mean_squared_error: 1.4543 - val_loss: 1.5170 - val_mean_squared_error: 1.5170
Epoch 4/10
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m579s[0m 493ms/step - loss: 1.4728 - mean_squared_error: 1.4728 - val_loss: 1.5170 - val_mean_squared_error: 1.5170
Epoch 5/10
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m549s[0m 467ms/step - loss: 1.4815 - mean_squared_error: 1.4815 - val_loss: 1.5170 - val_mean_squared_error: 1.5170
Epoch 6/10
[1m1174/1174[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

In [32]:
# denoised_spectrograms = model.predict(X_noisy, batch_size=4)

denoised_spectrograms = model.predict(X_noisy, batch_size=4)
print(denoised_spectrograms[0])  # Print the first denoised spectrogram to check



[1m1467/1467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m193s[0m 131ms/step
[[[0. 0.]
  [0. 0.]
  [0. 0.]
  ...
  [0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]
  ...
  [0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]
  ...
  [0. 0.]
  [0. 0.]
  [0. 0.]]

 ...

 [[0. 0.]
  [0. 0.]
  [0. 0.]
  ...
  [0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]
  ...
  [0. 0.]
  [0. 0.]
  [0. 0.]]

 [[0. 0.]
  [0. 0.]
  [0. 0.]
  ...
  [0. 0.]
  [0. 0.]
  [0. 0.]]]


In [30]:
# Assuming 'denoised_spectrograms' is your array of spectrograms after prediction
# and 'noisy_paths' contains the full file paths to the original noisy spectrograms.

for i, denoised_spectrogram in enumerate(denoised_spectrograms):
    # The original file path
    original_path = noisy_paths[i]
    
    # Strip off the original directory and extension, then append '_denoised.npy'
    new_filename = os.path.splitext(os.path.basename(original_path))[0] + '_denoised.npy'
    
    # You may want to save in a new directory, so construct the path as needed
    save_directory = '/Users/Leo/Developer/Local/senior-project/dataset/iteration-1/data/output/spectrogram-128-frames'
    save_path = os.path.join(save_directory, new_filename)
    
    # Save the denoised spectrogram
    np.save(save_path, denoised_spectrogram)


In [31]:
import os
import shutil

# Define the list of instruments
instruments = ["flute", "clarinet", "oboe", "saxophone", "english-horn"]

# Path to the directory containing the files
base_directory = "/Users/Leo/Developer/Local/senior-project/dataset/iteration-1/data/output/spectrogram-128-frames"  # Update this to your actual path

# Check and create instrument subdirectories
for instrument in instruments:
    instrument_path = os.path.join(base_directory, instrument)
    if not os.path.exists(instrument_path):
        os.makedirs(instrument_path)
        print(f"Created directory for {instrument}")

# Move files into corresponding instrument subdirectories
for filename in os.listdir(base_directory):
    file_path = os.path.join(base_directory, filename)
    if os.path.isfile(file_path):  # Only deal with files
        moved = False
        for instrument in instruments:
            if instrument in filename.lower():  # Case-insensitive match
                dst = os.path.join(instrument_path, filename)
                shutil.move(file_path, dst)
                print(f"Moved {filename} to {dst}")
                moved = True
                break
        if not moved:
            print(f"No match found for {filename}")

# Organize files within each instrument directory
for instrument in instruments:
    instrument_path = os.path.join(base_directory, instrument)
    for filename in os.listdir(instrument_path):
        file_path = os.path.join(instrument_path, filename)
        if filename.endswith('_mixed_denoised.npy') and os.path.isfile(file_path):
            # Extract subdirectory name
            part_of_interest = filename.split('_mixed_denoised')[0].split('_', 1)[1]
            subdirectory_path = os.path.join(instrument_path, part_of_interest)
            if not os.path.exists(subdirectory_path):
                os.makedirs(subdirectory_path)
                print(f"Created subdirectory {subdirectory_path}")
            dst = os.path.join(subdirectory_path, filename)
            shutil.move(file_path, dst)
            print(f"Moved {filename} to {dst}")

print("File organization complete.")


Created directory for flute
Created directory for clarinet
Created directory for oboe
Created directory for saxophone
Created directory for english-horn
Moved 1_english-horn_Ds5_1_mezzo-piano_normal_mixed_denoised.npy to /Users/Leo/Developer/Local/senior-project/dataset/iteration-1/data/output/spectrogram-128-frames/english-horn/1_english-horn_Ds5_1_mezzo-piano_normal_mixed_denoised.npy
Moved 1_clarinet_D5_025_fortissimo_normal_mixed_denoised.npy to /Users/Leo/Developer/Local/senior-project/dataset/iteration-1/data/output/spectrogram-128-frames/english-horn/1_clarinet_D5_025_fortissimo_normal_mixed_denoised.npy
Moved 11_english-horn_As5_very-long_piano_normal_mixed_denoised.npy to /Users/Leo/Developer/Local/senior-project/dataset/iteration-1/data/output/spectrogram-128-frames/english-horn/11_english-horn_As5_very-long_piano_normal_mixed_denoised.npy
Moved 8_flute_Fs4_very-long_cresc-decresc_normal_mixed_denoised.npy to /Users/Leo/Developer/Local/senior-project/dataset/iteration-1/data/