In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, Concatenate, ZeroPadding2D
from tensorflow.keras.models import Model
import os
import numpy as np


In [13]:
def load_spectrograms_into_matrix(directory, print_paths=False):
    # Resolve the absolute path of the directory to handle relative paths correctly
    abs_directory = os.path.abspath(directory)
    data_matrix = []
    file_paths = []  # Store file paths for verification

    # Walk through the directory structure
    for root, dirs, files in sorted(os.walk(abs_directory)):
        dirs.sort()  # Sort directories to maintain consistent order
        files.sort()  # Sort files to ensure consistent ordering within the same directory
        for file in files:
            if file.endswith('.npy'):
                # Generate the full path to the file
                file_path = os.path.join(root, file)
                # Optionally print file paths for verification
                if print_paths:
                    print(file_path)
                # Load the spectrogram
                spectrogram = np.load(file_path)
                # Append the spectrogram and file path to their respective lists
                data_matrix.append(spectrogram)
                file_paths.append(file_path)

    # Convert list to a numpy array for easier manipulation later
    data_matrix = np.array(data_matrix)
    return data_matrix, file_paths

def verify_alignment(noisy_paths, clean_paths):
    # Verify that each path in the noisy_paths corresponds correctly to the path in the clean_paths
    misalignments = []
    for noisy, clean in zip(noisy_paths, clean_paths):
        # Strip paths to just the file names minus the "_mixed" and extensions for comparison
        if os.path.splitext(noisy.split('/')[-1].replace('_mixed', ''))[0] != os.path.splitext(clean.split('/')[-1])[0]:
            misalignments.append((noisy, clean))

    if misalignments:
        print("Misaligned files:")
        for mis in misalignments:
            print(mis)
    else:
        print("All files are correctly aligned.")

# Define relative paths to the noisy and clean directories using the 'count' variable
count = 1  # User can change this as needed
noisy_directory = f'../../dataset/iteration-{count}/data/mixed/spectrogram-128-frames'
clean_directory = f'../../dataset/iteration-{count}/data/clean/spectrogram-128-frames'

# Load data and file paths, resolving paths absolutely
X_noisy, noisy_paths = load_spectrograms_into_matrix(noisy_directory, print_paths=False)
Y_clean, clean_paths = load_spectrograms_into_matrix(clean_directory, print_paths=False)

# Verify alignment of loaded data
verify_alignment(noisy_paths, clean_paths)

# Example of how to use the data
print(f"Shape of noisy data matrix: {X_noisy.shape}")
print(f"Shape of clean data matrix: {X_clean.shape}")


All files are correctly aligned.
Shape of noisy data matrix: (5868, 1025, 173, 2)
Shape of clean data matrix: (5868, 1025, 173, 2)


In [24]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, Concatenate, ZeroPadding2D
from tensorflow.keras.models import Model

def make_divisible_by(x, factor):
    return factor * ((x + factor - 1) // factor)

def pad_to_size(bottom_layer, size):
    h_diff = size[0] - bottom_layer.shape[1]
    w_diff = size[1] - bottom_layer.shape[2]
    padding = ((h_diff // 2, h_diff - h_diff // 2), (w_diff // 2, w_diff - w_diff // 2))
    return ZeroPadding2D(padding=padding)(bottom_layer)

def unet_model(input_size=(1025, 173, 2)):
    inputs = Input(input_size)

    target_size = (make_divisible_by(inputs.shape[1], 16), make_divisible_by(inputs.shape[2], 16))
    x = pad_to_size(inputs, target_size)

    # Encoder
    c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
    c1 = Conv2D(16, (3, 3), activation='relu', padding='same')(c1)
    p1 = MaxPooling2D((2, 2))(c1)
    
    c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(p1)
    c2 = Conv2D(32, (3, 3), activation='relu', padding='same')(c2)
    p2 = MaxPooling2D((2, 2))(c2)

    c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(p2)
    c3 = Conv2D(64, (3, 3), activation='relu', padding='same')(c3)
    p3 = MaxPooling2D((2, 2))(c3)

    # Bottleneck
    c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(p3)
    c4 = Conv2D(128, (3, 3), activation='relu', padding='same')(c4)

    # Decoder
    u3 = Conv2DTranspose(64, (3, 3), strides=(2, 2), padding='same')(c4)
    u3 = Concatenate()([u3, c3])

    c5 = Conv2D(64, (3, 3), activation='relu', padding='same')(u3)
    c5 = Conv2D(64, (3, 3), activation='relu', padding='same')(c5)

    u2 = Conv2DTranspose(32, (3, 3), strides=(2, 2), padding='same')(c5)
    u2 = Concatenate()([u2, c2])

    c6 = Conv2D(32, (3, 3), activation='relu', padding='same')(u2)
    c6 = Conv2D(32, (3, 3), activation='relu', padding='same')(c6)

    u1 = Conv2DTranspose(16, (3, 3), strides=(2, 2), padding='same')(c6)
    u1 = Concatenate()([u1, c1])

    c7 = Conv2D(16, (3, 3), activation='relu', padding='same')(u1)
    c7 = Conv2D(16, (3, 3), activation='relu', padding='same')(c7)

    # Output
    outputs = Conv2D(2, (1, 1), activation='sigmoid')(c7)
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Instantiate and compile the model
# model = unet_model()
# model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])


In [25]:
model = unet_model()
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])


In [26]:
# Assuming X_noisy and Y_clean are already numpy arrays properly shaped ##TODO rename X_clean
history = model.fit(X_noisy, X_clean, batch_size=4, epochs=10, validation_split=0.2)


Epoch 1/10


ValueError: Dimensions must be equal, but are 1025 and 1040 for '{{node compile_loss/mean_squared_error/sub}} = Sub[T=DT_FLOAT](data_1, functional_1_1/conv2d_70_1/Sigmoid)' with input shapes: [?,1025,173,2], [?,1040,176,2].