In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, LeakyReLU, UpSampling1D, Concatenate, Subtract
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Cropping1D
from tensorflow.keras.layers import Reshape
from tensorflow.nn import sigmoid
import os
import librosa
import numpy as np
import random
from concurrent.futures import ProcessPoolExecutor


In [2]:
# Directory path for saved models. Nothing in the visible cells reads this
# variable (the checkpoint callback writes to a bare 'model.h5' instead).
# NOTE(review): machine-specific absolute path -- consider making it configurable.
models_folder = "/Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/Models/"


In [3]:
def crop(tensor, target_shape, match_feature_dim=True):
    """Centre-crop ``tensor`` along axis 1 to the length given by ``target_shape[1]``.

    The amount to crop is computed from static shapes (``tensor.shape``) using
    plain Python integers. This lets the function be used while building a
    Keras functional model, where a tensor-valued ``assert``/``if`` cannot be
    evaluated, and it also works for eager tensors and NumPy arrays.

    Args:
        tensor: Array or tensor with at least three axes; axis 1 is cropped.
        target_shape: Sequence (list, tuple or ``TensorShape``) whose entry at
            index 1 is the desired length of axis 1. Other entries are ignored
            and may be ``None``.
        match_feature_dim: Accepted for backward compatibility; not used.

    Returns:
        ``tensor`` itself when axis 1 already has the target length, otherwise
        ``tensor[:, start:-end, :]``. When the surplus is odd, the extra
        element is removed from the end.

    Raises:
        ValueError: If the length of axis 1 is unknown (``None``) in either
            ``tensor`` or ``target_shape``.
        AssertionError: If ``tensor`` is shorter than the target length.
    """
    current_len = tensor.shape[1]
    target_len = target_shape[1]
    if current_len is None or target_len is None:
        raise ValueError(
            "crop() needs a statically known length on axis 1, got "
            "tensor length {!r} and target length {!r}".format(current_len, target_len)
        )

    diff = int(current_len) - int(target_len)
    assert diff >= 0, "Only positive difference allowed"  # cannot crop to a longer length
    if diff == 0:
        return tensor

    # Split the surplus between both ends; the end takes the extra element
    # when the surplus is odd.
    crop_start = diff // 2
    crop_end = diff - crop_start
    return tensor[:, crop_start:-crop_end, :]

def AudioClip(x, training):
    """Limit ``x`` to the range [-1.0, 1.0] unless ``training`` is truthy.

    Args:
        x: Value or tensor to clip.
        training: When truthy, ``x`` is returned untouched.

    Returns:
        ``x`` during training, otherwise ``x`` bounded above by 1.0 and then
        bounded below by -1.0.
    """
    if not training:
        upper_bounded = tf.minimum(x, 1.0)
        return tf.maximum(upper_bounded, -1.0)
    return x

def difference_output(input_mix, featuremap, source_names, num_channels, filter_width, padding, activation, training):
    """Build one output per source, deriving the last source as a residual.

    Every name in ``source_names`` except the last gets its own ``Conv1D``
    applied to ``featuremap``. The last source is computed as ``input_mix``
    (cropped with ``crop`` to the shape of the summed estimates) minus that
    sum, then passed through ``AudioClip``.

    Args:
        input_mix: Mixture tensor the residual is taken from.
        featuremap: Tensor fed to each per-source convolution.
        source_names: Sequence of source names; the last one is the residual.
        num_channels: Number of filters of each output convolution.
        filter_width: Kernel size of each output convolution.
        padding: Padding mode of each output convolution.
        activation: Activation of each output convolution.
        training: Forwarded to ``AudioClip``.

    Returns:
        dict mapping every source name to its output tensor.

    NOTE(review): with fewer than two source names no convolution is created
    and the running sum stays the integer 0, so reading ``.shape`` from it
    fails.
    """
    estimates = {}
    estimate_total = 0
    for source in source_names[:-1]:
        conv = tf.keras.layers.Conv1D(num_channels, filter_width, activation=activation, padding=padding)
        estimate = conv(featuremap)
        estimates[source] = estimate
        estimate_total += estimate

    # Whatever the explicit estimates do not account for belongs to the last source.
    residual = crop(input_mix, estimate_total.shape) - estimate_total
    estimates[source_names[-1]] = AudioClip(residual, training)
    return estimates
def learned_interpolation_layer(input, padding, level):
    """Double the length of axis 1 using per-feature sigmoid-scaled copies of ``input``.

    A variable holding one weight per feature (``input.shape[2]``) is created
    with a Glorot-uniform initial value and squashed by a sigmoid to ``w``.
    ``input`` is multiplied by ``diag(w)`` and by ``diag(1 - w)``, and the two
    products are concatenated along axis 1. For ``padding == "valid"`` the
    last step of the concatenated result is dropped.

    Args:
        input: Tensor whose axis 2 is the feature axis.
        padding: ``"valid"`` drops the final step; any other value keeps it.
        level: Used only to name the variable (``"interp_<level>"``).

    Returns:
        The concatenated tensor (optionally shortened by one step).

    NOTE(review): the two scaled copies are appended one after the other
    rather than interleaved, and neighbouring time steps are never combined
    -- confirm this is the intended interpolation.
    NOTE(review): the variable is created outside of any Keras layer;
    presumably it is not tracked or trained by the model -- verify.
    """
    num_features = input.shape[2]
    initial_values = tf.initializers.GlorotUniform()(shape=[num_features])
    raw_weights = tf.Variable(initial_values, dtype=tf.float32, name="interp_" + str(level))
    gate = tf.nn.sigmoid(raw_weights)
    inverse_gate = 1.0 - gate

    def _scale_features(per_feature_scale):
        # Multiply each feature of `input` by its own scale via a (1, F, F) diagonal matrix.
        diagonal = tf.expand_dims(tf.linalg.diag(per_feature_scale), axis=0)
        return tf.linalg.matmul(input, diagonal)

    scaled = _scale_features(gate)
    counter_scaled = _scale_features(inverse_gate)
    stacked = tf.concat([scaled, counter_scaled], axis=1)

    if padding == "valid":
        return stacked[:, :-1, :]
    return stacked


def create_encoder(input, num_layers, num_initial_filters, filter_size, input_filter_size, padding, dropout_rate=0.3):
    """Build the downsampling path and return the tensor produced at each level.

    The first block is Conv1D (``num_initial_filters`` filters, kernel
    ``input_filter_size``) + Dropout. Each of the following ``num_layers - 1``
    blocks is Conv1D (kernel ``filter_size``, ``num_initial_filters * (i + 1)``
    filters for loop index ``i``) + Dropout, after which every second step
    along axis 1 is kept. All convolutions use a LeakyReLU activation.

    Args:
        input: Input tensor of the encoder.
        num_layers: Total number of encoder blocks.
        num_initial_filters: Filter count of the first block and the step by
            which it grows.
        filter_size: Kernel size of every block after the first.
        input_filter_size: Kernel size of the first block.
        padding: Padding mode passed to every Conv1D.
        dropout_rate: Rate of the Dropout following each convolution.

    Returns:
        List with one tensor per block, in creation order.
    """
    def _conv_dropout(tensor, filters, kernel_size):
        # Conv1D with LeakyReLU activation, followed by Dropout.
        conv = tf.keras.layers.Conv1D(filters, kernel_size, strides=1, activation=LeakyReLU(), padding=padding)
        convolved = conv(tensor)
        return tf.keras.layers.Dropout(dropout_rate)(convolved)

    # First block keeps the input resolution.
    level_outputs = [_conv_dropout(input, num_initial_filters, input_filter_size)]

    for level in range(num_layers - 1):
        filters = num_initial_filters + (num_initial_filters * level)
        block = _conv_dropout(level_outputs[-1], filters, filter_size)
        # Decimate by a factor of 2 before storing the level's output.
        level_outputs.append(block[:, ::2, :])

    return level_outputs

def create_decoder(enc_outputs, num_layers, num_initial_filters, filter_size, merge_filter_size, padding, upsampling):
    """Build the upsampling path on top of the encoder outputs.

    Starting from the last encoder output, each step (for ``i`` from
    ``num_layers - 1`` down to 1) upsamples the current tensor, concatenates
    it with ``enc_outputs[i - 1]`` along the feature axis and applies a Conv1D
    with ``num_initial_filters * (num_layers - i)`` filters, kernel
    ``merge_filter_size`` and a LeakyReLU activation.

    Args:
        enc_outputs: List of encoder tensors as returned by ``create_encoder``.
        num_layers: Number of encoder levels; ``num_layers - 1`` decoder steps
            are built.
        num_initial_filters: Base filter count of the merge convolutions.
        filter_size: Not used by the decoder; kept so existing callers work.
        merge_filter_size: Kernel size of the merge convolutions.
        padding: Padding mode of the merge convolutions (also forwarded to
            ``learned_interpolation_layer``).
        upsampling: ``'linear'`` uses ``UpSampling1D(size=2)``; ``'learned'``
            uses ``learned_interpolation_layer``.

    Returns:
        The tensor produced by the last merge convolution, or
        ``enc_outputs[-1]`` when no decoder step is built.

    Raises:
        ValueError: If a decoder step is needed and ``upsampling`` is neither
            ``'linear'`` nor ``'learned'``. Previously such a value silently
            skipped the upsampling step.

    NOTE(review): Keras documents ``UpSampling1D`` as repeating each time
    step, so the ``'linear'`` mode is presumably not a linear interpolation
    -- confirm the naming.
    """
    current_layer = enc_outputs[-1]

    for i in range(num_layers - 1, 0, -1):
        if upsampling == 'linear':
            current_layer = tf.keras.layers.UpSampling1D(size=2)(current_layer)
        elif upsampling == 'learned':
            current_layer = learned_interpolation_layer(current_layer, padding=padding, level=i)
        else:
            # Without upsampling the skip connection below cannot line up, so
            # fail here with a message that names the actual problem.
            raise ValueError(
                "Unknown upsampling mode {!r}; expected 'linear' or 'learned'".format(upsampling)
            )

        # Skip connection: merge with the encoder output of the matching level.
        current_layer = tf.concat([current_layer, enc_outputs[i - 1]], axis=2)
        current_layer = tf.keras.layers.Conv1D(num_initial_filters * (num_layers - i), merge_filter_size, strides=1, activation=LeakyReLU(), padding=padding)(current_layer)

    return current_layer


def get_output_layer(current_layer, output_type, source_names, num_channels, output_filter_size, padding, activation, training, input_mix=None):
    """Create the per-source output tensors on top of the decoder output.

    Args:
        current_layer: Decoder output tensor.
        output_type: ``"direct"`` builds one independent convolution per
            source; ``"difference"`` derives the last source as the residual
            of the input mixture.
        source_names: Sequence of source names.
        num_channels: Number of filters of each output convolution.
        output_filter_size: Kernel size of each output convolution.
        padding: Padding mode of each output convolution.
        activation: Activation of each output convolution.
        training: Forwarded to ``difference_output``.
        input_mix: Mixture tensor used by the ``"difference"`` output. When
            omitted, the single source input that ``current_layer`` was
            computed from is looked up with
            ``tf.keras.utils.get_source_inputs``.

    Returns:
        dict mapping every source name to its output tensor.

    Raises:
        NotImplementedError: For an unknown ``output_type``.
        ValueError: If ``input_mix`` is omitted for ``"difference"`` and it
            cannot be determined unambiguously from ``current_layer``.
    """
    if output_type == "direct":
        return independent_outputs(current_layer, source_names, num_channels, output_filter_size, padding, activation)
    elif output_type == "difference":
        if input_mix is None:
            # This branch used to pass the Python builtin `input` to crop().
            # Recover the model input from the graph so callers that do not
            # pass `input_mix` still work.
            source_inputs = tf.keras.utils.get_source_inputs(current_layer)
            if not isinstance(source_inputs, (list, tuple)) or len(source_inputs) != 1:
                raise ValueError(
                    "Could not determine the input mixture from current_layer; "
                    "pass input_mix explicitly"
                )
            input_mix = source_inputs[0]
        cropped_input = crop(input_mix, current_layer.get_shape().as_list(), match_feature_dim=False)
        return difference_output(cropped_input, current_layer, source_names, num_channels, output_filter_size, padding, activation, training)
    else:
        raise NotImplementedError("Unknown output type")

def independent_outputs(featuremap, source_names, num_channels, filter_width, padding, activation):
    """Apply a separate Conv1D to ``featuremap`` for every source name.

    Args:
        featuremap: Tensor fed to each output convolution.
        source_names: Iterable of source names.
        num_channels: Number of filters of each convolution.
        filter_width: Kernel size of each convolution.
        padding: Padding mode of each convolution.
        activation: Activation of each convolution.

    Returns:
        dict mapping every source name to the output of its own convolution.
    """
    return {
        source: tf.keras.layers.Conv1D(num_channels, filter_width, activation=activation, padding=padding)(featuremap)
        for source in source_names
    }


In [5]:

def build_model():
    """Assemble the encoder/decoder network from the module-level settings.

    Reads the configuration globals (``num_frames``, ``num_channels``,
    ``num_layers``, ``num_initial_filters``, ``filter_size``,
    ``input_filter_size``, ``merge_filter_size``, ``output_filter_size``,
    ``padding``, ``upsampling``, ``output_type``, ``source_names``,
    ``activation``, ``training``) at call time, so they must be defined
    before this function is called.

    Returns:
        A Keras ``Model`` whose input is named ``"input"`` and whose outputs
        are the dict returned by ``get_output_layer``.
    """
    mix_input = Input(shape=(num_frames, num_channels), name="input")

    # Downsampling path: one tensor per level, reused as skip connections.
    skip_tensors = create_encoder(mix_input, num_layers, num_initial_filters, filter_size, input_filter_size, padding)

    # Upsampling path merging the skip connections back in.
    decoded = create_decoder(skip_tensors, num_layers, num_initial_filters, filter_size, merge_filter_size, padding, upsampling)

    # One output tensor per source name.
    source_outputs = get_output_layer(decoded, output_type, source_names, num_channels, output_filter_size, padding, activation, training)

    return Model(inputs=mix_input, outputs=source_outputs)



In [6]:
# --- Model configuration (read as globals by build_model) ---
num_frames = 16384  # length of the model input along the time axis; also reused as SNIPPET_LENGTH (alternative tried: * 2)
num_channels = 1  # channel count of the input and of every output convolution
num_layers = 12  # number of encoder blocks; the decoder builds num_layers - 1 merge steps
num_initial_filters = 24  # filters of the first encoder block and the per-level increment
filter_size = 15  # kernel size of the encoder convolutions after the first one
merge_filter_size = 5  # kernel size of the decoder merge convolutions
input_filter_size = 15  # kernel size of the first encoder convolution
output_filter_size = 1  # kernel size of the output convolutions
padding = 'same'  # padding mode passed to every Conv1D
upsampling = 'linear'  # or 'learned'
output_type = 'direct'  # or 'difference'
source_names = ["accompaniment", "vocals"]  # one model output per name
activation = 'tanh'  # activation of the output convolutions
training = True  # forwarded to get_output_layer; only read by AudioClip on the 'difference' path

# --- Training configuration ---
learning_rate = 0.004  # used when the model is recompiled for the main training run
BATCH_SIZE = 128
EPOCHS = 200  # also determines the step of the learning-rate sweep

# Build the model from the settings above.
model = build_model()

# Initial compile with Keras' default Adam settings; later cells recompile
# with an explicit learning rate.
model.compile(optimizer='adam', loss='mse') # or other appropriate loss and optimizer

# Print the layer-by-layer summary.
model.summary()


2023-08-12 11:50:08.002071: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Max
2023-08-12 11:50:08.002092: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2023-08-12 11:50:08.002099: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2023-08-12 11:50:08.002160: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-08-12 11:50:08.002390: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input (InputLayer)          [(None, 16384, 1)]           0         []                            
                                                                                                  
 conv1d (Conv1D)             (None, 16384, 24)            384       ['input[0][0]']               
                                                                                                  
 dropout (Dropout)           (None, 16384, 24)            0         ['conv1d[0][0]']              
                                                                                                  
 conv1d_1 (Conv1D)           (None, 16384, 24)            8664      ['dropout[0][0]']             
                                                                                              

In [7]:
# SAMPLE_RATE is not read by any of the visible cells.
SAMPLE_RATE = 22050
# Number of float samples stored per signal in each TFRecord example; tied to
# the model's input length.
SNIPPET_LENGTH = num_frames
# Directory containing the long_train / long_val / long_test TFRecord files.
# NOTE(review): machine-specific absolute path -- consider making it configurable.
tfRecord_Datasets = '/Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/tf_Record'


def parse_tfrecord_fn(example):
    """Decode one serialized example into a ``(mixed_signal, vocal_signal)`` pair.

    Both features are parsed as fixed-length float32 vectors of
    ``SNIPPET_LENGTH`` values.

    Args:
        example: A serialized ``tf.train.Example`` record.

    Returns:
        Tuple ``(mixed_signal, vocal_signal)``.

    NOTE(review): only the vocal signal is returned as a target, while the
    model is configured with two named outputs; presumably Keras compares
    every output against this one target -- confirm that is intended.
    """
    schema = {
        key: tf.io.FixedLenFeature([SNIPPET_LENGTH], tf.float32)
        for key in ('mixed_signal', 'vocal_signal')
    }
    parsed = tf.io.parse_single_example(example, schema)
    return parsed['mixed_signal'], parsed['vocal_signal']

def load_dataset(filename):
    """Open a TFRecord file and decode each record with ``parse_tfrecord_fn``.

    Args:
        filename: Path of the TFRecord file (anything accepted by
            ``tf.data.TFRecordDataset``).

    Returns:
        A ``tf.data`` dataset yielding ``(mixed_signal, vocal_signal)`` pairs.
    """
    return tf.data.TFRecordDataset(filename).map(parse_tfrecord_fn)

# Load the training, validation and test splits from the TFRecord directory.
# The datasets are unbatched here; batching is applied at the fit() calls.
# test_dataset is not used by any of the visible cells.
train_dataset = load_dataset(os.path.join(tfRecord_Datasets, 'long_train.tfrecord'))
val_dataset = load_dataset(os.path.join(tfRecord_Datasets, 'long_val.tfrecord'))
test_dataset = load_dataset(os.path.join(tfRecord_Datasets, 'long_test.tfrecord'))


In [38]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Multiply the learning rate by 0.2 after 5 epochs without val_loss
# improvement, never going below 1e-4.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
# Starting point of the callback list used by the main training runs; later
# cells add more callbacks to it.
callbacks = [reduce_lr]


In [8]:
import wandb
from wandb.keras import WandbCallback

# Start a Weights & Biases run for experiment tracking. This needs network
# access: the recorded output of this cell shows the initialization timing
# out with a CommError.
wandb.init(project='Shaking_Through_Unet_model_lr_GPU')


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
wandb: Network error (ReadTimeout), entering retry loop.
[34m[1mwandb[0m: Network error (ReadTimeout), entering retry loop.


Problem at: /Users/rei/anaconda3/envs/TflowGPU/lib/python3.11/site-packages/wandb/sdk/wandb_init.py 829 getcaller


CommError: Run initialization has timed out after 60.0 sec. 
Please refer to the documentation for additional information: https://docs.wandb.ai/guides/track/tracking-faq#initstarterror-error-communicating-with-wandb-process-

In [None]:

from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Save the model with the lowest validation loss, and stop once the
# validation loss has not improved for 3 epochs.
checkpoint = ModelCheckpoint('model.h5', save_best_only=True, monitor='val_loss')
early_stopping = EarlyStopping(monitor='val_loss', patience=3)

# Rebuild the list instead of appending in place, so that re-running this
# cell does not stack duplicate W&B / checkpoint / early-stopping callbacks.
# Any other callbacks already in the list are kept in their original order.
callbacks = [
    cb for cb in callbacks
    if not isinstance(cb, (WandbCallback, ModelCheckpoint, EarlyStopping))
]
callbacks += [WandbCallback(log_weights=True), checkpoint, early_stopping]



In [9]:
# Learning-rate sweep settings: the rate starts at initial_lr and is
# multiplied by lr_multiplier once per epoch.
initial_lr = 1e-8
max_lr = 0.04  # You can adjust this depending on how high you want to test
# Chosen so that initial_lr * lr_multiplier ** EPOCHS == max_lr.
lr_multiplier = (max_lr/initial_lr)**(1/EPOCHS)

# Custom Callback for Learning Rate Test
class LearningRateRangeTest(tf.keras.callbacks.Callback):
    """Callback that raises the learning rate every epoch and records the loss.

    At the start of epoch ``e`` the optimizer's learning rate is set to
    ``initial_lr * lr_multiplier ** e``, where ``initial_lr`` and
    ``lr_multiplier`` are module-level names looked up when the hook runs. At
    the end of each epoch the learning rate and the training loss are
    appended to ``lr_logs`` and ``loss_logs``; training is stopped once the
    learning rate exceeds ``max_lr``.

    Attributes:
        max_lr: Learning rate above which training is stopped.
        lr_logs: Learning rate recorded at the end of each epoch.
        loss_logs: Value of ``logs["loss"]`` recorded at the end of each epoch.
    """

    def __init__(self, max_lr):
        # Initialise the base Callback first so the attributes Keras expects
        # on every callback exist before training starts.
        super().__init__()
        self.max_lr = max_lr
        self.lr_logs = []
        self.loss_logs = []

    def on_epoch_begin(self, epoch, logs=None):
        """Set the optimizer's learning rate for this epoch."""
        lr = initial_lr * (lr_multiplier ** epoch)
        tf.keras.backend.set_value(self.model.optimizer.lr, lr)

    def on_epoch_end(self, epoch, logs=None):
        """Record learning rate and loss; stop once ``max_lr`` is exceeded."""
        lr = tf.keras.backend.get_value(self.model.optimizer.lr)
        self.lr_logs.append(lr)
        self.loss_logs.append(logs["loss"])
        if lr > self.max_lr:
            self.model.stop_training = True

lr_test = LearningRateRangeTest(max_lr=max_lr)

# Recompile with the sweep's starting learning rate and train with only the
# sweep callback attached.
# NOTE(review): this fit() updates the weights of the same `model` object
# that the later training cells continue from -- confirm whether the model
# should be rebuilt after the sweep.
model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=initial_lr), loss='mse')

history = model.fit(train_dataset.batch(BATCH_SIZE), 
                    epochs=EPOCHS, 
                    validation_data=val_dataset.batch(BATCH_SIZE),
                    callbacks=[lr_test])  # Only use the LR test callback

# Plot the recorded loss against the learning rate on a logarithmic x axis.
import matplotlib.pyplot as plt

plt.semilogx(lr_test.lr_logs, lr_test.loss_logs)
plt.xlabel("Learning Rate")
plt.ylabel("Loss")
plt.show()

Epoch 1/200


2023-08-12 11:52:34.898804: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


In [40]:
# Main training run: recompile with the configured learning rate, then fit
# using the shared callback list. BATCH_SIZE and EPOCHS come from the
# configuration cell.


model.compile(optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=learning_rate), loss='mse', )

history = model.fit(train_dataset.batch(BATCH_SIZE), 
          epochs=EPOCHS, 
          validation_data=val_dataset.batch(BATCH_SIZE),
          callbacks=callbacks)


Epoch 1/200
      9/Unknown - 2593s 102s/step - loss: 1.8845 - conv1d_560_loss: 0.9403 - conv1d_561_loss: 0.9442

  saving_api.save_model(


INFO:tensorflow:Assets written to: /Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/Notebooks/wandb/run-20230811_232609-l0b6az3u/files/model-best/assets


INFO:tensorflow:Assets written to: /Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/Notebooks/wandb/run-20230811_232609-l0b6az3u/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/Notebooks/wandb/run-20230811_232609-l0b6az3u/files/model-best)... Done. 0.4s


Epoch 2/200


  saving_api.save_model(




INFO:tensorflow:Assets written to: /Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/Notebooks/wandb/run-20230811_232609-l0b6az3u/files/model-best/assets
[34m[1mwandb[0m: Adding directory to artifact (/Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/Notebooks/wandb/run-20230811_232609-l0b6az3u/files/model-best)... Done. 0.3s


Epoch 3/200


  saving_api.save_model(


Epoch 4/200

KeyboardInterrupt: 

In [41]:
# Continue training with a more patient early-stopping rule (10 epochs
# without val_loss improvement).
early_stopping = EarlyStopping(monitor='val_loss', patience=10)

# Replace the checkpoint / early-stopping entries instead of extending the
# list: extending would keep the earlier EarlyStopping instance active (so
# its shorter patience would still end training first) and would register
# the checkpoint callback a second time.
callbacks = [
    cb for cb in callbacks
    if not isinstance(cb, (ModelCheckpoint, EarlyStopping))
]
callbacks += [checkpoint, early_stopping]
history = model.fit(train_dataset.batch(BATCH_SIZE), 
          epochs=EPOCHS, 
          validation_data=val_dataset.batch(BATCH_SIZE),
          callbacks=callbacks)
