In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, LeakyReLU, UpSampling1D, Concatenate, Subtract
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Cropping1D
from tensorflow.keras.layers import Reshape
from tensorflow.nn import sigmoid
import os
import librosa
import numpy as np
import random
from concurrent.futures import ProcessPoolExecutor


In [2]:
models_folder = "/Users/rei/Documents/Machine_Learning/MODELS/Unet/Unet_Sound_Seperation/Unet-Sound-Seperation/Models/"


In [3]:
def crop(tensor, target_shape, match_feature_dim=True):
    shape = tf.shape(tensor)
    diff = shape - target_shape
    assert diff[1] >= 0 # Only positive difference allowed
    if diff[1] == 0:
        return tensor
    crop_start = diff // 2
    crop_end = diff - crop_start
    return tensor[:, crop_start[1]:-crop_end[1], :]

def AudioClip(x, training):
    if training:
        return x
    else:
        return tf.maximum(tf.minimum(x, 1.0), -1.0)

def difference_output(input_mix, featuremap, source_names, num_channels, filter_width, padding, activation, training):
    outputs = dict()
    sum_source = 0
    for name in source_names[:-1]:
        out = tf.keras.layers.Conv1D(num_channels, filter_width, activation=activation, padding=padding)(featuremap)
        outputs[name] = out
        sum_source += out

    last_source = crop(input_mix, sum_source.shape) - sum_source
    last_source = AudioClip(last_source, training)
    outputs[source_names[-1]] = last_source
    return outputs

In [4]:
def learned_interpolation_layer(input, padding, level):
    features = input.shape[2]
    weights = tf.Variable(tf.initializers.GlorotUniform()(shape=[features]), dtype=tf.float32, name="interp_" + str(level))
    weights_scaled = tf.nn.sigmoid(weights)
    counter_weights = 1.0 - weights_scaled

    conv_weights = tf.linalg.diag(weights_scaled)
    conv_weights = tf.expand_dims(conv_weights, axis=0)
    intermediate_vals = tf.linalg.matmul(input, conv_weights)
    
    counter_conv_weights = tf.linalg.diag(counter_weights)
    counter_conv_weights = tf.expand_dims(counter_conv_weights, axis=0)
    counter_intermediate_vals = tf.linalg.matmul(input, counter_conv_weights)

    output = tf.concat([intermediate_vals, counter_intermediate_vals], axis=1)
    
    if padding == "valid":
        output = output[:, :-1, :]

    return output



In [5]:
def create_encoder(input, num_layers, num_initial_filters, filter_size, input_filter_size, padding, dropout_rate=0.3):
    enc_outputs = []
    current_layer = input
    current_layer = tf.keras.layers.Conv1D(num_initial_filters, input_filter_size, strides=1, activation=LeakyReLU(), padding=padding)(current_layer)
    current_layer = tf.keras.layers.Dropout(dropout_rate)(current_layer)  # Adding dropout here
    enc_outputs.append(current_layer)

    for i in range(num_layers - 1):
        current_layer = tf.keras.layers.Conv1D(num_initial_filters + (num_initial_filters * i), filter_size, strides=1, activation=LeakyReLU(), padding=padding)(current_layer)
        current_layer = tf.keras.layers.Dropout(dropout_rate)(current_layer)  # Adding dropout here
        current_layer = current_layer[:, ::2, :]  # Decimate by factor of 2
        enc_outputs.append(current_layer)

    return enc_outputs


In [6]:
def create_decoder(enc_outputs, num_layers, num_initial_filters, filter_size, merge_filter_size, padding, upsampling, dropout_rate=0.3):
    current_layer = enc_outputs[-1]

    for i in range(num_layers - 1, 0, -1):
        if upsampling == 'linear':
            current_layer = tf.keras.layers.UpSampling1D(size=2)(current_layer)
        elif upsampling == 'learned':
            current_layer = learned_interpolation_layer(current_layer, padding=padding, level=i)

        current_layer = tf.concat([current_layer, enc_outputs[i - 1]], axis=2)
        current_layer = tf.keras.layers.Conv1D(num_initial_filters * (num_layers - i), merge_filter_size, strides=1, activation=LeakyReLU(), padding=padding)(current_layer)
        current_layer = tf.keras.layers.Dropout(dropout_rate)(current_layer)  # Adding dropout here

    return current_layer


In [7]:
def get_output_layer(current_layer, output_type, source_names, num_channels, output_filter_size, padding, activation, training):
    if output_type == "direct":
        return independent_outputs(current_layer, source_names, num_channels, output_filter_size, padding, activation)
    elif output_type == "difference":
        cropped_input = crop(input, current_layer.get_shape().as_list(), match_feature_dim=False)
        return difference_output(cropped_input, current_layer, source_names, num_channels, output_filter_size, padding, activation, training)
    else:
        raise NotImplementedError("Unknown output type")

def independent_outputs(featuremap, source_names, num_channels, filter_width, padding, activation):
    outputs = dict()
    for name in source_names:
        outputs[name] = tf.keras.layers.Conv1D(num_channels, filter_width, activation=activation, padding=padding)(featuremap)
    return outputs


In [8]:
def build_model(num_layers, num_initial_filters, filter_size, input_filter_size):
    # Input
    input_mix = Input(shape=(num_frames, num_channels), name="input")

    # Encoder
    enc_outputs = create_encoder(input_mix, num_layers, num_initial_filters, filter_size, input_filter_size, padding)

    # Decoder
    current_layer = create_decoder(enc_outputs, num_layers, num_initial_filters, filter_size, merge_filter_size, padding, upsampling)

    # Output Layer
    outputs = get_output_layer(current_layer, output_type, source_names, num_channels, output_filter_size, padding, activation, training)

    # Build Model
    model = Model(inputs=input_mix, outputs=outputs)
    return model
  

In [9]:
SAMPLE_RATE = 22050
SNIPPET_LENGTH = 16384
tfRecord_Datasets = '/Users/rei/Documents/Machine_Learning/Data/Audio/Shaking_Through/Generated_datasets/tf_Record'


def parse_tfrecord_fn(example):
    feature_description = {
        'mixed_signal': tf.io.FixedLenFeature([SNIPPET_LENGTH], tf.float32),
        'vocal_signal': tf.io.FixedLenFeature([SNIPPET_LENGTH], tf.float32)
    }
    example = tf.io.parse_single_example(example, feature_description)
    return example['mixed_signal'], example['vocal_signal']

def load_dataset(filename):
    raw_dataset = tf.data.TFRecordDataset(filename)
    return raw_dataset.map(parse_tfrecord_fn)

# Load your training, validation and test data
train_dataset = load_dataset(os.path.join(tfRecord_Datasets, 'train.tfrecord'))
val_dataset = load_dataset(os.path.join(tfRecord_Datasets, 'val.tfrecord'))
test_dataset = load_dataset(os.path.join(tfRecord_Datasets, 'test.tfrecord'))


In [10]:
sweep_config = {
    'name': 'unet-separator-sweep',
    'method': 'random',
    'metric': {
        'name': 'val_loss',
        'goal': 'minimize'
    },
    'parameters': {
        'num_layers': {
            'values': [6, 12, 15]
        },
        'num_initial_filters': {
            'values': [12, 24, 48]
        },
        'filter_size': {
            'values': [7, 15, 30]
        },
        'BATCH_SIZE': {
            'values': [16, 32, 64]
        },
        'learning_rate': {
            'values': [0.002, 0.001, 0.0005]
        },
        'lr_scheduler': {
            'values': ['ReduceLROnPlateau', 'ExponentialDecay']
        }
    }
}
import wandb
wandb.init(config=sweep_config)

wandb.log({'loss': train_loss, 'val_loss': val_loss, 'accuracy': accuracy})
from tensorflow.keras.callbacks import EarlyStopping

earlystop_callback = EarlyStopping(
  monitor='val_loss', min_delta=0.0001, patience=3)
from tensorflow.keras.callbacks import ReduceLROnPlateau

lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3)
import tensorflow as tf
from tensorflow.keras.callbacks import LearningRateScheduler

def exponential_decay(lr0, s):
    def exponential_decay_fn(epoch):
        return lr0 * 0.1**(epoch / s)
    return exponential_decay_fn

exponential_decay_fn = exponential_decay(0.01, 20)
lr_callback = LearningRateScheduler(exponential_decay_fn)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mreinert-wasserman[0m. Use [1m`wandb login --relogin`[0m to force relogin


NameError: name 'train_loss' is not defined

In [None]:
num_frames = 16384
num_channels = 1
num_layers = wandb.config.num_layers
num_initial_filters = wandb.config.num_initial_filters
filter_size = wandb.config.filter_size
merge_filter_size = 5
input_filter_size = 15
output_filter_size = 1
padding = 'same'  
upsampling = 'linear'  # or 'learned'
output_type = 'direct'  # or 'difference'
source_names = ["accompaniment", "vocals"]
activation = 'tanh'
training = True

In [None]:
sweep_id = wandb.sweep(sweep_config, project="Shaking_Through_Unet_model")
wandb.agent(sweep_id, function=train_function)  # Replace train_function with the name of your training function



In [None]:
def train_function():
    # Initialize wandb
    run = wandb.init()
    config = run.config

    # Define your model using the hyperparameters from wandb's config
model = build_model(config.num_layers, config.num_initial_filters, config.filter_size, config.input_filter_size)

    # Define your optimizer and compile model
    optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=config.learning_rate)
    model.compile(optimizer=optimizer, loss='mse', metrics=['mse', 'mae', 'accuracy'])  # Added Mean Absolute Error (mae) and accuracy as metrics

    # Define your callbacks including EarlyStopping and LearningRateScheduler
    callbacks_list = [wandb.keras.WandbCallback()]  # WandB's callback to log metrics
    earlystop_callback = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=3)
    callbacks_list.append(earlystop_callback)

    if config.lr_scheduler == 'ReduceLROnPlateau':
        lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3)
    elif config.lr_scheduler == 'ExponentialDecay':
        exponential_decay_fn = exponential_decay(0.01, 20)
        lr_callback = LearningRateScheduler(exponential_decay_fn)
    callbacks_list.append(lr_callback)

    # Train your model
    model.fit(train_dataset.batch(config.BATCH_SIZE),
              validation_data=val_dataset.batch(config.BATCH_SIZE), 
              epochs=10, 
              batch_size=config.BATCH_SIZE, 
              callbacks=callbacks_list)

    # Close the run
    run.finish()
