In [5]:
%load_ext autoreload
%autoreload 2

import os, yaml
import sys

# Parent of the current working directory; added to the import path so the
# project modules imported below can be found when the notebook runs from a
# sub-directory of the repository.
PROJECT_DIR = os.path.dirname(os.getcwd())
# Guarded append: re-running this cell must not pile up duplicate entries.
if PROJECT_DIR not in sys.path:
    sys.path.append(PROJECT_DIR)

import numpy as np

import tensorflow as tf

from encoders import SupervisedEncoder
from decoders import DecoderWithoutLatent, DecoderWithLatent
from models import SupervisedAutoencoder, UnsupervisedAutoencoder
from losses import SpectralLoss, MultiLoss
from preprocessing import F0LoudnessPreprocessor, MidiF0LoudnessPreprocessor
from dataloader import make_unsupervised_dataset, make_supervised_dataset

from train_utils import make_supervised_model, make_unsupervised_model, create_callbacks, make_optimizer
from train_utils import make_supervised_dataset_from_config, make_unsupervised_dataset_from_config
from feature_extraction import process_track

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Parse the supervised experiment YAML and keep it as a plain dict in `config`.
with open('../configs/Supervised_Latent_Violin_Timesteps.yaml', mode='r') as file:
    parsed_yaml = yaml.load(file, Loader=yaml.FullLoader)
config = dict(parsed_yaml)

In [None]:
# Build the supervised datasets from a hard-coded clip folder
# (config['data']['path'] is not used here); remaining options come from `config`.
supervised_splits = make_supervised_dataset(
    '../audio_clips/Violin_short',
    # extract mfcc if there is an encoder
    mfcc=config['model']['encoder'],
    batch_size=config['training']['batch_size'],
    sample_rate=config['data']['sample_rate'],
)
# Three values are returned; the third one is not used in this notebook.
train_set, validation_set, _ = supervised_splits

In [None]:
# Config-driven way of building the supervised datasets; rebinds
# `train_set` / `validation_set` and ignores the third returned value.
config_splits = make_supervised_dataset_from_config(config)
train_set, validation_set, _ = config_splits

## Supervised DDSP

In [None]:
# Model and optimizer are both created from the loaded config.
model = make_supervised_model(config)
optimizer = make_optimizer(config)
model.compile(optimizer)
print('Model Compiled.')

# No callbacks are passed to this run; the epoch count comes from the config.
fit_options = dict(validation_data=validation_set,
                   epochs=config['training']['epochs'])
history = model.fit(train_set, **fit_options)

In [None]:
# Hand-assembled supervised autoencoder (instead of make_supervised_model(config)),
# plus its optimizer and checkpoint callback.
preprocessing_timesteps = 250
decoder_timesteps = 1000

preprocessor = F0LoudnessPreprocessor(timesteps=preprocessing_timesteps)

# Without Latent
encoder = None
decoder = DecoderWithoutLatent(timesteps=decoder_timesteps)

# With Latent
#encoder = SupervisedEncoder()
#decoder = DecoderWithLatent(timesteps=decoder_timesteps)

# Choose a loss
loss = SpectralLoss(logmag_weight=1.0)
#loss = MultiLoss()

# The spectral loss tracks a single value; any other loss tracks three.
tracker_names = ['spec_loss'] if loss.name=='spectral_loss' else ['spec_loss', 'perc_loss', 'total_loss']

model = SupervisedAutoencoder(preprocessor=preprocessor,
                            encoder=encoder,
                            decoder=decoder,
                            loss_fn=loss,
                            tracker_names=tracker_names,
                            add_reverb=True)

# The bare names Adam / ExponentialDecay / ModelCheckpoint are not imported in the
# visible notebook cells, so the Keras classes are reached through `tf` instead.
adam = tf.keras.optimizers.Adam(
    learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(
        1e-3, decay_steps=10000, decay_rate=0.98))

model_dir = "model_checkpoints/yeah_{}".format(config['run_name'])
os.makedirs(model_dir, exist_ok=True)

#callbacks = [ModelCheckpoint(filepath=os.path.join(model_dir, 'model.ckpt'),
#                              monitor='val_spec_loss' if loss.name=='spectral_loss' else 'val_total_loss',
#                              save_best_only=True)]

# NOTE(review): assumes the Keras ModelCheckpoint (filepath, monitor=...) was the
# intended class here - confirm it was not a project-local callback.
callbacks = [tf.keras.callbacks.ModelCheckpoint(
    model_dir,
    monitor='val_spec_loss' if loss.name=='spectral_loss' else 'val_total_loss')]

#csv_logger = tf.keras.callbacks.CSVLogger("logs/{}.csv".format(RUN_NAME), separator=",", append=False)
#callbacks = [ModelCheckpoint(model, RUN_NAME), csv_logger]
#callbacks.append(CustomWandbCallback(RUN_NAME)) # uncomment for WANDB

model.compile(adam)

In [None]:
# Train for 10 epochs using the `callbacks` list defined in an earlier cell.
history = model.fit(
    train_set,
    validation_data=validation_set,
    epochs=10,
    callbacks=callbacks,
)

In [None]:
# Rebuilds the supervised autoencoder by hand: no encoder, spectral loss, reverb on.
preprocessing_timesteps, decoder_timesteps = 250, 1000

preprocessor = F0LoudnessPreprocessor(timesteps=preprocessing_timesteps)

# Without Latent
encoder = None
decoder = DecoderWithoutLatent(timesteps=decoder_timesteps)

# With Latent
#encoder = SupervisedEncoder()
#decoder = DecoderWithLatent(timesteps=decoder_timesteps)

# Choose a loss
loss = SpectralLoss(logmag_weight=1.0)
#loss = MultiLoss()

# Tracker names follow the chosen loss: one for the spectral loss, three otherwise.
if loss.name == 'spectral_loss':
    tracker_names = ['spec_loss']
else:
    tracker_names = ['spec_loss', 'perc_loss', 'total_loss']

model = SupervisedAutoencoder(
    preprocessor=preprocessor,
    encoder=encoder,
    decoder=decoder,
    loss_fn=loss,
    tracker_names=tracker_names,
    add_reverb=True,
)

## Unsupervised DDSP

In [None]:
from metrics import f0_midi_scaled_L1_loss

In [17]:
# Replace `config` with the unsupervised violin experiment settings.
with open('../configs/Unsupervised_Violin.yaml', mode='r') as file:
    parsed_yaml = yaml.load(file, Loader=yaml.FullLoader)
config = dict(parsed_yaml)

In [75]:
# Config-driven unsupervised datasets; the third returned value is ignored.
unsupervised_splits = make_unsupervised_dataset_from_config(config)
train, val, _ = unsupervised_splits

  - 0.5 * np.log10(f_sq + const[3])


In [80]:
# Build the unsupervised model and the optimizer from the loaded config.
model = make_unsupervised_model(config)
optimizer = make_optimizer(config)

# Compiled with the optimizer only; passing f0_midi_scaled_L1_loss as a metric is disabled.
model.compile(optimizer) #, metrics = [f0_midi_scaled_L1_loss]

In [81]:
# Train the unsupervised model for 10 epochs.
# NOTE(review): the output saved with this cell is a ValueError raised by
# upsample_with_windows() for a 4-D `freq_weights` tensor (None, 125, 1, 128);
# the encoder's resample step has to be fixed before this cell can complete.
history = model.fit(train, validation_data=val, epochs=10)

Epoch 1/10
z: (None, 250, 32)
ld_scaled: (None, 250, 1)
freq_weights: (None, 125, 1, 128)


ValueError: in user code:

    /kuacc/users/hbalim15/.local/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /scratch/users/hbalim15/ddsp/models.py:85 train_step  *
        x_pred = self(x, training=True)
    /scratch/users/hbalim15/ddsp/models.py:75 call  *
        features = self.encode(features)
    /scratch/users/hbalim15/ddsp/models.py:166 encode  *
        outputs = self.encoder(features)
    /scratch/users/hbalim15/ddsp/encoders.py:49 call  *
        f0_hz = self.encoder_f(features)
    /scratch/users/hbalim15/ddsp/encoders.py:117 call  *
        freq_weights = self.resample(freq_weights)
    /scratch/users/hbalim15/ddsp/encoders.py:147 resample  *
        return resample(x, self.timesteps, method="window")
    /scratch/users/hbalim15/ddsp/dsp_utils/core.py:543 resample  *
        outputs = upsample_with_windows(inputs, n_timesteps, add_endpoint)
    /scratch/users/hbalim15/ddsp/dsp_utils/core.py:583 upsample_with_windows  *
        raise ValueError('Upsample_with_windows() only supports 3 dimensions, '

    ValueError: Upsample_with_windows() only supports 3 dimensions, not (None, 125, 1, 128).


In [None]:
# Hand-assembled unsupervised autoencoder with optimizer, logging and callbacks.
# NOTE(review): Encoder_z, Encoder_f, ExponentialDecay, Adam, ModelCheckpoint,
# CustomWandbCallback and RUN_NAME are not defined or imported in the visible
# notebook cells - this cell presumably relies on state from a removed cell; confirm.
timesteps = 250

# Both encoders share the same number of timesteps.
encoder_z = Encoder_z(timesteps=timesteps)
encoder_f0 = Encoder_f(timesteps=timesteps)

decoder = DecoderWithLatent()

preprocessor = MidiF0LoudnessPreprocessor(timesteps=timesteps)

#loss = SpectralLoss(logmag_weight=1.0)
loss = MultiLoss(logmag_weight=1.0, perceptual_loss_weight=38)

# Extra metric handed to the model under the name "F0_recons_L1".
metric_fns = {"F0_recons_L1": f0_midi_scaled_L1_loss}

model = UnsupervisedAutoencoder(preprocessor=preprocessor,
                                encoder_f0=encoder_f0,
                                encoder_z=encoder_z,
                                decoder=decoder,
                                loss_fn=loss,
                                tracker_names=["total_loss", "spec_loss", "perc_loss","F0_recons_L1"],
                                metric_fns=metric_fns)

# Adam driven by an exponentially decaying learning-rate schedule.
decay = ExponentialDecay(1e-3, decay_steps=10000, decay_rate=0.98)
adam = Adam(learning_rate=decay)

# Per-run CSV log written under logs/, overwritten on each run (append=False).
csv_logger = tf.keras.callbacks.CSVLogger("logs/{}.csv".format(RUN_NAME), separator=",", append=False)

# NOTE(review): ModelCheckpoint is called here with (model, RUN_NAME), unlike the
# (filepath, monitor=...) call used earlier in this notebook - verify which class is meant.
callbacks = [ModelCheckpoint(model, RUN_NAME), csv_logger, CustomWandbCallback(RUN_NAME)]

model.compile(adam, metrics = [f0_midi_scaled_L1_loss])

In [None]:
# Long training run (1000 epochs); the per-epoch step counts are read from the
# `my_len` attribute of each dataset object.
history = model.fit(
    train_set,
    validation_data=validation_set,
    callbacks=callbacks,
    epochs=1000,
    steps_per_epoch=train_set.my_len,
    validation_steps=validation_set.my_len,
)

# Unsupervised

In [None]:
# Unsupervised datasets built straight from the short violin clip folder,
# using the function's default options.
clip_splits = make_unsupervised_dataset('../audio_clips/Violin_short')
train, val, test = clip_splits

In [None]:
# Take the first batch of the training set for inspection. Unlike a
# `for ...: break` loop, this fails immediately if the dataset is empty instead
# of leaving `batch` unbound (or stale from an earlier run).
batch = next(iter(train))

In [None]:
from preprocessing import UnsupervisedPreprocessor
from train_utils import make_unsupervised_model

In [None]:
# Stand-alone unsupervised preprocessor configured with 250 timesteps.
preprocessor = UnsupervisedPreprocessor(timesteps=250)

In [None]:
# Run the preprocessor on the sampled batch to inspect what it produces.
b = preprocessor(batch)

In [None]:
# Show which entries the preprocessor output contains.
b.keys()

In [None]:
# Load the unsupervised violin settings into `config` (file opened in the default mode).
with open('../configs/Unsupervised_Violin.yaml') as file:
    parsed_yaml = yaml.load(file, Loader=yaml.FullLoader)
config = dict(parsed_yaml)

In [None]:
# Build the unsupervised model from the config.
model = make_unsupervised_model(config)

# Adam with an exponentially decaying learning rate read from config['optimizer'].
# The bare names Adam / ExponentialDecay are not imported in the visible notebook
# cells, so the Keras classes are reached through the already-imported `tf`.
# NOTE(review): other cells call make_optimizer(config) for this purpose; switch
# to it here if it is confirmed to build the same schedule.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    config['optimizer']['lr'],
    decay_steps=config['optimizer']['decay_steps'],
    decay_rate=config['optimizer']['decay_rate'])
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

# Model Saving and Experiment Tracking
# `monitor` is only consumed by the (currently disabled) callbacks below.
if config['loss']['type'] == 'spectral':
    monitor = 'val_spec_loss'
else:
    monitor = 'val_total_loss'
#callbacks = create_callbacks(config, monitor)

# Compile and train
model.compile(optimizer)
print('Model Compiled.')
history = model.fit(train,
                    validation_data=val,
                    #callbacks=callbacks,
                    epochs=config['training']['epochs'])

# TODO:

supervised data\
supervised model

interpolation/extrapolation (optional) \
test metrics


In [None]:
#k_filters = [128]*2 + [256]*3 + [512]*4 + [1024]*3
#s_freqs = [1,1,2]*2 + [1,1,1,2] + [1,1]



In [None]:
# Restore the model from a saved checkpoint path.
# NOTE(review): relies on the model class providing a `load` method - confirm;
# stock Keras models expose `load_weights` instead.
model.load("SupervisedViolinModel/300/model.ckpt")

In [None]:
from dsp_utils.spectral_ops import compute_mfcc, compute_logmel, compute_loudness, compute_f0
def calculate_recons_f0_error(dataset):
    """Measure how well the global ``model`` preserves F0 on ``dataset``.

    For every batch the model is run, F0 is re-estimated from the
    ``"audio_synth"`` output with ``compute_f0`` and compared against the
    ``"f0_hz"`` entry the model returns under ``"inputs"``. Only the first
    example of each batch is evaluated.

    Args:
        dataset: Iterable of batches that can be passed to ``model(batch)``.

    Returns:
        ``hz_to_midi(mean absolute F0 difference) / F0_RANGE``.
    """
    # `core` and `spectral_ops` are not imported at notebook level (only single
    # functions of spectral_ops are), so they are imported here to avoid a NameError.
    from dsp_utils import core, spectral_ops

    truth_f0, pred_f0 = [], []
    for batch in dataset:
        pred = model(batch)
        # Slice per batch (first example, last axis dropped) rather than stacking
        # whole batches first: stacking breaks when the final batch is smaller.
        truth_f0.append(pred["inputs"]["f0_hz"].numpy()[0, :, 0])
        audio = pred["audio_synth"].numpy()[0]
        # 16000 / 250 are presumably sample rate and frame rate - TODO confirm.
        pred_f0.append(compute_f0(audio, 16000, 250, viterbi=True)[0])

    error = np.mean(np.abs(np.array(truth_f0) - np.array(pred_f0)))
    # NOTE(review): the mean difference is converted to MIDI after averaging,
    # as in the original code; verify this matches the intended metric.
    return core.hz_to_midi(error) / spectral_ops.F0_RANGE
# Evaluate the reconstruction F0 error on `train_set`; the value is shown as the cell output.
calculate_recons_f0_error(train_set)