# Loading Data

In [None]:
# Shell escape: print the directory the notebook kernel is running in.
!pwd

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell runs, so edits to library source are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

from ddsp.colab import jupyter_utils
import ddsp.training
from matplotlib import pyplot as plt
import numpy as np
import os

# File pattern of the training TFRecords, taken from the URMP_MONO environment
# variable so no machine-specific path is stored in the notebook.
TRAIN_TFRECORD_FILEPATTERN = os.environ.get("URMP_MONO")
if not TRAIN_TFRECORD_FILEPATTERN:
    # Fail here with an actionable message rather than handing None to the
    # provider and getting an unrelated error further down.
    raise ValueError(
        'Environment variable URMP_MONO is not set. Point it at the TFRecord '
        'file pattern of the training data before running this cell.')

data_provider = ddsp.training.data.TFRecordProvider(TRAIN_TFRECORD_FILEPATTERN)
# Sanity check of the provider type. `isinstance` is required here: comparing
# the instance to the class with `is` is always False.
print(isinstance(data_provider, ddsp.training.data.DataProvider))
dataset = data_provider.get_batch(4)

# Print the shape of every feature in the first batch (prints nothing if the
# dataset yields no batches).
first_batch = next(iter(dataset), None)
if first_batch is not None:
    for feature_name, feature in first_batch.items():
        print(feature_name, feature.shape)

# Pull a single batch for inspection. The batch is fetched exactly once, inside
# the guard, so an empty dataset always produces the explanatory error below.
try:
    ex = next(iter(dataset))
except StopIteration:
    # `from None` drops the bare StopIteration from the traceback; it carries
    # no information beyond "the iterator was empty".
    raise ValueError(
      'TFRecord contains no examples. Please try re-running the pipeline with '
      'different audio file(s).') from None

# Audio player for the first example of the batch.
jupyter_utils.show_audio(ex['audio'][0])

# Plot the conditioning features of the first example in the batch, one
# subplot per feature.
f, ax = plt.subplots(3, 1, figsize=(14, 4))

# Build the time axis from the data itself instead of assuming 1000 frames, so
# the cell keeps working if the dataset uses a different frame count.
# NOTE(review): the 4.0 assumes every example spans four seconds - confirm
# against the dataset's example length.
n_frames = ex['loudness_db'][0].shape[0]
x = np.linspace(0, 4.0, n_frames)

# (feature key, y-axis label, whether to label the x axis)
panels = (
    ('loudness_db', 'loudness_db', False),
    ('f0_hz', 'F0_Hz', True),
    ('f0_confidence', 'F0_confidence', True),
)
for axis, (feature_key, y_label, label_x) in zip(ax, panels):
    axis.set_ylabel(y_label)
    if label_x:
        axis.set_xlabel('seconds')
    axis.plot(x, ex[feature_key][0])

# Init Model and Run Training Steps

In [None]:
# (Re)load the TensorBoard notebook extension; `%reload_ext` is safe to run
# again when the cell is re-executed.
%reload_ext tensorboard
import tensorboard as tb
import warnings
# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings - consider narrowing the
# filter to specific categories or modules.
warnings.filterwarnings("ignore")

import time

import ddsp
from ddsp.training import (data, decoders, encoders, models, preprocessing, 
                           train_util, trainers, discriminator)
from ddsp.training.models.lsgan import LSGAN
import gin
import matplotlib.pyplot as plt
import numpy as np
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
from ddsp.synths import TensorToAudio

# Put gin into interactive mode - presumably so that re-running cells which
# re-register configurables does not raise; confirm against the gin docs.
gin.enter_interactive_mode()



# Directory handed to trainer.save() for checkpoints.
SAVE_DIR = "artifacts/tmp"

# To open TensorBoard inline on that directory, uncomment:
# tb.notebook.start('--logdir "{}"'.format(SAVE_DIR))

# Audio sample rate (presumably in Hz) and the number of samples in four
# seconds of audio at that rate.
sample_rate = 16000
n_samples = sample_rate * 4

# Distribution strategy selected by ddsp's train_util helper; the model and
# trainer are created, and the short training loop runs, inside its scope.
strategy = train_util.get_strategy()

with strategy.scope():
    TIME_STEPS = 1000

    # --- Networks -----------------------------------------------------------
    preprocessor = preprocessing.F0LoudnessPreprocessor(time_steps=TIME_STEPS)

    decoder = decoders.UntitledGAN(input_keys=('ld_scaled', 'f0_hz', 'z'))

    # --- Processor group ----------------------------------------------------
    # Single-node DAG: a TensorToAudio processor fed by the 'audio_tensor' key.
    dag = [(TensorToAudio(), ['audio_tensor'])]

    processor_group = ddsp.processors.ProcessorGroup(
        dag=dag,
        name='processor_group',
    )

    # --- Losses -------------------------------------------------------------
    spectral_loss = ddsp.losses.SpectralLoss(
        loss_type='L1',
        mag_weight=1.0,
        logmag_weight=1.0,
    )

    encoder = encoders.MfccTimeConstantRnnEncoder(
        rnn_channels=512,
        rnn_type='gru',
        z_dims=16,
        z_time_steps=125,
    )

    critic = discriminator.ParallelWaveGANDiscriminator(
        input_keys=['discriminator_audio', 'f0_hz', 'loudness_db'],
    )

    # --- Model and trainer --------------------------------------------------
    model = LSGAN(
        preprocessor=preprocessor,
        encoder=encoder,
        decoder=decoder,
        processor_group=processor_group,
        discriminator=critic,
        losses=[spectral_loss],
    )
    trainer = trainers.Trainer(model, strategy, checkpoints_to_keep=1)

    # Note: this rebinds `dataset` to its distributed version.
    dataset = trainer.distribute_dataset(dataset)
    # Build the model on one batch, then start a fresh iterator for training.
    trainer.build(next(iter(dataset)))
    dataset_iter = iter(dataset)

    # --- Short training run ---------------------------------------------------
    for step in range(10):
        losses = trainer.train_step(dataset_iter)

        # One tab-separated line per step: the step index, then every loss.
        loss_report = ''.join(
            '{}: {:.2f}\t'.format(loss_name, loss_value)
            for loss_name, loss_value in losses.items()
        )
        print('step: {}\t'.format(step) + loss_report)

        # With only 10 steps this condition holds at step 1 alone, so exactly
        # one checkpoint is written during this loop.
        if step % 300 == 1:
            print(f"Step {step}")
            trainer.save(SAVE_DIR)

In [None]:
# Take the next batch from the training iterator and run it through the model.
ex = next(dataset_iter)
# NOTE(review): the model is called without an explicit `training` argument -
# confirm which mode LSGAN defaults to when used for inference like this.
noise = model(ex)

# Play the synthesized audio of the batch element at index 3.
synth_audio = noise['audio_synth']
jupyter_utils.show_audio(synth_audio[3])