In [None]:
# Mount Google Drive inside the Colab VM; the dataset and checkpoint paths
# used below all live under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Install and import

In [None]:
%tensorflow_version 2.x
!pip install -qU ddsp==0.14.0

In [None]:
import os
import glob
import gin

import numpy as np
import tensorflow as tf

import ddsp.training
from ddsp.colab import colab_utils
from ddsp.colab.colab_utils import play, specplot

from matplotlib import pyplot as plt
%config InlineBackend.figure_format='retina'

%reload_ext tensorboard
import tensorboard as tb

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

# Define folders

In [None]:
# Name of the instrument/voice; it is used to derive every dataset and
# checkpoint path in this notebook.
INSTRUMENT = 'eva'

# Location of the experiment on Google Drive and of its prepared dataset.
DRIVE_BASE_DIR = '/content/drive/My Drive/SMC 09/DDSP/eval'
DRIVE_DATASET_DIR = '{}/{}_dataset'.format(DRIVE_BASE_DIR, INSTRUMENT)

# Stop here if the dataset folder is missing on Drive.
assert os.path.exists(DRIVE_DATASET_DIR)

# Glob pattern of the TFRecord shards once they are copied to the Colab VM.
TRAIN_TFRECORD_FILEPATTERN = 'data/{}_dataset/train.tfrecord*'.format(INSTRUMENT)

# Copy dataset to Colab

In [None]:
dataset_files = glob.glob(DRIVE_DATASET_DIR + '/*')

#Copy the dataset from Drive to Colab
print("Restoring dataset for " + INSTRUMENT)
!mkdir -p 'data/'$INSTRUMENT'_dataset'
!cp "$DRIVE_DATASET_DIR"/* 'data/'$INSTRUMENT'_dataset/'

# Sanity check 1
Load the first example of the dataset, plot its spectrogram, play it, and plot its loudness, F0 and F0-confidence curves.


In [None]:
# Read the training TFRecords without shuffling so the same example is
# inspected on every run.
data_provider = ddsp.training.data.TFRecordProvider(TRAIN_TFRECORD_FILEPATTERN)
dataset = data_provider.get_dataset(shuffle=False)

try:
  ex = next(iter(dataset))
except StopIteration:
  raise ValueError(
      'TFRecord contains no examples. Please try re-running the pipeline with '
      'different audio file(s).')

colab_utils.specplot(ex['audio'])
colab_utils.play(ex['audio'])

# Length of the time axis in seconds.
# NOTE(review): assumes every example is 4 s long — confirm against the
# settings used to build the dataset.
EXAMPLE_SECS = 4.0

# (feature key in the example, y-axis label) for each subplot, top to bottom.
feature_labels = [
    ('loudness_db', 'loudness_db'),
    ('f0_hz', 'F0_Hz'),
    ('f0_confidence', 'F0_confidence'),
]

f, ax = plt.subplots(len(feature_labels), 1, figsize=(12, 8))
for axis, (key, label) in zip(ax, feature_labels):
  values = ex[key]
  # Take the number of frames from the feature itself rather than assuming
  # 1000, so datasets built with another frame rate still plot.
  x = np.linspace(0, EXAMPLE_SECS, values.shape[0])
  axis.set_ylabel(label)
  axis.set_xlabel('seconds')
  axis.plot(x, values)

# Training loop

In [None]:
max_steps = 20000
steps_per_loop = 6500


for hyperparams in [(20,10),(20,35),(20,65),(60,10),(60,35),(60,65),(100,10),(100,35),(100,65)]:

    harmonic_distribution, noise_magnitudes = hyperparams
  
    DRIVE_CHECKPOINTS_DIR = DRIVE_BASE_DIR + '/' + INSTRUMENT + \
                            '_h' + str(harmonic_distribution) + \
                            '_n' + str(noise_magnitudes) + \
                            '_checkpoints'

    CHECKPOINT_ZIP = INSTRUMENT +\
                            '_h' + str(harmonic_distribution) + \
                            '_n' + str(noise_magnitudes) + \
                            '_instrument.zip'

    latest = tf.train.latest_checkpoint(DRIVE_CHECKPOINTS_DIR)
    if(latest is None):
      latest = 0
    else:
      latest = int(os.path.basename(latest)[5:])

    if (latest >= max_steps):
      print("Skipping h: " + str(harmonic_distribution) + \
                            " n: " + str(noise_magnitudes) + "\n")
      continue

    num_steps = min(steps_per_loop, max_steps-latest)
    print("Training for {} steps".format(num_steps))

    !ddsp_run \
      --mode=train \
      --alsologtostderr \
      --save_dir="$DRIVE_CHECKPOINTS_DIR" \
      --gin_file='$DRIVE_BASE_DIR/singing.gin' \
      --gin_file=datasets/tfrecord.gin \
      --gin_param="TFRecordProvider.file_pattern='$TRAIN_TFRECORD_FILEPATTERN'" \
      --gin_param="batch_size=32" \
      --gin_param="train_util.train.num_steps=$num_steps" \
      --gin_param="train_util.train.steps_per_save=500" \
      --gin_param="train_util.train.steps_per_summary=500" \
      --gin_param="trainers.Trainer.checkpoints_to_keep=5" \
      --gin_param="RnnFcDecoder.output_splits = (('amps', 1),('harmonic_distribution', $harmonic_distribution),('noise_magnitudes', $noise_magnitudes))"

    latest_checkpoint_fname = os.path.basename(tf.train.latest_checkpoint(DRIVE_CHECKPOINTS_DIR))

    !cp "$DRIVE_DATASET_DIR/dataset_statistics.pkl" "$DRIVE_CHECKPOINTS_DIR"
    !cd "$DRIVE_CHECKPOINTS_DIR" && zip $CHECKPOINT_ZIP $latest_checkpoint_fname* operative_config-0.gin dataset_statistics.pkl    
    !mv "$DRIVE_CHECKPOINTS_DIR/$CHECKPOINT_ZIP" "$DRIVE_BASE_DIR/" 
    #tb.notebook.start('--logdir "{}"'.format(DRIVE_CHECKPOINTS_DIR))    

# Sanity check 2
Resynthesis of an existing frame from the dataset with a trained model.

In [None]:
# Take the first batch (a single example, unshuffled) from the training data.
data_provider = ddsp.training.data.TFRecordProvider(TRAIN_TFRECORD_FILEPATTERN)
dataset = data_provider.get_batch(batch_size=1, shuffle=False)

try:
  batch = next(iter(dataset))
except StopIteration:
  # An exhausted Python iterator signals emptiness with StopIteration; the
  # previous `except OutOfRangeError` named an undefined symbol and would have
  # raised NameError instead of this message.
  raise ValueError(
      'TFRecord contains no examples.')

# Parse the gin config.
# DRIVE_CHECKPOINTS_DIR is the value left behind by the training-loop cell,
# i.e. the folder of the last hyperparameter combination it iterated over.
gin_file = os.path.join(DRIVE_CHECKPOINTS_DIR, 'operative_config-0.gin')
gin.parse_config_file(gin_file)

# Load model
model = ddsp.training.models.Autoencoder()
model.restore(DRIVE_CHECKPOINTS_DIR)

# Original audio
audio_orig = batch['audio']

# Resynthesized audio (ddsp v0.14 API).
outputs = model(batch, training=False)
audio_gen = model.get_audio_from_outputs(outputs)

# ddsp v0.13 alternative, kept for reference:
#audio_gen = model(batch, training=False)
#audio_gen = audio_gen['add']['signal']
#audio_gen = audio_gen['additive']['signal']
#audio_gen = audio_gen['filtered_noise']['signal']

print('Original Audio')
specplot(audio_orig)
play(audio_orig)

print('Resynthesis')
specplot(audio_gen)
play(audio_gen)