In [None]:
# Mount Google Drive at /content/drive; the folder, dataset and checkpoint
# paths used further down are all built under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Project folder, given relative to "My Drive". The "Define folders" cell
# prefixes it with '/content/drive/My Drive', so keep the leading slash.
DRIVE_BASE_DIR = '/SMC 09/DDSP' #@param {type: "string"}
# Instrument name; it selects the DRIVE_BASE_DIR/audio/<INSTRUMENT> folder and
# is used as a prefix for the dataset, checkpoint and zip file names.
INSTRUMENT = 'violin' #@param {type: "string"}

# Install and import
Run the following cells to install the DDSP libraries (ignore apache-beam errors), import the Python libraries, and check whether the notebook is running on a GPU.

In [None]:
%tensorflow_version 2.x
!pip install -qU ddsp[data_preparation]==1.6.3

In [None]:
import glob
import os
import shutil

import gin
import numpy as np
import tensorflow as tf

import ddsp.training
from ddsp.colab import colab_utils
from ddsp.colab.colab_utils import play, specplot

from matplotlib import pyplot as plt

# Use the 'retina' figure format for inline matplotlib output.
%config InlineBackend.figure_format='retina'

# (Re)load the TensorBoard notebook extension; `tb` is used by the
# "Launch TensorBoard" cell.
%reload_ext tensorboard
import tensorboard as tb

# Frame rate shared by dataset creation (ddsp_prepare_tfrecord), the
# TFRecordProvider instances and the TFRecordProvider.frame_rate gin binding
# used for training. All three must agree, so change it only here.
# FRAME_RATE = 10
FRAME_RATE = 250

In [None]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

In [None]:
def get_frame(fromDataset=True, frame_rate=250, file_pattern=None):
  """Return the first element read from the training TFRecord files.

  Args:
    fromDataset: If True, the element comes from
      `data_provider.get_dataset(shuffle=False)`; otherwise it comes from
      `data_provider.get_batch(batch_size=1, shuffle=True)`.
    frame_rate: Frame rate passed to `TFRecordProvider`.
    file_pattern: TFRecord file pattern to read. When None, the notebook-level
      TRAIN_TFRECORD_FILEPATTERN is used (looked up at call time, so the
      "Define folders" cell must have been run).

  Returns:
    The first element yielded by the selected dataset.

  Raises:
    ValueError: If the dataset yields no elements.
  """
  if file_pattern is None:
    file_pattern = TRAIN_TFRECORD_FILEPATTERN

  data_provider = ddsp.training.data.TFRecordProvider(file_pattern, frame_rate=frame_rate)
  if fromDataset:
    dataset = data_provider.get_dataset(shuffle=False)
  else:
    dataset = data_provider.get_batch(batch_size=1, shuffle=True)

  try:
    return next(iter(dataset))
  # OutOfRangeError must be referenced through tf.errors: a bare name here is
  # undefined and would turn any unrelated exception into a NameError.
  except (StopIteration, tf.errors.OutOfRangeError) as err:
    raise ValueError(
        'TFRecord contains no examples. Please try re-running the pipeline with '
        'different audio file(s).') from err

# Define folders
Point DRIVE_BASE_DIR to a folder in Google Drive. DRIVE_BASE_DIR should contain a folder called "audio". Inside "audio", create one folder per instrument and put the mp3 and wav files that will be used to train the model in it.

*For example, to train the model with flute sounds, create DRIVE_BASE_DIR/audio/flute and put there around 15-20 minutes worth of flute sounds. It is better to split the audio files into smaller pieces (2-4 minutes are ok) than having a single 20-minute long audio file.*

In [None]:
# Resolve the form value against the mounted Drive root. The guard keeps this
# cell idempotent: re-running it must not prepend the root a second time.
DRIVE_ROOT = '/content/drive/My Drive'
if not DRIVE_BASE_DIR.startswith(DRIVE_ROOT):
  DRIVE_BASE_DIR = DRIVE_ROOT + DRIVE_BASE_DIR
DRIVE_AUDIO_DIR = DRIVE_BASE_DIR + '/audio'

# Fail early, with the offending path, if the instrument folder is missing.
DRIVE_INSTRUMENT_AUDIO_DIR = DRIVE_AUDIO_DIR + '/' + INSTRUMENT
assert os.path.exists(DRIVE_INSTRUMENT_AUDIO_DIR), (
    'Audio folder not found: {}'.format(DRIVE_INSTRUMENT_AUDIO_DIR))

# Colab-side working copy of the audio files.
COLAB_AUDIO_DIR = 'data/' + INSTRUMENT + '_audio'
os.makedirs(COLAB_AUDIO_DIR, exist_ok=True)
AUDIO_FILEPATTERN = COLAB_AUDIO_DIR + '/*'

# Drive-side folders for checkpoints and for the prepared dataset.
DRIVE_CHECKPOINTS_DIR = os.path.join(DRIVE_AUDIO_DIR, INSTRUMENT+'_checkpoints')
os.makedirs(DRIVE_CHECKPOINTS_DIR, exist_ok=True)
DRIVE_DATASET_DIR = DRIVE_AUDIO_DIR + '/' + INSTRUMENT + '_dataset'

# Colab-side TFRecord path; the trailing '*' matches every shard written
# with this prefix.
COLAB_TRAIN_TFRECORD = 'data/' + INSTRUMENT + '_dataset/train.tfrecord'
TRAIN_TFRECORD_FILEPATTERN = COLAB_TRAIN_TFRECORD + '*'

# Destination folder for the exported (zipped) instruments.
os.makedirs(os.path.join(DRIVE_BASE_DIR, 'instruments'), exist_ok=True)

# Prepare dataset
If no dataset is present (i.e. the folder DRIVE_BASE_DIR/audio/INSTRUMENT_dataset is missing or empty), the audio files are copied to Colab, the dataset is created, and it is copied back to Drive for safekeeping.

If the dataset is present, the dataset files are copied to Colab.

In [None]:
dataset_files = glob.glob(DRIVE_DATASET_DIR + '/*')

if len(dataset_files) == 0:  

  #Copy audio files
  mp3_files = glob.glob(os.path.join(DRIVE_AUDIO_DIR + '/' + INSTRUMENT, '*.mp3'))
  wav_files = glob.glob(os.path.join(DRIVE_AUDIO_DIR + '/' + INSTRUMENT, '*.wav'))
  audio_files = mp3_files + wav_files
  #TODO: remove parenthesis in filenames
  for fname in audio_files:
    target_name = os.path.join(COLAB_AUDIO_DIR, 
                               os.path.basename(fname).replace(' ', '_').replace('\'', '_'))
    print('Copying {} to {}'.format(fname, target_name))
    !cp "$fname" $target_name
  

  #Create dataset
  print("Creating " + INSTRUMENT + " dataset")
  if not glob.glob(AUDIO_FILEPATTERN):
    raise ValueError('No audio files found')
  
  !ddsp_prepare_tfrecord \
    --input_audio_filepatterns=$AUDIO_FILEPATTERN \
    --output_tfrecord_path=$COLAB_TRAIN_TFRECORD \
    --num_shards=10 \
    --alsologtostderr \
    --frame_rate=$FRAME_RATE
  

  #Copy dataset to drive for safe-keeping.
  !mkdir "$DRIVE_DATASET_DIR"/
  print('Saving to {}'.format(DRIVE_DATASET_DIR))
  !cp $TRAIN_TFRECORD_FILEPATTERN "$DRIVE_DATASET_DIR"/
  
  data_provider = ddsp.training.data.TFRecordProvider(TRAIN_TFRECORD_FILEPATTERN, frame_rate=FRAME_RATE)
  dataset = data_provider.get_dataset(shuffle=False)
  
  PICKLE_FILE_PATH = os.path.join(DRIVE_CHECKPOINTS_DIR, 'dataset_statistics.pkl')
  colab_utils.save_dataset_statistics(data_provider, PICKLE_FILE_PATH)

else:

  #Copy the dataset from Drive to Colab
  print("Restoring dataset for " + INSTRUMENT)
  !mkdir -p 'data/'$INSTRUMENT'_dataset'
  !cp "$DRIVE_DATASET_DIR"/* 'data/'$INSTRUMENT'_dataset/'

# Sanity check 1
Take the first example of the dataset, plot and play its audio, and show its loudness, F0 and F0 confidence curves.


In [None]:
# Fetch the first (unshuffled) example of the dataset.
frame = get_frame(frame_rate=FRAME_RATE)

# Plot and play the raw audio of that example.
colab_utils.specplot(frame['audio'])
colab_utils.play(frame['audio'])

# One stacked panel per feature, all sharing the same time axis. The axis runs
# from 0 to 4.0 with one point per loudness frame.
x = np.linspace(0, 4.0, frame['loudness_db'].shape[0])
f, ax = plt.subplots(3, 1, figsize=(12, 8))
ax[2].set_xlabel('seconds')
panels = (('loudness_db', 'loudness_db'),
          ('f0_Hz', 'f0_hz'),
          ('f0_confidence', 'f0_confidence'))
for panel, (ylabel, feature_key) in zip(ax, panels):
  panel.set_ylabel(ylabel)
  panel.plot(x, frame[feature_key])

# Launch TensorBoard

In [None]:
# Open TensorBoard inline, reading its logs from the Drive checkpoints folder.
# The path is quoted because it contains spaces.
tb.notebook.start(f'--logdir "{DRIVE_CHECKPOINTS_DIR}"')

# Train the model

In [None]:
#TODO: make an easier way to choose gin file for training

tf_file_pattern = f"TFRecordProvider.file_pattern='{TRAIN_TFRECORD_FILEPATTERN}'"
tf_frame_rate = f"TFRecordProvider.frame_rate={FRAME_RATE}"

!ddsp_run \
  --mode=train \
  --alsologtostderr \
  --save_dir="$DRIVE_CHECKPOINTS_DIR" \
  --gin_file="$DRIVE_AUDIO_DIR/singing_z.gin" \
  --gin_file=datasets/tfrecord.gin \
  --gin_param="$tf_file_pattern" \
  --gin_param="$tf_frame_rate" \
  --gin_param="batch_size=32" \
  --gin_param="train_util.train.num_steps=3000" \
  --gin_param="train_util.train.steps_per_save=100" \
  --gin_param="train_util.train.steps_per_summary=100" \
  --gin_param="trainers.Trainer.checkpoints_to_keep=5"

#  --gin_file='$DRIVE_AUDIO_DIR/singing.gin' \
#  --gin_file='$DRIVE_AUDIO_DIR/singing_z.gin' \

# Sanity check 2
Resynthesis of an existing frame.

*Since the v1.0 update, an error about not being able to find the gin file appears from time to time. Just rerun the cell and it will be fine (!)*

In [None]:
# Draw one shuffled batch (batch_size=1) to push through the trained model.
batch = get_frame(fromDataset=False, frame_rate=FRAME_RATE)

# Parse the operative gin config stored in the checkpoints folder, then build
# the autoencoder and restore its weights from that same folder.
gin_file = os.path.join(DRIVE_CHECKPOINTS_DIR, 'operative_config-0.gin')
gin.parse_config_file(gin_file)
model = ddsp.training.models.Autoencoder()
model.restore(DRIVE_CHECKPOINTS_DIR)

# Keep the input audio and run a forward pass with training disabled.
audio_orig = batch['audio']
outputs = model(batch, training=False)
audio_gen = model.get_audio_from_outputs(outputs)

# Show and play the original first, then the resynthesis.
for title, audio in (('Original Audio', audio_orig), ('Resynthesis', audio_gen)):
  print(title)
  specplot(audio)
  play(audio)

# Export instrument
The model is zipped and copied back to Google Drive.

In [None]:
CHECKPOINT_ZIP = INSTRUMENT + '_instrument.zip'
latest_checkpoint_fname = os.path.basename(tf.train.latest_checkpoint(DRIVE_CHECKPOINTS_DIR))
!cd "$DRIVE_CHECKPOINTS_DIR" && zip $CHECKPOINT_ZIP $latest_checkpoint_fname* operative_config-0.gin dataset_statistics.pkl
!cp "$DRIVE_CHECKPOINTS_DIR/$CHECKPOINT_ZIP" "$DRIVE_BASE_DIR/instruments/" 

# Colab clean-up
Useful if Colab is running out of disk space. Make sure everything you need has been saved elsewhere (e.g. to Google Drive) before uncommenting and executing this cell: there is no undo.

In [None]:
# Destructive and deliberately left commented out: uncomment to delete the
# Colab-side /content/data folder.
#!rm -r /content/data
