In [1]:
!pip install -U -q PyDrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate this Colab session and build the PyDrive client from the
#    application-default credentials.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# 2. Choose a local (Colab) directory to store the data.
#    exist_ok=True makes re-running this cell harmless when the directory is
#    already there, while genuine failures (e.g. permissions) are still raised
#    instead of being hidden by a bare `except`.
local_download_path = os.path.expanduser('/content/sounds')
os.makedirs(local_download_path, exist_ok=True)

# 3. Obtain the training data from Google Drive (looked up by file id) and
#    save it into the download directory as split-mp3.zip.
download = drive.CreateFile({'id': '1lKb0ORrlnTkFG9PXRvU7LYW2svy6K9w_'})
name = os.path.join(local_download_path, 'split-mp3.zip')
download.GetContentFile(name)


[?25l[K    1% |▎                               | 10kB 21.7MB/s eta 0:00:01[K    2% |▋                               | 20kB 1.8MB/s eta 0:00:01[K    3% |█                               | 30kB 2.7MB/s eta 0:00:01[K    4% |█▎                              | 40kB 1.7MB/s eta 0:00:01[K    5% |█▋                              | 51kB 2.1MB/s eta 0:00:01[K    6% |██                              | 61kB 2.5MB/s eta 0:00:01[K    7% |██▎                             | 71kB 2.9MB/s eta 0:00:01[K    8% |██▋                             | 81kB 3.3MB/s eta 0:00:01[K    9% |███                             | 92kB 3.7MB/s eta 0:00:01[K    10% |███▎                            | 102kB 2.8MB/s eta 0:00:01[K    11% |███▋                            | 112kB 2.9MB/s eta 0:00:01[K    12% |████                            | 122kB 4.0MB/s eta 0:00:01[K    13% |████▎                           | 133kB 4.0MB/s eta 0:00:01[K    14% |████▋                           | 143kB 7.7MB/s eta 0:00:01[

In [0]:
# !rm -r /content/split

In [0]:
# Quietly (-q) extract the downloaded training archive into the current
# working directory.
# NOTE(review): the relative path assumes the working directory is /content,
# so that ./sounds matches the /content/sounds download directory - confirm.
!unzip -q ./sounds/split-mp3.zip

In [3]:
!pip install -q cadl

[?25l[K    6% |██▏                             | 10kB 17.0MB/s eta 0:00:01[K    13% |████▍                           | 20kB 1.9MB/s eta 0:00:01[K    20% |██████▋                         | 30kB 2.7MB/s eta 0:00:01[K    27% |████████▉                       | 40kB 1.7MB/s eta 0:00:01[K    34% |███████████                     | 51kB 2.2MB/s eta 0:00:01[K    41% |█████████████▎                  | 61kB 2.6MB/s eta 0:00:01[K    48% |███████████████▌                | 71kB 3.0MB/s eta 0:00:01[K    55% |█████████████████▋              | 81kB 3.3MB/s eta 0:00:01[K    62% |███████████████████▉            | 92kB 3.7MB/s eta 0:00:01[K    69% |██████████████████████          | 102kB 2.9MB/s eta 0:00:01[K    75% |████████████████████████▎       | 112kB 2.9MB/s eta 0:00:01[K    82% |██████████████████████████▌     | 122kB 4.0MB/s eta 0:00:01[K    89% |████████████████████████████▊   | 133kB 4.0MB/s eta 0:00:01[K    96% |███████████████████████████████ | 143kB 7.6MB/s eta 0:

In [0]:
# Quietly (-q) extract a saved model archive into the current working
# directory.
# NOTE(review): DTTModel.zip is not downloaded by any cell visible here; it
# presumably has to be uploaded to the working directory first - confirm.
!unzip -q ./DTTModel.zip

In [0]:
!mv ./content/DTT-wavenet/ DTT_wavenet

In [7]:
import os
import sys
import subprocess
from glob import glob
import numpy as np
import tensorflow as tf
from cadl import wavenet, vctk
from cadl import wavenet_utils as wnu
from cadl.utils import sample_categorical
from scipy.io import wavfile

Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.


In [0]:
def _utterance_from_filename(filename):
    """Return the UTTERANCE field of 'CHAPTERNAME-UTTERANCE-DESCRIPTION.ext'."""
    fields = filename.split('-')
    if len(fields) >= 2:
        # The utterance id is the second-to-last dash-separated field. It is
        # returned untouched: str.strip('.wav') removes any of the characters
        # '.', 'w', 'a', 'v' from both ends (it is not a suffix removal), which
        # corrupted ids such as 'wave'.
        return fields[-2]
    # File name without any dash: use the part before the first dot instead
    # of failing with an IndexError.
    return filename.split('.')[0]


def get_dataset(saveto='/content/split-mp3', convert_mp3_to_16khzwav=False):
    """List the wav files under 'saveto', optionally converting MP3s first.

    Subfolders under the 'saveto' directory are considered chapters.
    Each file name should be formatted CHAPTERNAME-UTTERANCE-DESCRIPTION.mp3
    ffmpeg must be installed to convert the files.

    Parameters
    ----------
    saveto : str, optional
        Root directory that is searched recursively for audio files
        ['/content/split-mp3'].
    convert_mp3_to_16khzwav : bool, optional
        If True and no .wav file exists anywhere under 'saveto', every .mp3
        found is converted with ffmpeg to a 16 kHz mono wav written next to
        it as '<original name>.16khz.wav' [False].

    Returns
    -------
    list of dict
        One entry per wav file (sorted by path) with the keys 'name'
        (path of the wav file), 'chapter' (name of the containing folder)
        and 'utterance' (second-to-last dash-separated field of the file
        name).

    Raises
    ------
    SystemExit
        If 'saveto' does not exist or no wav files are available.
    subprocess.CalledProcessError
        If an ffmpeg conversion exits with a non-zero status.
    """
    if not os.path.exists(saveto):
        sys.exit("Error: '" + saveto + "' folder does not exist")

    wav_pattern = '{}/**/*.wav'.format(saveto)
    wavs = glob(wav_pattern, recursive=True)
    if not wavs and convert_mp3_to_16khzwav:
        for mp3_i in glob('{}/**/*.mp3'.format(saveto), recursive=True):
            subprocess.check_call(
                ['ffmpeg', '-i', mp3_i, '-f', 'wav', '-ac', '1', '-ar', '16000', '-y', '%s.16khz.wav' % mp3_i])
        # Pick up the files that were just written.
        wavs = glob(wav_pattern, recursive=True)

    if not wavs:
        sys.exit("Error: No 16khz wav files were found in '" + saveto + "'")

    dataset = []
    # glob gives no ordering guarantee; sort so the listing is reproducible.
    for wav_i in sorted(wavs):
        chapter_i, utter_i = wav_i.split('/')[-2:]
        dataset.append({
            'name': wav_i,
            'chapter': chapter_i,
            'utterance': _utterance_from_filename(utter_i)})
    return dataset

In [0]:
# One-time preparation: when no .wav file exists yet under /content/split-mp3,
# this converts every .mp3 there to a 16 kHz mono wav via ffmpeg and returns
# the dataset listing, which the notebook shows as this cell's output.
get_dataset(convert_mp3_to_16khzwav=True)

In [0]:
def train():
    """Train the WaveNet on the dataset returned by get_dataset().

    Builds the network in a fresh graph, restores the latest checkpoint from
    the checkpoint directory when one exists, then runs Adam for a fixed
    number of epochs. Every 100 iterations a summary is written and a
    checkpoint is saved to the same directory.

    Returns
    -------
    list
        The loss value of every training iteration, in order.
    """
    loss_lst = []
    batch_size = 4
    filter_length = 2
    n_stages = 7
    n_layers_per_stage = 9
    n_hidden = 48
    n_skip = 384

    dataset = get_dataset()
    # NOTE: the iteration counter always starts at 0, also after restoring a
    # checkpoint, so checkpoints saved on a resumed run reuse low step numbers.
    it_i = 0
    n_epochs = 1000
    sequence_length = wavenet.get_sequence_length(n_stages, n_layers_per_stage)
    ckpt_path = '/content/DTT-wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}'.format(
        filter_length, batch_size, sequence_length, n_stages,
        n_layers_per_stage, n_hidden, n_skip)
    with tf.Graph().as_default(), tf.Session() as sess:
        net = wavenet.create_wavenet(
            batch_size=batch_size,
            filter_length=filter_length,
            n_hidden=n_hidden,
            n_skip=n_skip,
            n_stages=n_stages,
            n_layers_per_stage=n_layers_per_stage)
        # The saver is created before the optimizer, so it only tracks the
        # variables that exist at this point (optimizer state is not saved).
        saver = tf.train.Saver()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        # Look the checkpoint up once and reuse the result for the restore.
        latest_ckpt = tf.train.latest_checkpoint(ckpt_path)
        if latest_ckpt is not None:
            saver.restore(sess, latest_ckpt)
        batch = vctk.batch_generator
        with tf.variable_scope('optimizer'):
            opt = tf.train.AdamOptimizer(
                learning_rate=0.0002).minimize(net['loss'])
        # Only the optimizer's own variables still need initialising; running
        # the global initializer again would wipe the restored weights.
        var_list = [
            v for v in tf.global_variables() if v.name.startswith('optimizer')
        ]
        sess.run(tf.variables_initializer(var_list))
        writer = tf.summary.FileWriter(ckpt_path)
        try:
            for _epoch_i in range(n_epochs):
                for batch_xs in batch(dataset, batch_size, sequence_length):
                    # The quantized output was fetched but never used, so it
                    # is no longer requested from the session.
                    loss, _ = sess.run(
                        [net['loss'], opt],
                        feed_dict={net['X']: batch_xs})
                    loss_lst.append(loss)
                    if it_i % 100 == 0:
                        summary = sess.run(
                            net['summaries'], feed_dict={net['X']: batch_xs})
                        writer.add_summary(summary, it_i)
                        # save
                        saver.save(
                            sess,
                            os.path.join(ckpt_path, 'model.ckpt'),
                            global_step=it_i)
                        # TODO: optionally upload the checkpoint to Google
                        # Drive here.
                    it_i += 1
        finally:
            # Flush and release the event file even when training is
            # interrupted (e.g. by stopping the cell).
            writer.close()
    return loss_lst

In [10]:
# Start training. get_dataset(), called inside train(), invokes sys.exit when
# /content/split-mp3 is missing or holds no .wav files; the notebook reports
# that as SystemExit.
train()

SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [0]:
# !zip DTTModel.zip -r /content/DTT-wavenet 

In [0]:
def synthesis():
    """Generate audio with the trained WaveNet and write it to wav files.

    Rebuilds the network with the same hyper-parameters as train(), restores
    the latest checkpoint if one is found (otherwise prints a message and
    continues with freshly initialised weights), primes the output with audio
    from the dataset and then generates the remaining samples one at a time.

    The result is written to 'synthesis-<i>.wav' (16000 Hz), one file per
    batch entry, in the current working directory. Nothing is returned.
    """
    batch_size = 4
    filter_length = 2
    n_stages = 7
    n_layers_per_stage = 9
    n_hidden = 48
    n_skip = 384
    total_length = 16000
    sequence_length = wavenet.get_sequence_length(n_stages, n_layers_per_stage)
    prime_length = sequence_length
    ckpt_path = '/content/DTT-wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}/'.format(
        filter_length, batch_size, sequence_length, n_stages,
        n_layers_per_stage, n_hidden, n_skip)

    dataset = get_dataset()
    # NOTE(review): train() feeds the generator's items to the network as a
    # whole batch, so [0] presumably selects a single sequence that is then
    # broadcast to every row of `synth` below - confirm this is intended.
    batch = next(
        vctk.batch_generator(dataset, batch_size, prime_length))[0]

    # Build the network in its own graph, as train() does, so that re-running
    # this cell does not pile up duplicate variables in the global default
    # graph.
    with tf.Graph().as_default(), tf.Session() as sess:
        net = wavenet.create_wavenet(
            batch_size=batch_size,
            filter_length=filter_length,
            n_hidden=n_hidden,
            n_skip=n_skip,
            n_layers_per_stage=n_layers_per_stage,
            n_stages=n_stages,
            shift=False)
        saver = tf.train.Saver()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        # Look the checkpoint up once and reuse the result for the restore.
        latest_ckpt = tf.train.latest_checkpoint(ckpt_path)
        if latest_ckpt is not None:
            saver.restore(sess, latest_ckpt)
        else:
            print('Could not find checkpoint')

        synth = np.zeros([batch_size, total_length], dtype=np.float32)
        synth[:, :prime_length] = batch

        print('Synthesize...')
        print(total_length - prime_length)

        for sample_i in range(0, total_length - prime_length):
            if sample_i % 10 == 0:
                print(f'{sample_i}/{prime_length}/{total_length}')
            # Feed a sliding window of `sequence_length` samples.
            probs = sess.run(
                net["probs"],
                feed_dict={net["X"]: synth[:, sample_i:sample_i + sequence_length]})
            idxs = sample_categorical(probs)
            idxs = idxs.reshape((batch_size, sequence_length))
            if sample_i == 0:
                # First step: replace the whole priming segment with the
                # network's own sampled output.
                audio = wnu.inv_mu_law_numpy(idxs - 128)
                synth[:, :prime_length] = audio
            else:
                # Later steps: keep only the newest sample of the window.
                # NOTE(review): index `prime_length` itself is never written
                # by either branch and stays 0 - confirm this is intended.
                audio = wnu.inv_mu_law_numpy(idxs[:, -1] - 128)
                synth[:, prime_length + sample_i] = audio

        for i in range(batch_size):
            wavfile.write('synthesis-{}.wav'.format(i), 16000, synth[i])



In [0]:
# Generate audio; writes synthesis-0.wav ... synthesis-3.wav (one per batch
# entry) to the current working directory and prints progress every 10 samples.
synthesis()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /content/DTT-wavenet/wavenet_filterlen2_batchsize4_sequencelen7168_stages7_layers9_hidden48_skips384/model.ckpt-9600
Synthesize...
8832
0/7168/16000
10/7168/16000
20/7168/16000
30/7168/16000
40/7168/16000
50/7168/16000
60/7168/16000
70/7168/16000
80/7168/16000
90/7168/16000
100/7168/16000
110/7168/16000
120/7168/16000
130/7168/16000
140/7168/16000
150/7168/16000
160/7168/16000
170/7168/16000
180/7168/16000
190/7168/16000
200/7168/16000
210/7168/16000
220/7168/16000
230/7168/16000
240/7168/16000
250/7168/16000
260/7168/16000
270/7168/16000
280/7168/16000
290/7168/16000
300/7168/16000
310/7168/16000
320/7168/16000
330/7168/16000
340/7168/16000
350/7168/16000
360/7168/16000
370/7168/16000
380/7168/16000
390/7168/16000
400/7168/16000
410/7168/16000
420/7168/16000
430/7168/16000
440/7168/16000