In [10]:
# Install/upgrade PyDrive, which the code below uses to list and download
# files from Google Drive.
!pip install -U -q PyDrive
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# 1. Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# Choose a local (Colab) directory to store the data.
local_download_path = '/content/sounds'
# exist_ok=True makes re-running the cell safe without swallowing genuine
# failures (e.g. permission errors), which a bare `except: pass` would hide.
os.makedirs(local_download_path, exist_ok=True)

# 2. Auto-iterate using the query syntax
#    https://developers.google.com/drive/v2/web/search-parameters
file_list = drive.ListFile(
    {'q': "'1OjVacH4rm_aR03nzUksrt86vJNrBen_k' in parents"}).GetList()

# Drive allows several files with the same title in one folder; they all map
# to the same local path, so a later download overwrites an earlier one.
seen_titles = set()
for f in file_list:
  # 3. Create & download by id.
  print('title: %s, id: %s' % (f['title'], f['id']))
  fname = os.path.join(local_download_path, f['title'])
  if f['title'] in seen_titles:
    print('warning: duplicate title, overwriting {}'.format(fname))
  seen_titles.add(f['title'])
  print('downloading to {}'.format(fname))
  f_ = drive.CreateFile({'id': f['id']})
  f_.GetContentFile(fname)


title: split.zip, id: 1w3arJaiwy5F2tbHZAuiz4JrUVse9ioy0
downloading to /content/sounds/split.zip
title: split.zip, id: 1WKqDd4B_LqQuor6NJat-J66_mNffAajd
downloading to /content/sounds/split.zip


In [0]:
!rm -r /content/split

In [0]:
!unzip -q ./sounds/split.zip

In [0]:
# Install cadl, the package the wavenet / vctk modules are imported from below.
!pip install -q cadl

In [0]:
import os
import sys
import subprocess
from glob import glob
import numpy as np
import tensorflow as tf
from cadl import wavenet, vctk
from cadl import wavenet_utils as wnu
from cadl.utils import sample_categorical
from scipy.io import wavfile

In [0]:
def get_dataset(saveto='/content/split', convert_mp3_to_16khzwav=False):
    """Collect the wav files found under ``saveto`` into a list of records.

    The directory is searched recursively for ``*.wav`` files. If none are
    found and ``convert_mp3_to_16khzwav`` is set, every ``*.mp3`` under
    ``saveto`` is first converted with ffmpeg to a mono 16 kHz wav written
    next to it as ``<name>.mp3.16khz.wav``.

    The folder directly containing a file is treated as its chapter. File
    names are expected to look like ``CHAPTERNAME-UTTERANCE-DESCRIPTION.ext``;
    the second-to-last ``-`` separated field is used as the utterance.

    Parameters
    ----------
    saveto : str, optional
        Root directory holding the sound files ['/content/split'].
    convert_mp3_to_16khzwav : bool, optional
        Convert mp3 files to 16 kHz wav when no wav file exists yet.
        Requires ``ffmpeg`` on the PATH.

    Returns
    -------
    list of dict
        One dict per wav file with keys ``'name'`` (file path),
        ``'chapter'`` (parent folder name) and ``'utterance'``.

    Raises
    ------
    SystemExit
        If ``saveto`` does not exist or contains no wav files.
    subprocess.CalledProcessError
        If an ffmpeg conversion fails.
    """
    if not os.path.exists(saveto):
        sys.exit("Error: '" + saveto + "' folder does not exist")

    wav_pattern = '{}/**/*.wav'.format(saveto)
    wavs = glob(wav_pattern, recursive=True)
    if not wavs and convert_mp3_to_16khzwav:
        for mp3_i in glob('{}/**/*.mp3'.format(saveto), recursive=True):
            # -ac 1: mono, -ar 16000: 16 kHz, -y: overwrite existing output
            subprocess.check_call(
                ['ffmpeg', '-i', mp3_i, '-f', 'wav', '-ac', '1',
                 '-ar', '16000', '-y', '%s.16khz.wav' % mp3_i])
        # Pick up the files that were just written.
        wavs = glob(wav_pattern, recursive=True)

    if not wavs:
        sys.exit("Error: No 16khz wav files were found in '" + saveto + "'")

    dataset = []
    for wav_i in wavs:
        chapter_i, utter_i = wav_i.split('/')[-2:]
        fields = utter_i.split('-')
        if len(fields) >= 2:
            utterance = fields[-2]
            # Remove a literal '.wav' suffix only. str.strip('.wav') would
            # instead strip any of the characters '.', 'w', 'a', 'v' from both
            # ends and mangle utterances such as 'wave'.
            if utterance.endswith('.wav'):
                utterance = utterance[:-len('.wav')]
        else:
            # No '-' separator in the name: fall back to the bare file name
            # rather than failing with an IndexError.
            utterance = os.path.splitext(utter_i)[0]
        dataset.append({
            'name': wav_i,
            'chapter': chapter_i,
            'utterance': utterance})
    return dataset

In [0]:
def train():
    """Train WaveNet on the sound files returned by ``get_dataset()``.

    Builds the network with ``cadl.wavenet.create_wavenet``, restores the
    latest checkpoint found in the checkpoint directory (if any), then runs
    Adam for ``n_epochs`` passes over ``vctk.batch_generator``. The loss is
    printed at every iteration; every 100 iterations a summary is written
    and a checkpoint is saved to the same directory.

    Returns
    -------
    loss
        Loss value of the last training step, or ``None`` when the batch
        generator produced no batches.
    """
    batch_size = 4
    filter_length = 2
    n_stages = 7
    n_layers_per_stage = 9
    n_hidden = 48
    n_skip = 384

    dataset = get_dataset()
    it_i = 0
    n_epochs = 1000
    # Defined up front so the final `return` is valid even if no batch runs.
    loss = None
    sequence_length = wavenet.get_sequence_length(n_stages, n_layers_per_stage)
    ckpt_path = '/content/DTT-wavenet/wavenet_filterlen{}_batchsize{}_sequencelen{}_stages{}_layers{}_hidden{}_skips{}'.format(
        filter_length, batch_size, sequence_length, n_stages,
        n_layers_per_stage, n_hidden, n_skip)
    with tf.Graph().as_default(), tf.Session() as sess:
        net = wavenet.create_wavenet(
            batch_size=batch_size,
            filter_length=filter_length,
            n_hidden=n_hidden,
            n_skip=n_skip,
            n_stages=n_stages,
            n_layers_per_stage=n_layers_per_stage)
        # The saver is created before the optimizer is added to the graph.
        saver = tf.train.Saver()
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        latest_ckpt = tf.train.latest_checkpoint(ckpt_path)
        if latest_ckpt is not None:
            saver.restore(sess, latest_ckpt)
            # NOTE(review): it_i still starts at 0 after a restore, so step
            # numbers of new checkpoints/summaries restart as well -- confirm
            # this is intended.
        batch = vctk.batch_generator
        with tf.variable_scope('optimizer'):
            opt = tf.train.AdamOptimizer(
                learning_rate=0.0002).minimize(net['loss'])
        # Initialize only the variables whose name starts with 'optimizer',
        # so weights restored above are not reset.
        var_list = [
            v for v in tf.global_variables() if v.name.startswith('optimizer')
        ]
        sess.run(tf.variables_initializer(var_list))
        writer = tf.summary.FileWriter(ckpt_path)
        try:
            for epoch_i in range(n_epochs):
                for batch_xs in batch(dataset, batch_size, sequence_length):
                    # Only index 0 of the last axis of each batch is fed.
                    # NOTE(review): presumably the single audio channel --
                    # confirm against vctk.batch_generator.
                    loss, quantized, _ = sess.run(
                        [net['loss'], net['quantized'], opt],
                        feed_dict={net['X']: batch_xs[:, :, 0]})
                    print(loss)
                    if it_i % 100 == 0:
                        summary = sess.run(
                            net['summaries'],
                            feed_dict={net['X']: batch_xs[:, :, 0]})
                        writer.add_summary(summary, it_i)
                        # save
                        saver.save(
                            sess,
                            os.path.join(ckpt_path, 'model.ckpt'),
                            global_step=it_i)
                    it_i += 1
        finally:
            # Flush and close the event file even if training is interrupted.
            writer.close()
    return loss

In [0]:
# Run training; train() prints the loss of every iteration.
train()



52.306824
47.577663
43.43491
39.321568
31.832424
30.177965
29.913082
26.37685
25.658813
24.082653
22.508572
19.15745
19.552624
21.12784
17.790228
19.37264
17.809788
15.127951
14.587156
14.402204
13.31369
10.709483
10.863184
11.502953
10.7517185
10.166029
9.4656725
8.784109
9.758048
8.313398
8.669794
8.162658
8.459819
7.314669
7.3411984
9.47465
7.3938932
7.7704935
7.270576
6.9673357
6.938872
7.1335764
6.060617
5.926423
6.5615225
5.561482
5.8370624
5.5114255
6.4230585
5.4142
5.0582147
5.484192
5.7510095
5.568065
5.673754
5.543397
5.152883
5.6550145
5.1202745
5.600133
5.611402
4.8373466
5.5408783
5.1878424
5.035938
5.3769546
5.6439004
5.5550504
5.106635
5.3525248
5.508561
4.6402817
5.1628747
4.8282294
4.98192
4.4699373
5.0993347
4.8251114
4.312373
4.487525
4.518612
4.818572
5.1688514
4.6611214
4.578715
4.7184143
4.6144753
4.917338
4.5854278
5.136281
5.2257843
4.8558784
4.6293716
5.035847
4.183111
4.83464
4.497415
4.3425775
4.1790533
4.686104
4.5410237
4.301599
4.4109836
4.403029
4.313579


In [0]:
!zip /content/DTT_wavenet DTT_model.zip

In [0]:
# Upload the zipped checkpoints to Google Drive using the PyDrive client.
archive_path = "/content/DTT-wavenet.zip"
# Give the Drive file an explicit title.
# NOTE(review): without one, PyDrive appears to use the full local path as
# the title -- confirm.
file_tmp = drive.CreateFile({'title': os.path.basename(archive_path)})
file_tmp.SetContentFile(archive_path)
file_tmp.Upload()