In [1]:
# Run this cell to mount your Google Drive at /content/drive.
# The project directory used by the rest of the notebook (PRJ_DIR) lives under
# this mount point, so this cell has to run first.
# NOTE(review): force_remount=True presumably re-mounts even when a mount is
# already present — confirm against the google.colab.drive documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# Root of the project on the mounted Google Drive. Checkpoints and datasets
# are addressed relative to this directory further down in the notebook.
PRJ_DIR = '/content/drive/My Drive/Colab Notebooks/Waven_up'

import sys
# Make the project's local modules (LJSpeechDataset, hparams, model) importable.
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
if PRJ_DIR not in sys.path:
    sys.path.append(PRJ_DIR)

import os
import datetime

import pandas as pd
import seaborn as sns
import numpy as np
from tqdm import tqdm, tqdm_notebook
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader

# Install the TensorBoardColab helper into the runtime, then create the logger
# object. `tb` is used by the training cell (tb.save_value) to record the
# per-epoch train/validation losses.
# NOTE(review): the captured output of this cell shows a "TensorBoard link"
# (ngrok URL); presumably it is printed when TensorBoardColab() is constructed.
# NOTE(review): consider `%pip` with a pinned version so the install targets
# the kernel's environment and stays reproducible.
!pip install tensorboardcolab
from tensorboardcolab import TensorBoardColab
tb = TensorBoardColab()


from LJSpeechDataset import LJSpeechDataset
from hparams import hparams, Struct
from model import Generator as AEUpsampler

# --- Plotting defaults -------------------------------------------------------
plt.ion()   # interactive mode
plt.style.use(['seaborn-poster'])

# --- Compute device ----------------------------------------------------------
torch.set_num_threads(4)
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
print(device)

# --- Reproducibility ---------------------------------------------------------
# Seed every RNG the notebook relies on. NumPy alone is not enough: the model's
# weight initialisation and any stochastic layers draw from torch's generator,
# so it is seeded as well.
seed=42
np.random.seed(seed)
torch.manual_seed(seed)



Using TensorFlow backend.


Wait for 8 seconds...
TensorBoard link:
https://d203a360.ngrok.io
wavenet_vocoder
cuda:0


In [0]:
from math import ceil

def pad_seq(x, base=32):
    """Zero-pad a 2-D array along its first axis to a multiple of ``base``.

    Args:
        x: 2-D NumPy array; only axis 0 is extended, axis 1 is left untouched.
        base: block size the first axis must become a multiple of.

    Returns:
        A ``(padded, len_pad)`` tuple: the padded array (zeros appended at the
        end of axis 0) and the number of rows that were appended.
    """
    n_rows = x.shape[0]
    # Round the row count up to the nearest whole number of `base`-sized blocks.
    n_blocks = ceil(float(n_rows) / base)
    padded_rows = int(base * n_blocks)
    len_pad = padded_rows - n_rows
    assert len_pad >= 0
    pad_width = ((0, len_pad), (0, 0))
    padded = np.pad(x, pad_width, 'constant')
    return padded, len_pad
  
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Serialise a training checkpoint to disk.

    Args:
        state: object handed to ``torch.save``; the training loop passes a dict
            with the keys ``'epoch'``, ``'state_dict'`` and ``'optimizer'``.
        is_best: when true, the saved file is additionally copied to
            ``model_best.pth.tar`` in the current working directory.
        filename: destination path of the checkpoint file.
    """
    # Imported locally: the notebook's import cell does not provide shutil, so
    # the is_best branch used to fail with a NameError.
    import shutil

    torch.save(state, filename)
    print("## checkpoin {} saved!".format(filename))
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')
        
def load_checkpoint(model, optimizer, filename='checkpoint.pth.tar', map_location=None):
    """Restore model and optimizer state from a checkpoint file, if it exists.

    Args:
        model: module whose ``load_state_dict`` receives ``checkpoint['state_dict']``.
        optimizer: optimizer whose ``load_state_dict`` receives ``checkpoint['optimizer']``.
        filename: path of the checkpoint written by ``save_checkpoint``.
        map_location: forwarded to ``torch.load``; pass e.g. ``'cpu'`` to load a
            checkpoint saved on a GPU into a CPU-only runtime. ``None`` keeps
            torch's default behaviour.

    Returns:
        ``(model, optimizer, epoch)``. ``epoch`` is the value stored under
        ``checkpoint['epoch']``, or ``0`` when no checkpoint file is found (the
        model and optimizer are then returned unchanged).
    """
    # Default for the "no checkpoint" branch; previously `epoch` was unbound
    # there and the final return raised UnboundLocalError.
    epoch = 0
    if os.path.isfile(filename):
        print("=> loading checkpoint '{}'".format(filename))
        # NOTE: torch.load unpickles the file — only load checkpoints you trust.
        checkpoint = torch.load(filename, map_location=map_location)
        epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(filename, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(filename))
    return model, optimizer, epoch

In [4]:
%%time

# Paths used by this run: where checkpoints are written, which checkpoint to
# resume from, and where the dataset lives.
args = Struct(**{
    'checkpoint_dir': PRJ_DIR + '/checkpoints/',
    'checkpoint_path' : PRJ_DIR + '/checkpoints/checkpoint_4000_1600_ep51_31.pth.tar',
    'dataset_dir': PRJ_DIR+'/dataset/ljspeech4000_16000'
})

ljspeech = LJSpeechDataset(args.dataset_dir)
# Constructor arguments are defined by Generator in model.py (not shown here).
model = AEUpsampler(32,0,512,32).to(device)


criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
start_epoch = 0

# Load model from last checkpoint
if args.checkpoint_path:
  model, optimizer, start_epoch = load_checkpoint(model, optimizer, args.checkpoint_path)

# --- Smoke test: push the first segment of sample 0 through the model --------
inputs, labels = ljspeech[0]

# pad_seq pads the first axis of the transposed arrays to a multiple of 32.
x, _ = pad_seq(np.array(inputs[0]).T)
y, _ = pad_seq(np.array(labels[0]).T)

# Add a leading batch axis to x, and batch + channel axes to y.
x = torch.from_numpy(x[np.newaxis, :, :]).to(device)
y = torch.from_numpy(y[np.newaxis, np.newaxis, :, :]).to(device)

print('-'*100)
print('X SHAPE, Y SHAPE')
print(x.shape, y.shape)
print('-'*100)
print('MEL_OUT SHAPE, MEL_OUT_POST SHAPE, CODES SHAPE')

# Only the output shapes are inspected here, so skip building an autograd graph.
with torch.no_grad():
  mel_outputs, mel_outputs_postnet, codes = model(x)
print(mel_outputs.shape, mel_outputs_postnet.shape, codes.shape)
print('-'*100)

=> loading checkpoint '/content/drive/My Drive/Colab Notebooks/Waven_up/checkpoints/checkpoint_4000_1600_ep51_31.pth.tar'
=> loaded checkpoint '/content/drive/My Drive/Colab Notebooks/Waven_up/checkpoints/checkpoint_4000_1600_ep51_31.pth.tar' (epoch 52)
----------------------------------------------------------------------------------------------------
X SHAPE, Y SHAPE
torch.Size([1, 32, 80]) torch.Size([1, 1, 128, 80])
----------------------------------------------------------------------------------------------------
MEL_OUT SHAPE, MEL_OUT_POST SHAPE, CODES SHAPE
torch.Size([1, 1, 128, 80]) torch.Size([1, 1, 128, 80]) torch.Size([1, 64])
----------------------------------------------------------------------------------------------------
CPU times: user 3 s, sys: 1.25 s, total: 4.25 s
Wall time: 4.63 s


In [0]:
%%time

# Split the dataset indices into train and a held-out part, then halve the
# held-out part into validation and test (the recorded run shows 7729 / 2620 /
# 2620 samples).
train_samples_index, testval_samples_index = ljspeech.getRandomSamplesIndex(seed, 0.4)

# array_split also copes with an odd-sized held-out set, where np.split raises.
testval_samples_index = np.array_split(testval_samples_index, 2)
train_samples_index = train_samples_index.astype(np.int32)
val_samples_index = testval_samples_index[0].astype(np.int32)
test_samples_index = testval_samples_index[1].astype(np.int32)

print('TRAIN n_samples {}'.format(len(train_samples_index)))
print('VAL n_samples {}'.format(len(val_samples_index)))

print(train_samples_index.shape, val_samples_index.shape, test_samples_index.shape)

# Report the running training loss every n_mb dataset samples.
n_mb = 1000
# Only the first 100 validation samples are scored each epoch.
val_subset = val_samples_index[:100]


def _to_model_batch(segment_in, segment_out):
  """Pad one (input, label) segment pair and return them as tensors on `device`.

  The input gets a leading batch axis; the label gets batch and channel axes.
  """
  x, _ = pad_seq(np.array(segment_in).T)
  y, _ = pad_seq(np.array(segment_out).T)
  x = torch.from_numpy(x[np.newaxis, :, :]).to(device)
  y = torch.from_numpy(y[np.newaxis, np.newaxis, :, :]).to(device)
  return x, y


for epoch in tqdm(range(10)):  # loop over the dataset multiple times
  epoch = start_epoch + epoch
  running_loss = 0.0
  batch_running_loss = 0.0

  ### TRAIN ###
  model.train()
  for i_step, i in enumerate(train_samples_index):
    # each dataset item is a pair of sequences: input segments and label segments
    inputs, labels = ljspeech[i]

    for j, _ in enumerate(inputs):
      x, y = _to_model_batch(inputs[j], labels[j])

      # zero the parameter gradients
      optimizer.zero_grad()

      # forward + backward + optimize
      mel_outputs, mel_outputs_postnet, codes = model(x)
      loss = criterion(mel_outputs_postnet, y)
      loss.backward()
      optimizer.step()

      # accumulate statistics
      step_loss = loss.item()
      running_loss += step_loss
      batch_running_loss += step_loss

    # print every n_mb samples
    if i_step % n_mb == n_mb - 1:
      print('[%d, %5d] loss: %.3f' % (epoch + 1, i_step + 1, batch_running_loss/n_mb))
      batch_running_loss = 0.0

  ### VALIDATION ###
  model.eval()
  val_running_loss = 0.0
  # No gradients are needed for scoring; this avoids building autograd graphs.
  with torch.no_grad():
    for i in val_subset:
      inputs, labels = ljspeech[i]

      for j, _ in enumerate(inputs):
        x, y = _to_model_batch(inputs[j], labels[j])

        # forward only
        mel_outputs, mel_outputs_postnet, codes = model(x)
        val_running_loss += criterion(mel_outputs_postnet, y).item()

  # Normalise each accumulator by the number of samples that actually
  # contributed to it. The validation loss used to be divided by the size of
  # the full validation set although only `val_subset` is scored, and the
  # console line reported the training accumulator instead of the validation one.
  train_loss = running_loss / len(train_samples_index)
  val_loss = val_running_loss / max(len(val_subset), 1)
  print('-----> [%d, %5d] acc_loss: %.3f' % (epoch + 1, len(val_subset), val_loss))

  tb.save_value('Train Loss', 'train_loss', epoch+1, train_loss)
  tb.save_value('Acc Loss', 'acc_loss', epoch+1, val_loss)

  # The file name carries the 0-based epoch while the stored 'epoch' field is
  # the number of completed epochs, matching the checkpoint this run resumed
  # from (file "ep51" reported epoch 52 when loaded).
  save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'optimizer' : optimizer.state_dict(),
        }, False, filename=os.path.join(args.checkpoint_dir,'checkpoint_4000_1600_ep{}_31.pth.tar'.format(epoch)) )

print('Finished Training')





  0%|          | 0/10 [00:00<?, ?it/s][A[A[A[A

TRAIN n_samples 7729
VAL n_samples 2620
(7729,) (2620,) (2620,)
[53,  1000] loss: 0.041
[53,  2000] loss: 0.039
[53,  3000] loss: 0.037
[53,  4000] loss: 0.037
[53,  5000] loss: 0.037
[53,  6000] loss: 0.037
[53,  7000] loss: 0.036


In [0]:
%%time

# Run the model on every segment of dataset sample 0, print the loss against
# the label, and save both model outputs as .npy files under PRJ_DIR/dataset.
inputs, labels = ljspeech[0]

# Put the model in inference mode explicitly instead of relying on whichever
# mode an earlier cell happened to leave it in.
model.eval()

for j, _ in enumerate(inputs):
  x, _ = pad_seq(np.array(inputs[j]).T)
  y, _ = pad_seq(np.array(labels[j]).T)

  # Add a leading batch axis to x, and batch + channel axes to y.
  x = torch.from_numpy(x[np.newaxis, :, :]).to(device)
  y = torch.from_numpy(y[np.newaxis, np.newaxis, :, :]).to(device)

  with torch.no_grad():
    mel_outputs, mel_outputs_postnet, codes = model(x)
    loss = criterion(mel_outputs_postnet, y)
    print("loss: {}".format(loss))

  print(mel_outputs, mel_outputs_postnet)
  # Move the results to host memory before handing them to NumPy.
  mel_outputs, mel_outputs_postnet = mel_outputs.cpu().numpy(), mel_outputs_postnet.cpu().numpy()
  # The loss (scaled by 100) is embedded in the file names.
  np.save(PRJ_DIR+'/dataset/mel_out_{}__acc_loss{}.npy'.format(j, loss*100), mel_outputs)
  np.save(PRJ_DIR+'/dataset/mel_out_pos_{}_acc_loss{}.npy'.format(j, loss*100), mel_outputs_postnet)