<a href="https://colab.research.google.com/github/kareem-g/TACOTRON-2-SPEECH-SYNTHESIS/blob/main/Tacotron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## First Things First

#### Mount Google Drive

In [1]:
from google.colab import drive

# Mount Google Drive inside the Colab VM; the later cells read and write
# checkpoints and file lists underneath this directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

MessageError: ignored

#### Check GPU

In [None]:
# Show which GPU this runtime was given: `-L` prints one line per GPU,
# the plain call prints the full status table.
!nvidia-smi -L
!nvidia-smi
# You may need to reduce batch-size for T4, P4 and K80.
# (The training batch size is set in the "Configure hparams.py" cell.)



---



## Install Tacotron2

#### Setup Tacotron2

In [None]:
# Start from the Colab content root so the clone lands in /content/tacotron2.
%cd /content/

# Install Tacotron2
# NOTE(review): re-running this cell makes `git clone` complain that the
# destination already exists; the remaining lines still run.
!git clone https://github.com/NVIDIA/tacotron2.git
%cd /content/tacotron2

# Initialize submodules
# The "Load Libraries" cell appends 'waveglow' to sys.path — presumably the
# directory checked out here; confirm against the repository's .gitmodules.
!git submodule init; git submodule update

# Install dependencies
# TensorFlow is pinned to 1.15.2 — presumably because the cloned code relies on
# TF 1.x APIs; TODO confirm before changing the pin.
!pip install tensorflow==1.15.2
!pip install -q unidecode tensorboardX

#### Download Default Tacotron Model

In [None]:
%cd /content/tacotron2

# tacotron2_statedict.pt
!gdown --id 1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA



---



## Generate Audio

#### Load Libraries

In [None]:
import matplotlib
%matplotlib inline
import matplotlib.pylab as plt

import IPython.display as ipd

import sys
sys.path.append('waveglow')
import numpy as np
import torch

from hparams import create_hparams
from model import Tacotron2
from layers import TacotronSTFT, STFT
from audio_processing import griffin_lim
from train import load_model
from text import text_to_sequence
from denoiser import Denoiser

In [None]:
def plot_data(data, figsize=(16, 4)):
    """Show every 2-D array in ``data`` as an image, side by side in one row.

    Args:
        data: Sequence of 2-D arrays; one subplot is created per element.
        figsize: ``(width, height)`` of the whole figure, in inches.

    Returns:
        None. The figure is rendered by the active Matplotlib backend.
    """
    # squeeze=False keeps `axes` a 2-D grid even for a single panel; with the
    # default, plt.subplots(1, 1) returns a bare Axes that cannot be indexed.
    _fig, axes = plt.subplots(1, len(data), figsize=figsize, squeeze=False)
    for ax, image in zip(axes[0], data):
        # origin must be 'upper' or 'lower'; 'lower' puts row 0 at the bottom,
        # which is what the former (now rejected) 'bottom' value meant.
        ax.imshow(image, aspect='auto', origin='lower',
                  interpolation='none')

#### Setup hparams

In [None]:
hparams = create_hparams()

# Audio / STFT settings written onto the hparams object, in the same order as
# before. The load-model and playback cells read them from `hparams`.
_audio_overrides = (
    ('max_wav_value', 32768.0),
    ('sampling_rate', 22050),
    ('filter_length', 1024),
    ('hop_length', 256),
    ('win_length', 1024),
)
for _name, _value in _audio_overrides:
    setattr(hparams, _name, _value)

#### Load Tacotron2 Model

In [None]:
# Alternative: the stock pretrained model fetched by the download cell.
#checkpoint_path = '/content/tacotron2/tacotron2_statedict.pt'

# Fine-tuned checkpoint written by the training cells. The path uses the same
# '/content/drive/MyDrive/...' spelling as those cells (it used 'My Drive' here),
# so inference loads exactly the file that training saved.
checkpoint_path = '/content/drive/MyDrive/tacotron2_checkpoints/tacotron_femalenord'

model = load_model(hparams)
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load(checkpoint_path)['state_dict'])
# GPU, eval mode, half precision. Assigning to `_` keeps the module repr out of
# the cell output.
_ = model.cuda().eval().half()

#### Load Waveglow Model

In [None]:
# Alternative: a stock universal WaveGlow model, if one has been placed here.
#waveglow_path = '/content/tacotron2/waveglow_256channels_universal_v5.pt'

# Fine-tuned vocoder written by the WaveGlow training cells. The path uses the
# same '/content/drive/MyDrive/...' spelling as those cells (it used 'My Drive'
# here), so inference loads exactly the file that training saved.
waveglow_path = '/content/drive/MyDrive/waveglow_checkpoints/waveglow_femalenord'

# The checkpoint stores the whole model object under 'model'.
# NOTE(review): torch.load unpickles arbitrary objects; only load checkpoints
# you trust.
waveglow = torch.load(waveglow_path)['model']
waveglow.cuda().eval().half()
# Cast the modules in `convinv` back to float32 after the half() call above.
for k in waveglow.convinv:
    k.float()
denoiser = Denoiser(waveglow)

#### Text Input

In [None]:
text = "This is just a test.|~"

# Encode the text with the English cleaners; `[None, :]` adds a leading axis of
# size 1 so the model receives a batch containing a single sequence.
sequence = np.array(text_to_sequence(text, ['english_cleaners']))[None, :]
# torch.autograd.Variable is a deprecated no-op wrapper, so the tensor is used
# directly: move it to the GPU and cast it to int64.
sequence = torch.from_numpy(sequence).cuda().long()

#### Generate Mel Outputs

In [None]:
# Inference only: skip autograd bookkeeping, as the WaveGlow synthesis cell
# already does. This avoids holding a computation graph in GPU memory.
with torch.no_grad():
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)

# One panel per output, first batch item only. The model runs in half
# precision, so each tensor is cast to float32 before conversion to NumPy;
# the alignment matrix is transposed for display.
plot_data((mel_outputs.float().cpu().numpy()[0],
           mel_outputs_postnet.float().cpu().numpy()[0],
           alignments.float().cpu().numpy()[0].T))

#### Synthesize Audio

In [None]:
# Run the vocoder on the post-net mel output; no gradients are needed.
with torch.no_grad():
    # Author's note on the alternative value: sigma=1
    audio = waveglow.infer(mel_outputs_postnet, sigma=0.85)

# First batch item, copied to host memory as a NumPy array for the player.
waveform = audio[0].data.cpu().numpy()
ipd.Audio(waveform, rate=hparams.sampling_rate)

#### Denoise

In [None]:
# Author's note on the alternative value: strength=0.1
denoised = denoiser(audio, strength=0.006)
# Keep index 0 along the second axis of the denoiser output.
audio_denoised = denoised[:, 0]

# Copy to host memory as a NumPy array and play at the configured sample rate.
denoised_waveform = audio_denoised.cpu().numpy()
ipd.Audio(denoised_waveform, rate=hparams.sampling_rate)



---



## Tacotron2 Training

#### Set Model Name

In [None]:
# Rewrites train.py in place: every occurrence of `checkpoint_{}` becomes the
# fixed name `tacotron_femalenord`.
# Removing {} will set checkpoints to be automatically overwritten.
# (Presumably because the `{}` placeholder carried the iteration number, so each
# save now reuses one file name — confirm against train.py.)
!sed -i -- 's,checkpoint_{},tacotron_femalenord,g' '/content/tacotron2/train.py'

#### Configure hparams.py

In [None]:
# Each command below edits /content/tacotron2/hparams.py in place with sed.
# A substitution only takes effect while the file still contains the exact
# left-hand text, so re-running this cell after a successful run changes nothing.

# Training list file.
!sed -i -- 's,filelists/ljs_audio_text_train_filelist.txt,/content/drive/MyDrive/filelists/femalenord_training.txt,g' '/content/tacotron2/hparams.py'

# Validation list file.
!sed -i -- 's,filelists/ljs_audio_text_val_filelist.txt,/content/drive/MyDrive/filelists/femalenord_validation.txt,g' '/content/tacotron2/hparams.py'

# Increase epochs
!sed -i -- 's,epochs=500,epochs=10000,g' '/content/tacotron2/hparams.py'

# Save checkpoints every 200 steps.
!sed -i -- 's,iters_per_checkpoint=1000,iters_per_checkpoint=200,g' '/content/tacotron2/hparams.py'

# Reduce batch_size. If you get an 'Out Of Memory' error, reduce this further.
!sed -i -- 's,batch_size=64,batch_size=35,g' '/content/tacotron2/hparams.py'

# Reduce learning rate
# (Disabled by default; uncomment to change learning_rate from 1e-3 to 1e-4.)
#!sed -i -- 's,learning_rate=1e-3,learning_rate=1e-4,g' '/content/tacotron2/hparams.py'

#### Warm Start

In [None]:
# This trains a new model off the default tacotron2_statedict.pt model.
# Checkpoints are written to the Drive folder given as --output_directory and
# logs to its `logdir` subfolder, which is the directory the "Run Tensorboard
# for Tacotron" cell reads.
%cd /content/tacotron2
!python3 train.py --output_directory='/content/drive/MyDrive/tacotron2_checkpoints' \
                     --log_directory='/content/drive/MyDrive/tacotron2_checkpoints/logdir' \
                     -c '/content/tacotron2/tacotron2_statedict.pt' --warm_start

#### Resume Checkpoint

In [None]:
# This continues training a model.
# Same output and log directories as the warm-start cell, but `-c` points at the
# previously saved `tacotron_femalenord` checkpoint and --warm_start is not passed.
%cd '/content/tacotron2'

!python3 train.py --output_directory='/content/drive/MyDrive/tacotron2_checkpoints' \
                     --log_directory='/content/drive/MyDrive/tacotron2_checkpoints/logdir' \
                     -c '/content/drive/MyDrive/tacotron2_checkpoints/tacotron_femalenord'



---



## WaveGlow Training

#### Setup WaveGlow

In [None]:
# Start from the Colab content root so the clone lands in /content/waveglow.
%cd /content/

# Install WaveGlow
# NOTE(review): re-running this cell makes both `git clone` calls complain that
# the destination already exists; the remaining lines still run.
!git clone https://github.com/NVIDIA/waveglow.git
%cd /content/waveglow/
# A second copy of Tacotron2 is cloned inside the WaveGlow checkout.
!git clone https://github.com/NVIDIA/tacotron2

# Initialize submodules
!git submodule init; git submodule update

# Install dependencies
# Same pins as the Tacotron2 setup cell.
!pip install tensorflow==1.15.2
!pip install -q unidecode tensorboardX

#### Download Default WaveGlow Model

In [None]:
%cd /content/waveglow/

# waveglow_256channels_ljs_v2.pt
!gdown --id 1WsibBTsuRg_SF2Z6L6NFRTT-NjEy1oTx

#### Set Model Name

In [None]:
# Rewrites train.py in place: `{}/waveglow_{}` becomes `{}/waveglow_femalenord`,
# i.e. only the second placeholder is dropped and the first is kept.
# Removing {} will set checkpoints to be automatically overwritten.
# (Presumably because the removed `{}` carried the iteration number — confirm
# against train.py.)
!sed -i -- 's,{}/waveglow_{},{}/waveglow_femalenord,g' '/content/waveglow/train.py'

#### Set Things Up

In [None]:
# The sed commands below edit /content/waveglow/config.json (and the generated
# train_files.txt) in place; each one only matches the exact left-hand text.

# No Apex.
# (Turns "fp16_run" off in config.json.)
!sed -i -- 's,fp16_run": true,fp16_run": false,g' '/content/waveglow/config.json'

# Save checkpoints every 200 steps.
!sed -i -- 's,"iters_per_checkpoint": 2000,"iters_per_checkpoint": 200,g' '/content/waveglow/config.json'

# Make a list of .wav files in a text file.
# The listing is taken from inside the dataset folder, so each entry starts with
# the relative prefix `wavs/`.
%cd /content/drive/MyDrive/femalenord
!ls wavs/*.wav > /content/waveglow/train_files.txt

# Save log files for TensorBoard.
!sed -i -- 's,"with_tensorboard": false,"with_tensorboard": true,g' '/content/waveglow/config.json'

# Make sure the text files contain the full paths to the .wav files.
# (Rewrites the relative `wavs/` prefix written above into an absolute Drive path.)
%cd /content/waveglow/
!sed -i -- 's,wavs/,/content/drive/MyDrive/femalenord/wavs/,g' '/content/waveglow/train_files.txt'

# Set train_files location.
!sed -i -- 's,"training_files": "train_files.txt","training_files": "/content/waveglow/train_files.txt",g' '/content/waveglow/config.json'

# Set output directory of waveglow model.
!sed -i -- 's,"output_directory": "checkpoints","output_directory": "/content/drive/MyDrive/waveglow_checkpoints",g' '/content/waveglow/config.json'

#### Run This If Warm Start Training

In [None]:
# waveglow_256channels_ljs_v2.pt needs to be converted first because it was trained on an older version of waveglow.
# The same path is passed twice — presumably as input and output, so the
# downloaded file is replaced by its converted version; confirm against
# convert_model.py.
%cd /content/waveglow/
!python3 -W ignore convert_model.py '/content/waveglow/waveglow_256channels_ljs_v2.pt' '/content/waveglow/waveglow_256channels_ljs_v2.pt'

In [None]:
!sed -i -- 's,"checkpoint_path": "","checkpoint_path": "/content/waveglow/waveglow_256channels_ljs_v2.pt",g' '/content/waveglow/config.json'

In [None]:
# Optional and disabled: nothing in this cell runs unless the lines below are uncommented.
# Modify train.py to start iteration at 0 because waveglow_256channels_universal_v5.pt lacks this info.
# NOTE(review): the comment above names the universal_v5 model, while the
# warm-start cells in this notebook use waveglow_256channels_ljs_v2.pt — confirm
# whether these edits are needed for that file.
# NOTE(review): as written, the single quotes around 'iteration' and 'optimizer'
# end the surrounding single-quoted sed script, and the unescaped [ ] form a
# regex bracket expression, so the first command would not match the intended
# text if uncommented; the quoting and escaping need fixing first.
#!sed -i -- 's,iteration = checkpoint_dict['iteration'],iteration = 0,g' '/content/waveglow/train.py'
#!sed -i -- 's,optimizer.load_state_dict(checkpoint_dict['optimizer']),#optimizer.load_state_dict(checkpoint_dict[\'optimizer\']),g' '/content/waveglow/train.py'

#### Run This If Resuming Training

In [None]:
# Set checkpoint_path to the existing model to resume training.
!sed -i -- 's,"checkpoint_path": "","checkpoint_path": "/content/drive/MyDrive/waveglow_checkpoints/waveglow_femalenord",g' '/content/waveglow/config.json'

#### Start Training

In [None]:
# Launch WaveGlow training from the checkout directory, using the config.json
# edited by the cells above. `-W ignore` silences Python warnings.
%cd /content/waveglow
!python3 -W ignore train.py -c '/content/waveglow/config.json'



---



## Tensorboard

#### Load Tensorboard Extension

In [None]:
# Register the %tensorboard magic used by the two "Run Tensorboard" cells below.
%load_ext tensorboard

#### Import Tensorflow and Datetime

In [None]:
import tensorflow as tf
import datetime, os

#### Run Tensorboard for Tacotron

In [None]:
# Same directory that the Tacotron2 training cells pass as --log_directory.
%tensorboard --logdir '/content/drive/MyDrive/tacotron2_checkpoints/logdir'

#### Run Tensorboard for WaveGlow

In [None]:
# `logs` folder under the WaveGlow output_directory set in config.json —
# presumably where train.py writes its TensorBoard events; confirm against train.py.
%tensorboard --logdir '/content/drive/MyDrive/waveglow_checkpoints/logs'