<a href="https://colab.research.google.com/github/nvjob/colab.google/blob/main/nb/piper_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Google Colab Anti-Disconnect
#
# Injects a small script into the notebook page. Every 60000 ms it logs
# "Working" to the browser console and clicks the element matching
# `colab-toolbar-button#connect`.

import IPython

js_code = '''
function ClickConnect(){
console.log("Working");
document.querySelector("colab-toolbar-button#connect").click()
}
setInterval(ClickConnect,60000)
'''

# Wrap the snippet so the front end executes it instead of printing it.
keep_alive_script = IPython.display.Javascript(js_code)
display(keep_alive_script)

In [None]:
# Google drive connect
#
# Mounts the user's Google Drive into the Colab filesystem.

from google.colab import drive

# Directory under which the Drive contents are exposed.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

In [None]:
# Check GPU type
#
# Runs `nvidia-smi` in a shell and shows its report in the cell output.
# NOTE(review): presumably used to confirm a GPU runtime is attached before
# the training cell (which requests `--accelerator 'gpu'`) is run.

!nvidia-smi

In [None]:
# Install software

!git clone -q https://github.com/rhasspy/piper
%cd /content/piper/src/python
!wget -q "https://raw.githubusercontent.com/coqui-ai/TTS/dev/TTS/bin/resample.py"
!pip install -q cython>=0.29.0 piper-phonemize==1.1.0 librosa>=0.9.2 numpy>=1.19.0 onnxruntime>=1.11.0 pytorch-lightning==1.7.0 torch==1.11.0
!pip install -q torchtext==0.12.0 torchvision==0.12.0
!pip install -q torchaudio==0.11.0 torchmetrics==0.11.4
!bash build_monotonic_align.sh
!apt-get install -q espeak-ng
%cd /content

In [None]:
# Configuration
#
# Settings shared by the preprocess, train and export cells.
config = dict(
    # --- Dataset / preprocessing -------------------------------------------
    language="ru",                  # passed to preprocess as --language
    single_speaker=True,            # when true, preprocess gets --single-speaker
    dataset="/content/dataset",     # preprocess --input-dir
    output="/content/output",       # preprocess --output-dir and training --dataset-dir
    # --- Training ----------------------------------------------------------
    resume_from_checkpoint="/content/irina_baba.ckpt",  # training --resume_from_checkpoint
    quality="medium",               # training --quality
    max_epochs=5000,                # training --max_epochs
    batch_size=20,                  # training --batch-size
    checkpoint_epochs=100,          # training --checkpoint-epochs
    max_phoneme_ids=800,            # training --max-phoneme-ids
    # --- Audio -------------------------------------------------------------
    resample=False,                 # when true, resample.py is run before preprocessing
    sample_rate="22050",            # kept as a string; interpolated into shell commands
    # --- Evaluation / format -----------------------------------------------
    validation_split=0.0,           # training --validation-split
    num_test_examples=0,            # training --num-test-examples
    dataset_format="ljspeech",      # preprocess --dataset-format
    precision=32,                   # training --precision
)

In [None]:
# Preprocess dataset

import os

if not os.path.exists(config['output']):
  os.makedirs(config['output'])

force_sp = " --single-speaker" if config['single_speaker'] else ""

%cd /content/piper/src/python

if config['resample']:
  !python resample.py --input_dir "/content/dataset/wavs" --output_dir "/content/dataset/wavs_resampled" --output_sr {config['sample_rate']} --file_ext "wav"
  !mv /content/dataset/wavs_resampled/* /content/dataset/wavs

!python -m piper_train.preprocess \
  --language {config['language']} \
  --input-dir {config['dataset']} \
  --output-dir {config['output']} \
  --dataset-format {config['dataset_format']} \
  --sample-rate {config['sample_rate']} \
  --max-workers 4 \
  {force_sp}


In [None]:
# Train
#
# Launches `piper_train` on the preprocessed dataset in config['output'],
# using one GPU and the hyper-parameters from the configuration cell.

# Only pass --resume_from_checkpoint when a checkpoint path is configured, so
# an empty or None value trains from scratch instead of handing the CLI an
# invalid path.
resume_ckpt = config.get('resume_from_checkpoint')
resume_arg = f'--resume_from_checkpoint "{resume_ckpt}"' if resume_ckpt else ""

# The backslash-newline pairs are consumed by the Python string literal, so
# the shell receives the whole command on a single line.
get_ipython().system(f'''
python -m piper_train \
--dataset-dir "{config['output']}" \
--accelerator 'gpu' \
--devices 1 \
--batch-size {config['batch_size']} \
--validation-split {config['validation_split']} \
--num-test-examples {config['num_test_examples']} \
--quality "{config['quality']}" \
--checkpoint-epochs {config['checkpoint_epochs']} \
--max_epochs {config['max_epochs']} \
--precision {config['precision']} \
--max-phoneme-ids {config['max_phoneme_ids']} \
{resume_arg}
''')

In [None]:
# Export model to ONNX

import os
import shutil

# Find latest checkpoint
checkpoint_dir = os.path.join(config['output'], "lightning_logs", "version_0", "checkpoints")
checkpoints = [os.path.join(checkpoint_dir, f) for f in os.listdir(checkpoint_dir) if f.endswith(".ckpt")]
latest_checkpoint = max(checkpoints, key=os.path.getctime)

# Export path
export_path = os.path.join(config['output'], "model.onnx")

!python -m piper_train.export_onnx {latest_checkpoint} {export_path}
shutil.copy(
    os.path.join(config['output'], "config.json"),
    f"{export_path}.json"
)

print(f"Model exported to {export_path}")