# Configuration options
#### Make sure to configure the settings in the `Configuration Settings` section below before running further cells.

##### Dataset
* dataset_name: name of the dataset (default: 'MyDataset')
* dataset_location: absolute path to the prepared dataset folder (default: '/content/output_dataset')
* speaker_name: name of the speaker in the raw_data folder (default: 'universal')
* config_dir: absolute path to EfficientSpeech configuration directory
* lexicon_path: absolute path to a .txt file with the lexicon/dictionary the dataset is prepared for (defaults to `librispeech-lexicon.txt`)

##### Output
* output_dir: A path to save all generated .ckpt files + logs to. A folder with your dataset name will be created in this folder.
* infer_device: Device used for inference after training. One of 'cuda', 'cpu' (default: 'cuda')

##### Model training options
* accelerator: One of `cpu`, `gpu`
* devices: Will be mapped to either `gpus`, `tpu_cores`, `num_processes` or `ipus`, based on the accelerator type, per pytorch-lightning documentation.
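* batch_size: number of samples per training batch (default: 128)
* num_workers: number of worker processes used for data loading (default: 4)
* precision: numeric precision used for training (default: 16-mixed)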
* model_size_to_train: One of 'tiny', 'small', 'base' (default: tiny)
* max_epochs: The number of epochs to stop training at (default: 5000)

In [13]:
# Optional one-time setup commands, kept commented out; uncomment the ones you need.
#   1. install TensorBoard
#   2. install the packages listed in requirements.txt
#   3. download the tiny_eng_266k.ckpt checkpoint from the EfficientSpeech GitHub release
# NOTE(review): if these are re-enabled, `%pip` (instead of `!pip`) installs into the
# kernel's own environment, and the download target below is an absolute,
# machine-specific path - adjust it for your setup.
# !pip install tensorboard
# !pip install -r requirements.txt
# !wget --continue -nv -O /home/atlas/code/l2i/experiment/checkpoints/tiny_eng_266k.ckpt  https://github.com/roatienza/efficientspeech/releases/download/pytorch2.0.1/tiny_eng_266k.ckpt 

In [14]:

dataset_name = 'CML_Polish' 
dataset_location = '../output_dataset' 
speaker_name = 'universal' 
config_dir = './configs/CML_Polish' 
output_dir = '../checkpoints' 
cmd_line_opts = ""
!mkdir /home/atlas/code/l2i/new_experiment/checkpoints


# cmd_line_opts = "--accelerator gpu --devices 1 --num_workers 4 --precision 16-mixed --batch-size 8 --head 1 --reduction 4 --expansion 1 --kernel-size 3 --n-blocks 2 --block-depth 2 --max_epochs 100 --infer-device cuda"
pp_config_path = os.path.join("/home/atlas/code/l2i/new_experiment/output_dataset/configs/CML_Polish/", 'preprocess.yaml')
pp_config_arg = f'--preprocess-config {pp_config_path}'
training_opts = ' '.join([pp_config_arg, cmd_line_opts])


### Launch TensorBoard

In [None]:
# Load the TensorBoard notebook extension, then reload it so the extension is
# re-initialised when this cell is run again in the same kernel.
%load_ext tensorboard
%reload_ext tensorboard
# Start TensorBoard inline, reading event files from ../lightning_logs/.
# NOTE(review): this path is relative to the kernel's working directory at the time
# the cell runs, while later cells `%cd` into another directory and look for
# `lightning_logs/` there - confirm this is the directory training writes to.
%tensorboard --logdir ../lightning_logs/

### Run training

In [None]:


print(f'Running training with arguments: {training_opts}')

%cd /content/efficientspeech/
!python /content/efficientspeech/train.py $training_opts

# Run inference on the latest trained checkpoint

In [None]:
from IPython.display import Audio, display
import os

sentence = 'The quick brown fox jumped over the lazy dog.' #@param {type:'string'}

%cd /content/efficientspeech/

# Get latest run checkpoint
latest_run_folder = !ls -td -- lightning_logs/* | head -n 1
latest_run_folder = latest_run_folder[0]
latest_run_name = os.path.basename(latest_run_folder)
ckpt_folder = os.path.join(latest_run_folder, 'checkpoints')
latest_ckpt = !ls -td -- $ckpt_folder\/* | head -n 1
latest_ckpt = os.path.abspath(latest_ckpt[0])
latest_ckpt_name = os.path.basename(latest_ckpt)
# Output wav 
output_wav_name = latest_run_name + '.wav'

print(f'Found checkpoint "{latest_ckpt}')

# Run inference with latest checkpoint
inference_args = f'--checkpoint {latest_ckpt} {model_opts} ' \
  f'--infer-device {infer_device} --text "{sentence}" ' \
  f'--wav-filename {output_wav_name}'
print(f'Running inference with arguments: {inference_args}')
!python demo.py $inference_args

# Display inference result
output_wav_path = os.path.join('/content/efficientspeech/outputs', output_wav_name)
print(f'\nInference result: {output_wav_path}')
display(Audio(os.path.abspath(output_wav_path)))