# Train the StyleTTS2-lite model on the ILSpeech dataset

In [None]:
# Prepare dependencies: a system package, the training repo, and its Python requirements.

import os

# Install espeak-ng system-wide through apt (needs sudo; -y answers the prompt).
!sudo apt-get install espeak-ng -y
# Clone the `hebrew` branch of the training repo and make it the working directory.
# Every relative path used by the following cells is resolved from inside StyleTTS2-lite.
# NOTE(review): re-running this cell without restarting clones again inside the repo.
!git clone https://github.com/thewh1teagle/StyleTTS2-lite -b hebrew
%cd StyleTTS2-lite

# Reset uv-related environment variables before creating the environment:
# the constraint-file variables are emptied, the pre-release policy is set to
# "if-necessary-or-explicit", and UV_SYSTEM_PYTHON is set to "false" so that
# `uv pip install` installs into the venv created by `uv venv` below instead of
# the system interpreter.
# NOTE(review): presumably the hosting notebook platform pre-sets these variables;
# confirm which platform this was written for.
os.environ["UV_CONSTRAINT"] = ""
os.environ["UV_BUILD_CONSTRAINT"] = ""
os.environ["UV_PRERELEASE"] = "if-necessary-or-explicit"
os.environ["UV_SYSTEM_PYTHON"] = "false"
!uv venv
!uv pip install -r requirements.txt

In [None]:
# Prepare models: download the pretrained base checkpoint and its config file.
# `-O` writes each download to the given path, replacing a file that is already there
# (so the repo's ./Configs/config.yaml is overwritten by the downloaded one).
# NOTE(review): wget does not create parent directories; this assumes ./Models/Finetune
# and ./Configs already exist in the cloned repo. Confirm.
!wget https://huggingface.co/dangtr0408/StyleTTS2-lite/resolve/main/base_model.pth -O ./Models/Finetune/base_model.pth
!wget https://huggingface.co/dangtr0408/StyleTTS2-lite/resolve/main/config.yaml -O ./Configs/config.yaml

In [None]:
# Prepare dataset: download the ILSpeech archive and extract it into the current directory.
# NOTE(review): the name of the extracted folder is decided by the archive contents, not by
# the zip file name. Verify it matches the dataset directory the following cells read from.

!wget https://huggingface.co/datasets/thewh1teagle/ILSpeech/resolve/main/ilspeech_2025_04_v1.zip
!unzip ilspeech_2025_04_v1.zip

In [None]:
# Convert LJSpeech format to LibriTTS format
#
# Step 1: read metadata.csv (one `utt_id|phonemes` entry per line) and split it
# into a small validation set and a training set.

from pathlib import Path
import random

SPLIT_SEED = 42      # fixed seed so Restart & Run All reproduces the same split
VAL_FRACTION = 0.04  # share of utterances held out for validation

base = Path('./ilspeech_2025_04_21_v1')
raw_lines = (base / 'metadata.csv').read_text(encoding='utf-8').splitlines()

# Drop blank / whitespace-only lines so they are neither counted in the split
# sizes nor handed to the parser later on.
lines = [line for line in raw_lines if line.strip()]
if len(lines) < 2:
    raise ValueError(
        f"{base / 'metadata.csv'} has {len(lines)} usable line(s); "
        "need at least 2 to build both a validation and a training list"
    )

# A dedicated RNG instance gives a deterministic shuffle without touching the
# global `random` state that other code may rely on.
random.Random(SPLIT_SEED).shuffle(lines)

# Always keep at least one utterance for validation, even for tiny datasets.
val_size = max(1, int(VAL_FRACTION * len(lines)))
val_lines, train_lines = lines[:val_size], lines[val_size:]

def format_lines(lines, base_dir=None):
    """Convert `utt_id|phonemes` metadata lines into `wav_path|phonemes` lines.

    Args:
        lines: Iterable of metadata lines, each holding exactly two
            `|`-separated fields: the utterance id and its phonemes.
            Blank or whitespace-only lines are skipped.
        base_dir: Dataset directory that contains the `wavs/` folder. When
            omitted, the notebook-level `base` path is used, which keeps
            existing `format_lines(lines)` calls working unchanged.

    Returns:
        A list of strings `"<resolved wav path>|<phonemes>"`, in input order.

    Raises:
        ValueError: If a non-blank line does not have exactly two fields.
    """
    root = base if base_dir is None else Path(base_dir)
    formatted = []
    for line_no, line in enumerate(lines, start=1):
        entry = line.strip()
        if not entry:
            # A stray blank line is not an utterance; ignore it instead of crashing.
            continue
        fields = entry.split('|')
        if len(fields) != 2:
            # Same exception type as the bare tuple-unpack would raise, but it
            # tells the reader which line is broken.
            raise ValueError(
                f"line {line_no}: expected 'utt_id|phonemes', "
                f"got {len(fields)} field(s): {entry!r}"
            )
        utt_id, phonemes = fields
        # resolve() makes the path absolute, so the list does not depend on the
        # working directory of whoever reads it.
        wav_path = (root / 'wavs' / f'{utt_id}.wav').resolve()
        formatted.append(f"{wav_path}|{phonemes}")
    return formatted

# Write both file lists next to the metadata, one `wav_path|phonemes` entry per
# line: validation list first, then the training list.
for list_name, split_lines in (('val.txt', val_lines), ('train.txt', train_lines)):
    list_body = '\n'.join(format_lines(split_lines))
    (base / list_name).write_text(list_body, encoding='utf-8')



In [None]:
# Point config to new dataset
#
# Rewrites the `data_params` section of ./Configs/config.yaml so that it refers
# to the train/val lists generated by the conversion cell.

import yaml
from pathlib import Path

config_path = Path('./Configs/config.yaml')
# Must be the directory the conversion cell wrote train.txt / val.txt into;
# a different folder name here would leave the config pointing at missing files.
data_dir = Path('./ilspeech_2025_04_21_v1')

train_list = data_dir / 'train.txt'
val_list = data_dir / 'val.txt'

# Fail here with a clear message rather than later in the middle of training start-up.
missing_lists = [str(path) for path in (train_list, val_list) if not path.is_file()]
if missing_lists:
    raise FileNotFoundError(
        f"dataset list file(s) not found: {', '.join(missing_lists)}. "
        "Run the conversion cell first and check that data_dir matches its output directory"
    )

with config_path.open('r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Set paths relative to root_path = ./
config['data_params']['root_path'] = './'
config['data_params']['train_data'] = str(train_list)
config['data_params']['val_data'] = str(val_list)

# Save changes. The file is re-serialized by PyYAML: key order is kept
# (sort_keys=False) and non-ASCII text is written as-is (allow_unicode=True).
with config_path.open('w', encoding='utf-8') as f:
    yaml.dump(config, f, sort_keys=False, allow_unicode=True)


In [None]:
# Start training: run the repo's train.py through `uv run`.
# NOTE(review): presumably train.py reads ./Configs/config.yaml as edited above; confirm.
!uv run python train.py