# Kokoro TTS Alternative

This notebook provides an alternative text-to-speech (TTS) implementation using the Kokoro library instead of OpenAI's TTS models used in the main project (`gutenberg_to_audio_gpt4o.ipynb`).

## Purpose
- This is NOT the main project workflow
- Serves as an alternative TTS solution when OpenAI's TTS is not preferred
- Uses the open-source Kokoro TTS library instead of OpenAI's API

## Integration with Main Project
This notebook is designed to work with the same file structure as the main project:
- Uses the same book ID system
- Processes text with similar chapter/section organization
- Outputs files that can be used interchangeably with the main project

## Requirements
- Kokoro library (`pip install "kokoro>=0.3.4"` — keep the quotes so the shell does not treat `>` as an output redirect)
- soundfile
- espeak-ng (for phoneme processing)
- pydub (for audio concatenation; MP3 export additionally requires ffmpeg to be installed)

## Usage
1. Set the `BOOK_ID` variable to match your Project Gutenberg book
2. Adjust the chapter range with the `CHAPTER_START` and `CHAPTER_STOP` variables (both inclusive)
3. Run cells sequentially to process the book with Kokoro TTS


In [None]:
import os
import glob

# ---------------------------------------------------------------------------
# Run configuration
# ---------------------------------------------------------------------------
BOOK_ID = 34167     # selects the books/<BOOK_ID>/ working tree
CHAPTER_START = 0   # first chapter to synthesize (inclusive)
CHAPTER_STOP = 99   # last chapter to synthesize (inclusive)

# The cleaned text is produced by the main notebook; fail fast with a pointer
# to it when the input directory or its files are missing.
txt_dir = f"books/{BOOK_ID}/txt"
if os.path.exists(txt_dir):
    cleaned_files = glob.glob(f"{txt_dir}/clean_text_*.txt")
    cleaned_files.sort()
else:
    raise FileNotFoundError(f"Directory {txt_dir} not found. Please run the text cleaning process in gutenberg_to_audio_gpt4o.ipynb first.")

if not cleaned_files:
    raise FileNotFoundError(f"No cleaned text files found in {txt_dir}. Please run the text cleaning process in gutenberg_to_audio_gpt4o.ipynb first.")

print(f"Found {len(cleaned_files)} cleaned text files.")

import os
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf

# Build the Kokoro pipeline once at module level; process_cleaned_files_with_kokoro
# passes this same instance to every text_to_wav_kokoro call.
# NOTE(review): lang_code='a' is presumably Kokoro's code for American English — confirm
# against the Kokoro language-code table before switching voices.
pipeline = KPipeline(lang_code='a')


def text_to_wav_kokoro(pipeline, text: str, book_id: str, chapter_id: int, section_id: int,
                       *, voice: str = 'af_heart', speed: float = 1, sample_rate: int = 24000):
    """Synthesize one text section with Kokoro and write one WAV file per yielded segment.

    Files are written to ``books/{book_id}/kokoro_audio`` and named
    ``gutenberg_{book_id}_{chapter}_{section}_{segment}.wav``, where chapter,
    section and segment are zero-padded to three digits. The segment number is
    the position of the item in the pipeline's output, starting at 0.

    Args:
        pipeline: Callable invoked as ``pipeline(text, voice=..., speed=...)``.
            It must return an iterable of 3-tuples whose last element is the
            audio data handed to ``sf.write``.
        text: Section text. Leading and trailing whitespace is stripped.
        book_id: Book identifier, interpolated verbatim into paths and file
            names (any value that formats cleanly works, e.g. an int).
        chapter_id: Chapter number used in the file name.
        section_id: Section number used in the file name.
        voice: Voice name forwarded to the pipeline.
        speed: Speed factor forwarded to the pipeline.
        sample_rate: Sample rate passed to ``sf.write``. The default of 24000
            is the value that was previously hard-coded here.

    Returns:
        True if at least one WAV file was written; False if the text was blank
        or the pipeline yielded no segments, so callers do not count the
        section as processed.
    """
    text = text.strip()
    if not text:
        # Nothing to synthesize: skip the pipeline call and report failure
        # instead of claiming audio was generated.
        print(f"Skipping chapter {chapter_id}, section {section_id}: no text to synthesize")
        return False
    # A single trailing space is appended, as in the original implementation.
    # NOTE(review): presumably this helps the last sentence be segmented — confirm.
    text = text + " "

    # Format IDs with leading zeros
    chapter_id_str = str(chapter_id).zfill(3)
    section_id_str = str(section_id).zfill(3)

    # Create the output directory if it doesn't exist
    output_dir = f"books/{book_id}/kokoro_audio"
    os.makedirs(output_dir, exist_ok=True)

    # Generate audio with Kokoro TTS
    generator = pipeline(text, voice=voice, speed=speed)

    segments_written = 0
    for i, (_graphemes, _phonemes, audio) in enumerate(generator):
        # The position in the pipeline output becomes the last field of the file name
        sentence_id = str(i).zfill(3)
        filename = f"{output_dir}/gutenberg_{book_id}_{chapter_id_str}_{section_id_str}_{sentence_id}.wav"
        sf.write(filename, audio, sample_rate)
        segments_written += 1

    if segments_written == 0:
        print(f"No audio generated for chapter {chapter_id}, section {section_id}")
        return False

    print(f"Generated audio for chapter {chapter_id}, section {section_id}")
    return True


# Process each cleaned text file with Kokoro TTS
def process_cleaned_files_with_kokoro(book_id, chapter_start, chapter_stop):
    """Run Kokoro TTS over every cleaned section file in a chapter range.

    Looks in ``books/{book_id}/txt`` for files named
    ``clean_text_{chapter:03d}_{section}.txt`` and passes each one's text to
    ``text_to_wav_kokoro``. Relies on the module-level ``pipeline`` object and
    on ``text_to_wav_kokoro`` being defined before this function is called.

    Args:
        book_id: Book identifier used to build the input directory path.
        chapter_start: First chapter number to process (inclusive).
        chapter_stop: Last chapter number to process (inclusive).

    Raises:
        FileNotFoundError: If the text directory does not exist (from
            ``os.listdir``).
    """
    print(f"Processing chapters {chapter_start} to {chapter_stop} with Kokoro TTS")

    txt_dir = f"books/{book_id}/txt"

    # Scan the directory once instead of once per chapter. Sorting here means
    # each per-chapter filter below already yields files in filename order.
    available_files = sorted(f for f in os.listdir(txt_dir) if f.endswith(".txt"))

    processed_files = 0
    for chapter_id in range(chapter_start, chapter_stop + 1):
        # Find all section files for this chapter
        prefix = f"clean_text_{chapter_id:03d}_"
        section_files = [f for f in available_files if f.startswith(prefix)]

        if not section_files:
            print(f"No sections found for chapter {chapter_id}")
            continue

        print(f"Found {len(section_files)} sections for chapter {chapter_id}")

        # Process each section
        for section_file in section_files:
            # The section ID is the fourth underscore-separated field, minus the extension
            section_token = section_file.split('_')[3].split('.')[0]
            try:
                section_id = int(section_token)
            except ValueError:
                # One stray file must not abort a long-running batch
                print(f"Skipping {section_file}: section ID {section_token!r} is not a number")
                continue

            # Load the section text
            # NOTE(review): no explicit encoding, so the platform default is used;
            # confirm it matches how the cleaning notebook wrote these files.
            with open(os.path.join(txt_dir, section_file), "r") as f:
                section_text = f.read()

            print(f"Processing chapter {chapter_id}, section {section_id}")
            result = text_to_wav_kokoro(
                pipeline=pipeline,
                text=section_text,
                book_id=book_id,
                chapter_id=chapter_id,
                section_id=section_id
            )

            if result:
                processed_files += 1

    print(f"Finished processing {processed_files} sections across chapters {chapter_start} to {chapter_stop}")


# Run the processing: synthesize WAV files for every cleaned section of BOOK_ID
# in chapters CHAPTER_START through CHAPTER_STOP (both inclusive).
process_cleaned_files_with_kokoro(BOOK_ID, CHAPTER_START, CHAPTER_STOP)

# Combine all wav files for each chapter into one mp3 file
from pydub import AudioSegment
import glob
import os


def concatenate_wav_to_chapters(book_id, add_silence_ms=1500, sentence_pause_ms=300, section_pause_ms=800):
    """Join the per-segment WAV files of each chapter into one MP3 file.

    Reads ``books/{book_id}/kokoro_audio/gutenberg_{book_id}_{chapter}_{section}_{sentence}.wav``
    and writes ``books/{book_id}/chapters/gutenberg_{book_id}_kokoro_chapter_{chapter}.mp3``.
    Within a chapter, sections are taken in sorted order of their ID string and
    files within a section in numeric order of their sentence ID.

    Args:
        book_id: Book identifier used to build the input and output paths.
        add_silence_ms: Silence appended at the end of each chapter, in milliseconds.
        sentence_pause_ms: Silence appended after every sentence file, in
            milliseconds (previously hard-coded to 300).
        section_pause_ms: Silence inserted before every section except the
            first, in milliseconds (previously hard-coded to 800).

    Returns:
        None. Prints a message and returns early if the audio directory is
        missing or holds no usable files; the output directory is only created
        when there is at least one chapter to write.
    """
    audio_dir = f"books/{book_id}/kokoro_audio"
    if not os.path.exists(audio_dir):
        print(f"Audio directory {audio_dir} not found")
        return

    # Group files as {chapter_id: {section_id: [(path, sentence_number), ...]}}
    chapter_files = {}
    for file_path in glob.glob(f"{audio_dir}/gutenberg_{book_id}_*.wav"):
        filename = os.path.basename(file_path)
        parts = filename.split('_')
        if len(parts) < 5:
            continue

        chapter_id, section_id = parts[2], parts[3]
        sentence_token = parts[4].split('.')[0]
        try:
            sentence_number = int(sentence_token)
        except ValueError:
            # A stray file with a non-numeric suffix must not abort the whole run
            print(f"Skipping {filename}: sentence ID {sentence_token!r} is not a number")
            continue

        sections = chapter_files.setdefault(chapter_id, {})
        sections.setdefault(section_id, []).append((file_path, sentence_number))

    if not chapter_files:
        print(f"Finished concatenating {len(chapter_files)} chapters")
        return

    # Create the output directory only once we know something will be written
    output_dir = f"books/{book_id}/chapters"
    os.makedirs(output_dir, exist_ok=True)

    # The pause segments are identical for every chapter, so build them once
    sentence_pause = AudioSegment.silent(duration=sentence_pause_ms)
    section_pause = AudioSegment.silent(duration=section_pause_ms)
    chapter_end_silence = AudioSegment.silent(duration=add_silence_ms)

    for chapter_id in sorted(chapter_files):
        print(f"Processing chapter {chapter_id}")

        combined = AudioSegment.empty()
        sections = chapter_files[chapter_id]

        for section_index, section_id in enumerate(sorted(sections)):
            # Separate sections with a longer pause, but not before the first one
            if section_index > 0:
                combined += section_pause

            # Append sentences in numeric order, each followed by a short pause
            for file_path, _ in sorted(sections[section_id], key=lambda item: item[1]):
                print(f"  Adding {os.path.basename(file_path)}")
                combined += AudioSegment.from_wav(file_path)
                combined += sentence_pause

        # Add silence at the end of the chapter
        combined += chapter_end_silence

        # Export the combined audio; the extra parameters request a 192k audio
        # bitrate and a 44100 Hz output sample rate from the encoder.
        output_file = f"{output_dir}/gutenberg_{book_id}_kokoro_chapter_{chapter_id}.mp3"
        combined.export(output_file, format="mp3", parameters=["-b:a", "192k", "-ar", "44100"])

        print(f"Created chapter file: {output_file} ({len(combined) / 1000:.2f} seconds)")

    print(f"Finished concatenating {len(chapter_files)} chapters")


# Run the concatenation process: build one MP3 per chapter for BOOK_ID from the
# WAV files under books/<BOOK_ID>/kokoro_audio, using the default 1500 ms end-of-chapter silence.
concatenate_wav_to_chapters(BOOK_ID)

Found 80 cleaned text files.
Processing chapters 0 to 99 with Kokoro TTS
Found 1 sections for chapter 0
Processing chapter 0, section 0
Generated audio for chapter 0, section 0
Found 1 sections for chapter 1
Processing chapter 1, section 0
Generated audio for chapter 1, section 0
Found 6 sections for chapter 2
Processing chapter 2, section 0
