<a href="https://colab.research.google.com/github/noodlepopllc/ScriptReader/blob/main/Kokoro-tts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!apt install espeak-ng

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
espeak-ng is already the newest version (1.50+dfsg-10ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 1 not upgraded.


In [2]:
!pip install kokoro-onnx soundfile librosa requests



In [3]:
import sys, os, requests, librosa
from kokoro_onnx import Kokoro
from pathlib import Path
import gradio as gr
import soundfile as sf
from kokoro_onnx.tokenizer import Tokenizer
import numpy as np


# uv run spacy download en_core_web_sm

class Voice:
    """Kokoro ONNX text-to-speech wrapper with a small Gradio front end.

    On construction the model and voice files listed in ``required`` are
    downloaded into the current working directory if they are not already
    there, then the Kokoro model and its tokenizer are loaded.
    """

    # Local filename -> download URL for every file the model needs.
    required = {'kokoro-v1.0.onnx':
                'https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.onnx',
                'voices-v1.0.bin':
                'https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin'}

    def __init__(self):
        """Ensure the model files exist locally, then load Kokoro."""
        self.check_required()
        self.kokoro = Kokoro("kokoro-v1.0.onnx", "voices-v1.0.bin")
        self.tokenizer = Tokenizer()

    @staticmethod
    def _write_atomically(chunks, filename):
        """Write byte ``chunks`` to ``filename`` without exposing partial data.

        The data is first written to ``<filename>.part`` and only renamed to
        ``filename`` once every chunk has been written. If anything goes
        wrong the partial file is removed and the error is re-raised, so
        ``filename`` either holds the complete payload or is left untouched.

        Args:
            chunks: Iterable of ``bytes`` objects; empty chunks are skipped.
            filename: Destination path.
        """
        partial = f"{filename}.part"
        try:
            with open(partial, 'wb') as f:
                for chunk in chunks:
                    if chunk:
                        f.write(chunk)
            os.replace(partial, filename)
        except BaseException:
            # Also covers KeyboardInterrupt (e.g. an interrupted notebook
            # cell): never leave a truncated file behind.
            if os.path.exists(partial):
                os.remove(partial)
            raise

    def download_file_requests(self, url, filename):
        """Stream ``url`` to ``filename`` in 8 KiB chunks.

        The file only appears under its final name once the download has
        completed, so ``check_required`` can never mistake a truncated
        download for a usable model file.

        Args:
            url: HTTP(S) address to fetch.
            filename: Local path to write.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        # The timeout bounds connection setup and each socket read, not the
        # whole transfer, so large files still download fine.
        with requests.get(url, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()  # Raise an exception for bad status codes (4xx or 5xx)
            self._write_atomically(r.iter_content(chunk_size=8192), filename)
        print(f"Downloaded '{filename}' successfully.")

    def check_required(self):
        """Download every file in ``required`` that is missing locally."""
        for filename, url in Voice.required.items():
            if not os.path.exists(filename):
                self.download_file_requests(url, filename)

    def gui(self):
        """Build the Gradio app and launch it."""
        app = self.create_app()
        app.launch()

    def create(self, text: str, voice: str, blend_voice_name: str = None, blend_voice_slider: float = 0.0, speed: float = 1.0):
        """Synthesize ``text`` and return the audio together with its phonemes.

        Args:
            text: Text to speak; phonemized as ``en-us``.
            voice: Name of the primary voice.
            blend_voice_name: Optional second voice. When falsy, ``voice`` is
                used on its own and ``blend_voice_slider`` is ignored.
            blend_voice_slider: Percentage (0-100) of the mix taken from
                ``voice``; the remaining ``100 - blend_voice_slider`` percent
                comes from ``blend_voice_name``.
            speed: Speed value forwarded to ``Kokoro.create``.

        Returns:
            ``[(sample_rate, samples), phonemes]`` in the order expected by
            the audio and phoneme outputs wired up in ``create_app``.
        """
        phonemes = self.tokenizer.phonemize(text, lang="en-us")

        # Blending: replace the voice name with a weighted sum of both styles.
        if blend_voice_name:
            first_voice = self.kokoro.get_voice_style(voice)
            second_voice = self.kokoro.get_voice_style(blend_voice_name)
            primary_weight = blend_voice_slider / 100
            secondary_weight = (100 - blend_voice_slider) / 100
            voice = np.add(first_voice * primary_weight, second_voice * secondary_weight)
        samples, sample_rate = self.kokoro.create(
            phonemes, voice=voice, speed=speed, is_phonemes=True
        )
        return [(sample_rate, samples), phonemes]

    def create_app(self):
        """Assemble the Gradio ``Blocks`` UI and return it without launching.

        The left column holds the inputs (text, voice, optional blend voice,
        blend amount, speed, submit button); the right column shows the
        phonemes and the generated audio. Clicking the button calls
        ``create``.
        """
        # Same sorted list feeds both dropdowns; compute it once.
        voice_names = sorted(self.kokoro.get_voices())

        with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto")])) as ui:
            with gr.Row():
                with gr.Column():
                    text_input = gr.TextArea(
                        label="Input Text",
                        rtl=False,
                        value="Kokoro TTS. Turning words into emotion, one voice at a time!",
                    )
                    voice_input = gr.Dropdown(
                        label="Voice", value="af_sky", choices=voice_names
                    )
                    blend_voice_input = gr.Dropdown(
                        label="Blend Voice (Optional)",
                        value=None,
                        # None lets the user clear the blend voice again.
                        choices=voice_names + [None],
                    )
                    # With the default of 100 the primary voice gets the full
                    # weight in create(), i.e. no audible blending.
                    blend_voice_slider = gr.Slider(
                        label="Blend Voice Amount",
                        value=100.0,
                        minimum=0.0,
                        maximum=100.0,
                    )
                    # NOTE(review): the UI caps speed at 1.0, so only normal
                    # or slower speech can be selected - confirm intended.
                    speed = gr.Slider(
                        label="Speed",
                        value=1.0,
                        minimum=0.5,
                        maximum=1.0,
                    )
                    submit_button = gr.Button("Create")

                with gr.Column():
                    phonemes_output = gr.Textbox(label="Phonemes")
                    audio_output = gr.Audio()

                # Input order must match the positional parameters of create().
                submit_button.click(
                    fn=self.create,
                    inputs=[text_input, voice_input, blend_voice_input, blend_voice_slider, speed],
                    outputs=[audio_output, phonemes_output],
                )

        return ui








In [4]:
    # Constructing Voice downloads any missing model files and loads Kokoro;
    # gui() then builds the Gradio app and launches it.
    voice = Voice()
    voice.gui()

  with gr.Blocks(theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Roboto")])) as ui:


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e05490c923bd3672fa.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
