### INSTALL PACKAGES

In [12]:
# Install the Python dependencies. `%pip` (instead of `!pip`) installs into the
# environment of the kernel that is running this notebook.
%pip install -qU kokoro soundfile pydub
# System package installed through apt; all of its output (errors included) is discarded.
! apt-get -qq -y install espeak-ng > /dev/null 2>&1
# Working directories used by the cells below.
! mkdir -p src-wav wav-mixer


# Directory that receives the per-segment WAV files.
source_dir = "./src-wav"
# Directory that receives the merged MP3.
target_dir = "./wav-mixer"

### LOAD LIBS

In [13]:
from kokoro import KPipeline
from IPython.display import display, Audio
import soundfile as sf
import os
from pydub import AudioSegment

### DEFINE HELPERS (SPEECH GENERATION AND WAV MIXER)

In [4]:

def generate_speech(text, src_dir, show_samples=False, voice='af_heart',
                    speed=1, lang_code='a', sample_rate=24000):
    """Synthesize ``text`` with Kokoro and write one WAV file per segment.

    The text is handed to a ``KPipeline`` together with a newline split
    pattern; every segment the pipeline yields is saved as
    ``<src_dir>/<index>.wav``. Numbered ``<digits>.wav`` files already present
    in ``src_dir`` are deleted first, so chunks left over from an earlier
    (longer) run cannot end up in a later merge of that directory.

    Args:
        text: Text to synthesize.
        src_dir: Directory for the segment files; created if missing.
        show_samples: When True, display an inline audio player for every
            segment (only the first one autoplays).
        voice: Voice identifier forwarded to the pipeline.
        speed: Speed value forwarded to the pipeline.
        lang_code: Language code used to construct the pipeline.
        sample_rate: Rate used both for the inline player and for the WAV
            files. The default is the value this notebook has always used;
            it presumably has to match the pipeline's output rate.

    Returns:
        True if at least one segment was written, False if nothing was
        generated or an error occurred (the error is printed, not raised).
    """
    try:
        pipeline = KPipeline(lang_code=lang_code, repo_id='hexgrad/Kokoro-82M')
        generator = pipeline(
            text=text,
            voice=voice,
            speed=speed,
            split_pattern=r'\n+'
        )

        os.makedirs(src_dir, exist_ok=True)

        # Drop numbered chunks from previous runs; files with any other name
        # are left alone.
        for stale in os.listdir(src_dir):
            stem, ext = os.path.splitext(stale)
            if ext == ".wav" and stem.isdigit():
                os.remove(os.path.join(src_dir, stale))

        written = 0
        for i, (gs, ps, audio) in enumerate(generator):
            print(i)
            print(gs)
            if show_samples:
                display(Audio(data=audio, rate=sample_rate, autoplay=i == 0))

            file_path = os.path.join(src_dir, f"{i}.wav")
            sf.write(file_path, audio, sample_rate)
            written += 1

        if written == 0:
            # An empty result would otherwise be reported as success.
            print("Error: no audio segments were generated (is the text empty?)")
            return False
        return True

    except Exception as e:
        print(f"Error: {e}")
        return False


def _chunk_sort_key(filename):
    """Sort key: numbered chunks in numeric order, any other file after them."""
    try:
        return (0, int(filename.split('.')[0]), filename)
    except ValueError:
        # Not a numbered chunk; keep it, ordered by name after the numbered ones.
        return (1, 0, filename)


def wav_mixer_to_mp3(source_path, target_path, output_filename, bitrate="320k"):
    """Concatenate the WAV files of a directory and export them as one MP3.

    Files whose name starts with a number (``0.wav``, ``1.wav``, ...) are
    joined in numeric order. WAV files with any other name no longer abort
    the merge; they are appended after the numbered ones in alphabetical
    order. The ``.wav`` extension is matched case-insensitively.

    Args:
        source_path: Directory containing the WAV files.
        target_path: Directory for the MP3; created if missing.
        output_filename: File name of the MP3 inside ``target_path``.
        bitrate: Bitrate string passed to the MP3 export.

    Returns:
        The path of the written MP3, or None when there was nothing to merge
        or an error occurred (the error is printed, not raised).
    """
    try:
        wav_files = sorted(
            (f for f in os.listdir(source_path) if f.lower().endswith(".wav")),
            key=_chunk_sort_key,
        )
        if not wav_files:
            print("❌ Error: No WAV files found in the source directory!")
            return None

        merged_audio = AudioSegment.from_wav(os.path.join(source_path, wav_files[0]))
        for name in wav_files[1:]:
            merged_audio += AudioSegment.from_wav(os.path.join(source_path, name))

        os.makedirs(target_path, exist_ok=True)

        output_path = os.path.join(target_path, output_filename)
        merged_audio.export(output_path, format="mp3", bitrate=bitrate)
        print(f"✅ Merged MP3 saved at: {output_path} with {bitrate} quality.")
        return output_path

    except Exception as e:
        print(f"❌ Error: {e}")
        return None

### SIMPLE TEXT


In [21]:
# Text to synthesize; replace the placeholder with your own text.
# generate_speech passes a newline split pattern to the pipeline, so each
# line/paragraph is presumably synthesized as its own segment.
raw_text = """ place your text here """

### FILE UPLOAD (OPTIONAL — REPLACES THE SIMPLE TEXT ABOVE)

In [None]:
from google.colab import files

# Opens Colab's upload widget; the returned dict is keyed by uploaded file name.
uploaded = files.upload()

if uploaded:
    # Only the first uploaded file is used as the text source.
    filename = next(iter(uploaded))

    # Explicit encoding instead of the platform default; "utf-8-sig" also strips
    # a leading byte-order mark so it does not become part of the text.
    with open(filename, 'r', encoding='utf-8-sig') as file:
        raw_text = file.read()
else:
    # A cancelled upload returns an empty dict; indexing it used to raise IndexError.
    print("No file was uploaded; raw_text is left unchanged.")


### GENERATE SPEECH

In [None]:
# Synthesize raw_text into numbered WAV segments inside source_dir, showing an
# inline player for each segment; k receives the True/False success flag.
k = generate_speech(
    text=raw_text,
    src_dir=source_dir,
    show_samples=True,
)

### WAV MIXER

In [None]:
# Join the WAV segments found in source_dir and export them as my_audio.mp3
# inside target_dir.
wav_mixer_to_mp3(
    source_path=source_dir,
    target_path=target_dir,
    output_filename="my_audio.mp3",
)

### PLAY AND DOWNLOAD MP3

In [None]:
# Play the merged MP3 written by the WAV MIXER step.
# `os` and `Audio` come from the LOAD LIBS cell and `target_dir` from the first
# cell, so the path follows the configured output directory instead of a
# hard-coded Colab location.
file_path = os.path.join(target_dir, "my_audio.mp3")

# Hand the file itself to the player: no decode to raw samples and re-encode,
# and the player serves the actual MP3 data.
Audio(filename=file_path)
