In [None]:
pip install pdfplumber

In [None]:
# Colab-only: upload the input file(s) into the runtime through google.colab's helper.
# NOTE(review): PDF_PATH in the next cell presumably has to match the uploaded file's
# name — confirm when changing either.
from google.colab import files

# The return value is kept in `uploaded` but is not read by any later visible cell.
uploaded = files.upload()

In [None]:
import os
import re
import time
import pathlib
import requests
import pdfplumber

# SECURITY: a Deepgram key used to be hard-coded on this line. A key that has been saved
# in a notebook should be treated as leaked: revoke it and provide a fresh one through
# the DEEPGRAM_API_KEY environment variable before running this cell.
API_KEY = os.environ.get("DEEPGRAM_API_KEY", "")
if not API_KEY:
    print("DEEPGRAM_API_KEY is not set; text-to-speech requests are expected to fail authentication.")

# Voice model sent as the `model` query parameter of each TTS request.
# NOTE(review): verify this id against Deepgram's current voice list.
MODEL = "aura-3-thalia-en"
# PDF whose text is converted to speech.
PDF_PATH = "life3.0.pdf"

# Extract text from PDF
def extract_text_from_pdf(path):
    """Return the concatenated text of all pages of the PDF at `path`.

    Pages whose `extract_text()` result is falsy (None or an empty string) are
    skipped; every remaining page's text is followed by a single newline.
    """
    with pdfplumber.open(path) as document:
        # Lazily pull text page by page; the join below consumes the generator
        # while the PDF is still open.
        extracted_pages = (page.extract_text() for page in document.pages)
        return "".join(content + "\n" for content in extracted_pages if content)

# Split text into manageable chunks
def chunk_text(text, max_len=2000):
    """Split `text` into chunks of at most `max_len` characters.

    The text is first split into sentences (after `.`, `!` or `?` followed by
    whitespace) and sentences are packed greedily, joined by single spaces.
    A sentence that is itself longer than `max_len` is broken into pieces of at
    most `max_len` characters (on whitespace where possible, mid-word only when
    a single word is too long) instead of being emitted as one oversized chunk.

    Args:
        text: The full text to split.
        max_len: Maximum chunk length in characters. Values < 1 disable the
            splitting of long sentences (each sentence then becomes a chunk).

    Returns:
        A list of non-empty chunk strings; empty for empty/blank input.
    """
    import textwrap  # function-scope import keeps this block self-contained

    chunks, current = [], ""
    for sentence in re.split(r'(?<=[.!?])\s+', text):
        if 0 < max_len < len(sentence):
            # Oversized sentence: a TTS request carrying it would exceed the
            # per-request size the chunking is meant to enforce.
            pieces = textwrap.wrap(
                sentence,
                width=max_len,
                break_long_words=True,
                break_on_hyphens=False,
            )
        else:
            pieces = [sentence]

        for piece in pieces:
            # Strict `<` leaves room for the joining space, so the merged
            # chunk never exceeds max_len.
            if len(current) + len(piece) < max_len:
                current = (current + " " + piece).strip()
            else:
                if current:
                    chunks.append(current)
                current = piece
    if current:
        chunks.append(current)
    return chunks

# Convert text chunk to speech
def tts_chunk(chunk, idx, timeout=60):
    """Send one text chunk to the Deepgram speak endpoint and save the audio.

    Args:
        chunk: Text to synthesize.
        idx: Chunk number; used to build the output name `audio_<idx:03d>.mp3`.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the run forever.

    Returns:
        The name of the written file (relative to the current working
        directory), or None when the request failed or the server answered
        with a status other than 200. Failures are reported with print().
    """
    url = "https://api.deepgram.com/v1/speak"
    headers = {"Authorization": f"Token {API_KEY}", "Content-Type": "application/json"}

    try:
        response = requests.post(
            url,
            params={"model": MODEL},
            headers=headers,
            json={"text": chunk},
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Network-level failure (timeout, DNS, connection reset): treat it like
        # an HTTP error so one bad chunk does not abort the whole run.
        print(f"Request failed for chunk {idx}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    filename = f"audio_{idx:03d}.mp3"
    with open(filename, "wb") as f:
        f.write(response.content)
    print(f"Saved {filename}")
    return filename

def main():
    """Run the PDF-to-speech pipeline and write an ffmpeg concat list.

    Extracts the text of PDF_PATH, synthesizes every chunk with tts_chunk
    (chunks are numbered from 1), pauses 0.2 s after each request, and writes
    one `file '<name>'` line per successfully saved audio file to parts.txt.
    """
    document_text = extract_text_from_pdf(PDF_PATH)
    saved_files = []

    chunk_number = 0
    for piece in chunk_text(document_text):
        chunk_number += 1
        saved_name = tts_chunk(piece, chunk_number)
        if saved_name:
            saved_files.append(saved_name)
        # Short pause between consecutive requests.
        time.sleep(0.2)

    manifest_lines = [
        f"file '{pathlib.Path(name).as_posix()}'\n" for name in saved_files
    ]
    with open("parts.txt", "w") as manifest:
        manifest.writelines(manifest_lines)

    print("\n✅ All chunks processed.")


# Run the pipeline when executed as a script / notebook cell.
if __name__ == "__main__":
    main()

In [None]:
import os
import subprocess

output_dir = "/content/drive/MyDrive/OpenVoice/cloned_chunks"
merge_list = os.path.join(output_dir, "file_list.txt")
merged_file = "/content/drive/MyDrive/OpenVoice/merged_50.mp3"

# These paths live on Google Drive. NOTE(review): in the visible notebook Drive is only
# mounted in a later cell, so on a fresh "Run all" this directory will not exist yet.
if not os.path.isdir(output_dir):
    raise FileNotFoundError(
        f"{output_dir} does not exist - mount Google Drive and create the chunk files first."
    )

# Candidate inputs: audio_000.mp3 ... audio_049.mp3 (first 50 chunks only).
# NOTE(review): tts_chunk numbers its files from 001; confirm whether the cloned chunks
# are 0- or 1-based, otherwise index 000 is always missing and chunk 050 is never merged.
candidate_paths = [os.path.join(output_dir, f"audio_{idx:03d}.mp3") for idx in range(50)]
existing_paths = [path for path in candidate_paths if os.path.isfile(path)]

missing_count = len(candidate_paths) - len(existing_paths)
if missing_count:
    print(f"Skipping {missing_count} missing chunk file(s) out of {len(candidate_paths)}.")
if not existing_paths:
    raise FileNotFoundError(f"No chunk files found in {output_dir}; nothing to merge.")

# Create the file list for ffmpeg (concat demuxer format: one "file '<path>'" per line).
# Only files that exist are listed, because ffmpeg aborts on the first unreadable entry.
with open(merge_list, "w") as f:
    for file_path in existing_paths:
        f.write(f"file '{file_path}'\n")

# Run ffmpeg merge. check=True turns a non-zero ffmpeg exit status into a
# CalledProcessError instead of letting the failure pass unnoticed.
subprocess.run(
    [
        "ffmpeg",
        "-y",                # overwrite the output file if it already exists
        "-f", "concat",
        "-safe", "0",        # allow absolute paths in the list file
        "-i", merge_list,
        "-c", "copy",        # concatenate without re-encoding
        merged_file,
    ],
    check=True,
)

In [None]:
# Clone OpenVoice repository if not exists
import os
from pathlib import Path

# Location of the OpenVoice checkout inside the runtime; later cells reuse this variable.
openvoice_dir = Path("/content/OpenVoice")
# Clone only when the directory is absent, so re-running the cell does not clone again.
if not openvoice_dir.exists():
    !git clone https://github.com/myshell-ai/OpenVoice.git {openvoice_dir}

# Switch the notebook's working directory to the checkout; relative paths used by
# later cells resolve from here.
%cd {openvoice_dir}

In [None]:
# Install dependencies
# One quiet (-q) pip call for all packages listed below. No versions are pinned, so the
# versions that get installed depend on when the cell is run.
!pip install -q unidecode eng_to_ipa inflect pypinyin jieba cn2an \
               librosa pyworld gradio ffmpeg-python faster_whisper \
               whisper_timestamped wavmark pydub

In [None]:
# Directory for the converter checkpoint files, inside the OpenVoice checkout.
# Relies on `openvoice_dir` defined in the clone cell above.
ckpt_dir = openvoice_dir / "assets/converter_ckpt"
ckpt_dir.mkdir(parents=True, exist_ok=True)

# Download the converter config and weights quietly (-q). The remote checkpoint.pth is
# stored locally under the name weights.pth, which is the name the loading cell expects.
!wget -q -O {ckpt_dir}/config.json https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/config.json
!wget -q -O {ckpt_dir}/weights.pth https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/checkpoint.pth


In [None]:
 Add OpenVoice path to Python path
import sys
sys.path.append(str(openvoice_dir / "OpenVoice"))

# Mount Google Drive and define file paths
from google.colab import drive
drive.mount('/content/drive')

ref_clip    = "/content/drive/MyDrive/OpenVoice/myvoice.m4a"   # Reference voice
input_file  = "/content/drive/MyDrive/OpenVoice/life3_ai.mp3"  # Input audio
output_file = "/content/drive/MyDrive/OpenVoice/final_cloned.wav"  # Output cloned audio

# Load ToneColorConverter model
import torch
from openvoice.api import ToneColorConverter
from openvoice import se_extractor

device = "cuda" if torch.cuda.is_available() else "cpu"
converter = ToneColorConverter(str(ckpt_dir / "config.json"), device=device)
converter.load_ckpt(str(ckpt_dir / "weights.pth"))

# Extract speaker embedding from reference clip
target_se, _ = se_extractor.get_se(ref_clip, converter)

# Convert full audio to cloned voice
converter.convert(
    audio_src_path=input_file,
    src_se=None,
    tgt_se=target_se,
    output_path=output_file
)

print(f"🎉 Done! Cloned audio saved at: {output_file}")