In [None]:
pip install pdfplumber

Collecting pdfplumber
  Downloading pdfplumber-0.11.7-py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20250506 (from pdfplumber)
  Downloading pdfminer_six-20250506-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.7-py3-none-any.whl (60 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer_six-20250506-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Colab helper that opens a browser file picker for uploading local files.
from google.colab import files

# `uploaded` holds the return value of files.upload().
# NOTE(review): the recorded output shows the files being saved under their bare
# names (i.e. relative to the working directory), while the pipeline below reads
# from /content/drive/MyDrive/... — presumably the files were copied to Drive
# separately; confirm.
uploaded = files.upload()

Saving life3.0.pdf to life3.0.pdf
Saving my voice recording .opus to my voice recording .opus


In [None]:
import os
import re
import time
import pathlib
import requests
import pdfplumber

# 🔑 API and File Settings
# The Deepgram key is read from the environment so the secret is never stored in
# the notebook. Set it before running this cell, e.g. in a separate cell:
#     import getpass, os; os.environ["DEEPGRAM_API_KEY"] = getpass.getpass()
# The key that used to be hard-coded here has been exposed and should be revoked.
API_KEY = os.environ.get("DEEPGRAM_API_KEY", "")
if not API_KEY:
    print("⚠ DEEPGRAM_API_KEY is not set; Deepgram requests will be rejected until it is.")
MODEL = "aura-2-thalia-en"
# Lives on Google Drive, so Drive must be mounted before the pipeline runs.
PDF_PATH = "/content/drive/MyDrive/life3.0.pdf"

# 📂 Output folder in Drive
OUTPUT_DIR = pathlib.Path("/content/drive/MyDrive/tts_outputs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)


def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the text of every page of ``pdf_path``, joined with newlines.

    Pages for which pdfplumber yields no text are reported on stdout and left
    out of the result. Progress is printed page by page (1-based numbering).
    """
    print(f"📖 Extracting text from: {pdf_path} ...")
    collected_pages = []
    with pdfplumber.open(pdf_path) as document:
        page_number = 0
        for pdf_page in document.pages:
            page_number += 1
            content = pdf_page.extract_text()
            if not content:
                # None or empty string: nothing usable on this page.
                print(f"   ⚠ Page {page_number} has no text")
                continue
            collected_pages.append(content)
            print(f"   ✔ Page {page_number} extracted")
    print("✅ PDF extraction completed.\n")
    return "\n".join(collected_pages)


def _split_overlong(sentence: str, max_len: int) -> list[str]:
    """Break one sentence into pieces of at most ``max_len`` characters, cutting at whitespace when possible."""
    pieces = []
    while len(sentence) > max_len:
        # Right-most whitespace at an index <= max_len, so the text before it fits;
        # fall back to a hard cut when the window holds no whitespace at all.
        cut = next(
            (i for i in range(max_len, 0, -1) if sentence[i].isspace()),
            max_len,
        )
        pieces.append(sentence[:cut].rstrip())
        sentence = sentence[cut:].lstrip()
    if sentence:
        pieces.append(sentence)
    return pieces


def chunk_sentences(text: str, max_len: int = 2000) -> list[str]:
    """Split text into chunks of at most ``max_len`` characters.

    The text is split after sentence punctuation (``.``, ``!``, ``?``) and the
    sentences are packed greedily, joined by single spaces. A sentence that is
    longer than ``max_len`` on its own (e.g. text without punctuation) is
    broken up further, so that no returned chunk ever exceeds ``max_len``.

    Args:
        text: The text to split. Empty or whitespace-only text gives ``[]``.
        max_len: Maximum chunk length in characters; must be at least 1.
            NOTE(review): the default of 2000 is presumably the TTS endpoint's
            per-request text limit — confirm.

    Returns:
        The chunks in reading order, each stripped of surrounding whitespace.

    Raises:
        ValueError: If ``max_len`` is smaller than 1.
    """
    if max_len < 1:
        raise ValueError("max_len must be at least 1")

    print("✂️ Splitting text into manageable chunks ...")
    chunks, current = [], ""

    for sentence in re.split(r'(?<=[.!?])\s+', text):
        for piece in _split_overlong(sentence.strip(), max_len):
            candidate = f"{current} {piece}" if current else piece
            if len(candidate) <= max_len:
                current = candidate
            else:
                # `current` is non-empty here: a lone piece always fits.
                chunks.append(current)
                current = piece

    if current:
        chunks.append(current)

    print(f"✅ Text split into {len(chunks)} chunks.\n")
    return chunks


def tts_one(chunk: str, index: int) -> str | None:
    """Send one text chunk to the Deepgram speak API and save the audio as MP3.

    The result is stored as ``OUTPUT_DIR/part_<index:03d>.mp3``. If that file
    already exists the request is skipped, which makes the pipeline resumable.
    The audio is written to a temporary sibling file first and renamed only
    once complete, so an interrupted write can never be mistaken for a
    finished part on the next run.

    Args:
        chunk: Text to synthesize.
        index: 1-based chunk number used in the output file name.

    Returns:
        The path of the MP3 file as a string, or ``None`` if the chunk is empty
        or the request failed (the error is printed).
    """
    output_file = OUTPUT_DIR / f"part_{index:03d}.mp3"

    if output_file.exists():
        print(f"⚡ Chunk {index} already processed → {output_file.name}")
        return str(output_file)

    if not chunk.strip():
        print(f"   ⚠ Chunk {index} is empty, skipping")
        return None

    print(f"🎙 Converting chunk {index} to speech ...")

    url = "https://api.deepgram.com/v1/speak"
    headers = {
        "Authorization": f"Token {API_KEY}",
        "Content-Type": "application/json"
    }

    try:
        response = requests.post(
            url,
            params={"model": MODEL},
            headers=headers,
            json={"text": chunk},
            # (connect, read) seconds; without a timeout a stalled connection
            # would block the whole run indefinitely.
            timeout=(10, 120),
        )
    except requests.RequestException as exc:
        print(f"   ❌ Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"   ❌ Error {response.status_code}: {response.text}")
        return None

    # The ".tmp" suffix keeps the in-progress file out of "part_*.mp3" listings.
    tmp_file = output_file.with_name(output_file.name + ".tmp")
    tmp_file.write_bytes(response.content)
    tmp_file.replace(output_file)

    print(f"   ✔ Saved as {output_file.name}\n")
    return str(output_file)


def main():
    """Run the PDF-to-speech pipeline end to end.

    Extracts the text of ``PDF_PATH``, splits it into chunks, converts each
    chunk with ``tts_one`` and writes ``OUTPUT_DIR/parts.txt`` listing the
    produced files in ffmpeg concat format. Chunks whose conversion failed are
    reported at the end instead of being silently counted as successes.
    """
    print("🚀 Starting PDF-to-Speech pipeline...\n")

    # Step 1: Extract PDF
    text = extract_text_from_pdf(PDF_PATH)

    # Step 2: Split into Chunks
    chunks = chunk_sentences(text)
    if not chunks:
        print("⚠ No text could be extracted; nothing to convert.")
        return

    # Step 3: Convert Chunks to Speech
    parts, failed = [], []
    for i, chunk in enumerate(chunks, start=1):
        filename = tts_one(chunk, i)
        if filename:
            parts.append(filename)
        else:
            failed.append(i)
        time.sleep(0.2)  # avoid hitting API too fast

    # Step 4: Write parts.txt for joining
    parts_file = OUTPUT_DIR / "parts.txt"
    with open(parts_file, "w", encoding="utf-8") as f:
        for part in parts:
            f.write(f"file '{pathlib.Path(part).as_posix()}'\n")

    print(f"📂 Parts list saved to {parts_file}")
    if failed:
        # Existing parts are skipped by tts_one, so a re-run only retries these.
        print(f"\n⚠ {len(failed)} of {len(chunks)} chunks failed: {failed}. Re-run to retry them.")
    else:
        print("\n🎉 All chunks processed successfully!")


# Entry point: runs the whole pipeline when the cell is executed (the recorded
# output below shows main() running directly from this cell).
if __name__ == "__main__":
    main()


🚀 Starting PDF-to-Speech pipeline...

📖 Extracting text from: /content/drive/MyDrive/life3.0.pdf ...
   ⚠ Page 1 has no text
   ✔ Page 2 extracted
   ⚠ Page 3 has no text
   ✔ Page 4 extracted
   ✔ Page 5 extracted
   ✔ Page 6 extracted
   ✔ Page 7 extracted
   ✔ Page 8 extracted
   ✔ Page 9 extracted
   ✔ Page 10 extracted
   ✔ Page 11 extracted
   ✔ Page 12 extracted
   ✔ Page 13 extracted
   ✔ Page 14 extracted
   ✔ Page 15 extracted
   ✔ Page 16 extracted
   ✔ Page 17 extracted
   ✔ Page 18 extracted
   ✔ Page 19 extracted
   ✔ Page 20 extracted
   ✔ Page 21 extracted
   ✔ Page 22 extracted
   ✔ Page 23 extracted
   ✔ Page 24 extracted
   ✔ Page 25 extracted
   ✔ Page 26 extracted
   ✔ Page 27 extracted
   ✔ Page 28 extracted
   ✔ Page 29 extracted
   ✔ Page 30 extracted
   ✔ Page 31 extracted
   ✔ Page 32 extracted
   ✔ Page 33 extracted
   ✔ Page 34 extracted
   ✔ Page 35 extracted
   ✔ Page 36 extracted
   ✔ Page 37 extracted
   ✔ Page 38 extracted
   ✔ Page 39 extracted
   ✔ Pa

In [None]:
import pathlib
import subprocess

# 📂 Input folder (where part_XXX.mp3 are stored)
INPUT_DIR = pathlib.Path("/content/drive/MyDrive/tts_outputs")

# 📂 Output folder (final merged file goes here)
# Deliberately NOT called OUTPUT_DIR: the TTS cell's functions read the global
# OUTPUT_DIR at call time, so rebinding it here would make a later re-run of
# that cell write (and look for) its parts in this folder instead.
MERGED_DIR = pathlib.Path("/content/drive/MyDrive/OpenVoice")
MERGED_DIR.mkdir(parents=True, exist_ok=True)

# 📜 Parts list file (temporary, inside output dir)
PARTS_FILE = MERGED_DIR / "parts.txt"

# 🎧 Final merged file
FINAL_OUTPUT = MERGED_DIR / "final_book.mp3"


def create_parts_file(input_dir: pathlib.Path, parts_file: pathlib.Path) -> bool:
    """Write an ffmpeg concat list of all ``part_*.mp3`` files in ``input_dir``.

    Files are ordered by the number after ``part_`` (numerically, so
    ``part_1000`` follows ``part_999``); names without a numeric suffix come
    last in name order. Single quotes in paths are escaped the way the concat
    list format requires.

    Args:
        input_dir: Directory to scan for ``part_*.mp3`` files.
        parts_file: Destination of the generated list (overwritten).

    Returns:
        ``True`` if the list was written, ``False`` if no parts were found
        (in which case ``parts_file`` is not touched).
    """
    print("📝 Generating parts.txt ...")

    def part_order(path: pathlib.Path):
        suffix = path.stem.partition("_")[2]
        if suffix.isdecimal():
            return (0, int(suffix), path.name)
        return (1, 0, path.name)

    mp3_files = sorted(input_dir.glob("part_*.mp3"), key=part_order)
    if not mp3_files:
        print(f"❌ No audio parts found in {input_dir}")
        return False

    with open(parts_file, "w", encoding="utf-8") as f:
        for mp3 in mp3_files:
            # Inside a single-quoted string a quote is written as '\'' .
            escaped = mp3.as_posix().replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")

    print(f"✔ parts.txt created at {parts_file}")
    return True


def merge_audio(parts_file: pathlib.Path, output_file: pathlib.Path):
    """Concatenate the files listed in ``parts_file`` into ``output_file`` via ffmpeg.

    Runs ffmpeg's concat demuxer with stream copy and overwrites an existing
    output (``-y``). Because the command is run with ``check=True``, a non-zero
    ffmpeg exit raises ``subprocess.CalledProcessError``.
    """
    print("🔗 Merging audio parts ...")
    ffmpeg_args = ["ffmpeg", "-y"]
    ffmpeg_args += ["-f", "concat", "-safe", "0"]
    ffmpeg_args += ["-i", str(parts_file)]
    ffmpeg_args += ["-c", "copy", str(output_file)]
    subprocess.run(ffmpeg_args, check=True)
    print(f"🎧 Final audio saved as {output_file}\n")


# Entry point: build the concat list and merge only if at least one part was
# found (create_parts_file returns False otherwise).
if __name__ == "__main__":
    if create_parts_file(INPUT_DIR, PARTS_FILE):
        merge_audio(PARTS_FILE, FINAL_OUTPUT)


📝 Generating parts.txt ...
✔ parts.txt created at /content/drive/MyDrive/OpenVoice/parts.txt
🔗 Merging audio parts ...
🎧 Final audio saved as /content/drive/MyDrive/OpenVoice/final_book.mp3



In [4]:
print("📦 Installing OpenVoice dependencies...")

!rm -rf OpenVoice
!git clone https://github.com/myshell-ai/OpenVoice.git
%cd OpenVoice

!pip install -q unidecode eng_to_ipa inflect pypinyin jieba cn2an \
               librosa pyworld gradio ffmpeg-python faster_whisper \
               whisper_timestamped wavmark pydub

print("✅ Installation complete!")


📦 Installing OpenVoice dependencies...
Cloning into 'OpenVoice'...
remote: Enumerating objects: 460, done.[K
remote: Counting objects: 100% (15/15), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 460 (delta 11), reused 7 (delta 7), pack-reused 445 (from 2)[K
Receiving objects: 100% (460/460), 3.85 MiB | 7.14 MiB/s, done.
Resolving deltas: 100% (218/218), done.
/content/OpenVoice
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.8/2.8 MB[0m [31m39.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.0/261.0 kB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 kB[0m [31m4.2 MB/s[0m eta [36m0:00:0

In [5]:
print("⬇️ Downloading pretrained checkpoints...")

# Paths are relative to the current working directory (the install cell leaves
# the notebook inside the cloned OpenVoice repo via %cd).
!mkdir -p assets/converter_ckpt
# Converter configuration.
!wget -q -O assets/converter_ckpt/config.json \
       https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/config.json
# Converter weights: the remote checkpoint.pth is stored locally as weights.pth.
!wget -q -O assets/converter_ckpt/weights.pth \
       https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/checkpoint.pth

# NOTE(review): wget runs with -q and its exit status is not checked, so this
# message is printed even if a download failed.
print("✅ Converter model downloaded!")


⬇️ Downloading pretrained checkpoints...
✅ Converter model downloaded!


In [6]:
import sys, os
# Make the cloned repository importable regardless of the working directory.
# The repo root is added because `from openvoice... import` needs the directory
# that contains the package; the original nested path is kept for setups where
# the clone ended up one level deeper.
# NOTE(review): presumably only the repo root is actually required — confirm.
# Entries are added once, so re-running the cell does not grow sys.path.
for _path in ("/content/OpenVoice", "/content/OpenVoice/OpenVoice"):
    if _path not in sys.path:
        sys.path.append(_path)

print("🔧 Import paths fixed.")

🔧 Import paths fixed.


In [7]:
from google.colab import drive
# Mount Google Drive at /content/drive so the MyDrive paths below resolve.
# NOTE(review): earlier cells already read and write under /content/drive, so on
# a fresh kernel this mount presumably has to run before them — consider moving
# it to the top of the notebook.
print("📂 Mounting Google Drive...")
drive.mount('/content/drive')

# 🎙️ Your actual file paths
# ref_clip is the same path that the speaker-embedding cell writes as
# wav_ref_path (the ffmpeg-converted recording).
# NOTE(review): input_file and output_file are not referenced by any later cell
# visible in this notebook — confirm whether they are still needed.
ref_clip = "/content/drive/MyDrive/my_voice.wav"  # Reference voice
input_file  = "/content/drive/MyDrive/life3.0_ai.mp3"            # Input audio
output_file = "/content/drive/MyDrive/final_cloned.wav"          # Output cloned voice

# Echo the configured paths for a quick visual check.
print(f"""
✅ File paths ready:
   - Reference: {ref_clip}
   - Input    : {input_file}
   - Output   : {output_file}
""")


📂 Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

✅ File paths ready:
   - Reference: /content/drive/MyDrive/my_voice.wav
   - Input    : /content/drive/MyDrive/life3.0_ai.mp3
   - Output   : /content/drive/MyDrive/final_cloned.wav



In [10]:
import torch, subprocess
from openvoice.api import ToneColorConverter
from openvoice import se_extractor

# --- Device selection (GPU if available) ---
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"⚡ Using device: {device}")

# --- Load model ---
# Paths are relative to the working directory (the cloned OpenVoice repo).
converter = ToneColorConverter("assets/converter_ckpt/config.json", device=device)
converter.load_ckpt("assets/converter_ckpt/weights.pth")
print("✅ Model loaded!")

# --- Convert your reference voice (.opus → .wav) ---
opus_path = "/content/drive/MyDrive/my voice recording .opus"
wav_ref_path = "/content/drive/MyDrive/my_voice.wav"

# check=True stops the cell if ffmpeg fails; otherwise the embedding below would
# be extracted from a missing or stale my_voice.wav without any warning.
subprocess.run([
    "ffmpeg", "-y", "-i", opus_path,
    "-ar", "44100", "-ac", "2", wav_ref_path
], check=True)

# --- Extract speaker embedding ---
# Only the first element returned by get_se is used (as the target embedding).
print("🎤 Extracting speaker embedding...")
target_se, _ = se_extractor.get_se(wav_ref_path, converter)
print("✅ Speaker embedding ready!")


⚡ Using device: cuda


  WeightNorm.apply(module, name, dim)


Loaded checkpoint 'assets/converter_ckpt/weights.pth'
missing/unexpected keys: [] []
✅ Model loaded!
🎤 Extracting speaker embedding...
OpenVoice version: v1




Downloading: "https://github.com/snakers4/silero-vad/zipball/master" to /root/.cache/torch/hub/master.zip
[(0.0, 43.5735)]
after vad: dur = 43.57299319727891


Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at /pytorch/aten/src/ATen/native/SpectralOps.cpp:875.)
  return _VF.stft(  # type: ignore[attr-defined]


✅ Speaker embedding ready!


In [11]:
from concurrent.futures import ThreadPoolExecutor, as_completed
import os, time

# ✅ define your folders again
input_dir = "/content/drive/MyDrive/tts_outputs"
output_dir = "/content/drive/MyDrive/cloned_parts"
os.makedirs(output_dir, exist_ok=True)

# Named `mp3_files` rather than `files` so the `google.colab.files` module
# imported by the upload cell is not shadowed. Sorted for a deterministic
# submission order.
mp3_files = sorted(f for f in os.listdir(input_dir) if f.endswith(".mp3"))

def process_file(file):
    """Convert one TTS part to the cloned voice and return a status message.

    Reads ``input_dir/<file>`` and writes ``output_dir/cloned_<stem>.wav``.
    Existing outputs are skipped, so the cell can be re-run to resume. The
    converter writes to a temporary ``*.partial.wav`` file that is renamed only
    after a successful conversion, so a crash cannot leave a truncated file
    that a later run would skip as "already converted".

    NOTE(review): src_se=None passes no source speaker embedding — confirm this
    is what the converter expects for this input.
    """
    in_path = os.path.join(input_dir, file)
    out_name = f"cloned_{file.rsplit('.',1)[0]}.wav"
    out_path = os.path.join(output_dir, out_name)

    if os.path.exists(out_path):
        return f"⏩ Skipping {file}, already converted."

    # Still ends in .wav so the output format stays the same as before.
    tmp_path = out_path[:-len(".wav")] + ".partial.wav"

    start = time.time()
    try:
        converter.convert(audio_src_path=in_path, src_se=None, tgt_se=target_se, output_path=tmp_path)
        os.replace(tmp_path, out_path)
    finally:
        # Only present if the conversion failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    elapsed = time.time() - start

    return f"✅ Done: {file} (⏱ {elapsed:.2f} sec)"

# Parallel loop with more workers
# NOTE(review): all workers share one `converter` instance — confirm that it is
# safe to call concurrently.
with ThreadPoolExecutor(max_workers=4) as executor:  # try 3 or 4 on Tesla T4
    futures = {executor.submit(process_file, f): f for f in mp3_files}

    for fut in as_completed(futures):
        # Report a failing file and keep going instead of aborting the report
        # loop on the first exception.
        try:
            print(fut.result())
        except Exception as exc:
            print(f"❌ Failed: {futures[fut]} ({exc})")


⏩ Skipping part_001.mp3, already converted.
⏩ Skipping part_002.mp3, already converted.
⏩ Skipping part_006.mp3, already converted.
⏩ Skipping part_003.mp3, already converted.
⏩ Skipping part_004.mp3, already converted.
⏩ Skipping part_005.mp3, already converted.
⏩ Skipping part_007.mp3, already converted.
⏩ Skipping part_008.mp3, already converted.
⏩ Skipping part_010.mp3, already converted.
⏩ Skipping part_009.mp3, already converted.
⏩ Skipping part_013.mp3, already converted.
⏩ Skipping part_011.mp3, already converted.
⏩ Skipping part_012.mp3, already converted.
⏩ Skipping part_015.mp3, already converted.
⏩ Skipping part_014.mp3, already converted.
⏩ Skipping part_016.mp3, already converted.
⏩ Skipping part_017.mp3, already converted.
⏩ Skipping part_020.mp3, already converted.
⏩ Skipping part_019.mp3, already converted.
⏩ Skipping part_018.mp3, already converted.
⏩ Skipping part_021.mp3, already converted.
⏩ Skipping part_023.mp3, already converted.
⏩ Skipping part_024.mp3, already

In [12]:
from google.colab import drive
# Repeats the mount from the earlier cell; the recorded output reports the drive
# as already mounted, so in that session this call had no effect.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
from pydub import AudioSegment
import os

cloned_dir = "/content/drive/MyDrive/cloned_parts"
final_output = "/content/drive/MyDrive/final_cloned_full.wav"

# get original file order to preserve sequence
# os.listdir() returns names in arbitrary order, so the names are sorted by the
# number after the last "_" (numerically, so part_1000 follows part_999); names
# without such a number come last in name order.
input_dir = "/content/drive/MyDrive/tts_outputs"

def _part_key(name):
    number = name.rsplit('.', 1)[0].rpartition('_')[2]
    return (0, int(number)) if number.isdecimal() else (1, name)

ordered_files = [f"cloned_{f.rsplit('.',1)[0]}.wav"
                 for f in sorted(os.listdir(input_dir), key=_part_key)
                 if f.endswith(".mp3")]

# merge
merged = AudioSegment.silent(duration=0)
missing = []
for file in ordered_files:
    path = os.path.join(cloned_dir, file)
    if os.path.exists(path):
        print(f"🔗 Adding {file}")
        seg = AudioSegment.from_wav(path)
        merged += seg
    else:
        print(f"⚠️ Missing {file}, skipping.")
        missing.append(file)

merged.export(final_output, format="wav")
print(f"🎶 Final merged file saved → {final_output}")
if missing:
    # Make gaps in the final audio impossible to overlook.
    print(f"⚠️ {len(missing)} part(s) were missing from the merge: {missing}")


🔗 Adding cloned_part_001.wav
🔗 Adding cloned_part_002.wav
🔗 Adding cloned_part_003.wav
🔗 Adding cloned_part_004.wav
🔗 Adding cloned_part_005.wav
🔗 Adding cloned_part_006.wav
🔗 Adding cloned_part_007.wav
🔗 Adding cloned_part_008.wav
🔗 Adding cloned_part_009.wav
🔗 Adding cloned_part_010.wav
🔗 Adding cloned_part_011.wav
🔗 Adding cloned_part_012.wav
🔗 Adding cloned_part_013.wav
🔗 Adding cloned_part_014.wav
🔗 Adding cloned_part_015.wav
🔗 Adding cloned_part_016.wav
🔗 Adding cloned_part_017.wav
🔗 Adding cloned_part_018.wav
🔗 Adding cloned_part_019.wav
🔗 Adding cloned_part_020.wav
🔗 Adding cloned_part_021.wav
🔗 Adding cloned_part_022.wav
🔗 Adding cloned_part_023.wav
🔗 Adding cloned_part_024.wav
🔗 Adding cloned_part_025.wav
🔗 Adding cloned_part_026.wav
🔗 Adding cloned_part_027.wav
🔗 Adding cloned_part_028.wav
🔗 Adding cloned_part_029.wav
🔗 Adding cloned_part_030.wav
🔗 Adding cloned_part_031.wav
🔗 Adding cloned_part_032.wav
🔗 Adding cloned_part_033.wav
🔗 Adding cloned_part_034.wav
🔗 Adding clone

In [6]:
!pip uninstall -y torch torchvision torchaudio
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

Found existing installation: torch 2.8.0+cu126
Uninstalling torch-2.8.0+cu126:
  Successfully uninstalled torch-2.8.0+cu126
Found existing installation: torchvision 0.23.0+cu126
Uninstalling torchvision-0.23.0+cu126:
  Successfully uninstalled torchvision-0.23.0+cu126
Found existing installation: torchaudio 2.8.0+cu126
Uninstalling torchaudio-2.8.0+cu126:
  Successfully uninstalled torchaudio-2.8.0+cu126
Looking in indexes: https://download.pytorch.org/whl/cu118
Collecting torch
  Downloading https://download.pytorch.org/whl/cu118/torch-2.7.1%2Bcu118-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (28 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cu118/torchvision-0.22.1%2Bcu118-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting torchaudio
  Downloading https://download.pytorch.org/whl/cu118/torchaudio-2.7.1%2Bcu118-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu11==11.8.89 (from torch)
  Downloading htt

In [2]:
import torch

# Quick GPU sanity check: availability flag, the CUDA version reported by
# torch.version.cuda, and the name of device 0 when a GPU is present.
gpu_available = torch.cuda.is_available()
print(gpu_available)   # True if GPU is active
print(torch.version.cuda)          # Shows CUDA version
if gpu_available:
    print(torch.cuda.get_device_name(0))  # e.g. Tesla T4

True
12.6
Tesla T4
