# Text-To-Speech (TTS)

In [None]:
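# One-time setup: uncomment and run to download the Fish-Speech 1.5 checkpoints
# into checkpoints/fish-speech-1.5/, where the inference cell expects them.
# !huggingface-cli download fishaudio/fish-speech-1.5 --local-dir checkpoints/fish-speech-1.5/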

In [None]:
import os
import pandas as pd
from tqdm import tqdm
import re
from subprocess import run

In [None]:
# Read the complete transcription table, order it by the "confidence" column
# (highest first) and renumber the index to follow the new row order.
transcriptions = (
    pd.read_csv("../Dataset/Transcriptions/transcriptions_complete.csv")
    .sort_values("confidence", ascending=False)
    .reset_index(drop=True)
)
transcriptions.head()

In [None]:
# Parameters
watermark_path = "../Dataset/Watermarked Audio"
target_sr = 16000  # NOTE(review): not referenced in the visible cells; confirm it is still needed

# List both directories once, up front
watermarked_files = os.listdir(watermark_path)
current_progress = os.listdir()  # current working directory

# Clips that already have an output file in the working directory.
# re.search is used instead of re.findall(...)[0] so that a file whose name
# contains "common_voice_en_" without a numeric id is skipped rather than
# aborting the cell with an IndexError.
clip_id_pattern = re.compile("common_voice_en_\\d+")
current_progress2 = [
    match.group(0) + ".mp3"  # clip names in the transcription table are assumed to end in .mp3
    for match in map(clip_id_pattern.search, current_progress)
    if match
]

# Keep only the transcriptions whose clip has not been processed yet
transcriptions = transcriptions.loc[
    ~transcriptions["clip"].isin(current_progress2)
].reset_index(drop=True)

# Map "<clip name without the _watermarked suffix>" -> watermarked file name,
# so the processing loop can do a dict lookup instead of a linear search.
watermarked_lookup = {
    re.sub(r"_watermarked.*", "", f): f for f in watermarked_files
}

In [None]:
# (rows, columns) of the transcription table; once the filtering cell has run,
# the row count is the number of clips that still have to be processed.
transcriptions.shape

In [None]:
# Run the Fish-Speech VQGAN inference script once per pending clip.
# Only the "clip" column is needed, so iterate over it directly instead of
# materialising every row with iterrows().
for clip_path in tqdm(transcriptions["clip"], total=len(transcriptions)):
    # Drop the extension generically rather than assuming it is 4 characters long
    clip_key = os.path.splitext(clip_path)[0]
    clip_water_path = watermarked_lookup.get(clip_key)

    if not clip_water_path:
        print(f"Warning: No match for {clip_key}")
        continue

    # Input is read from the watermarked folder; the output name is a bare
    # file name, so it is resolved relative to the working directory.
    src_audio = os.path.join(watermark_path, clip_water_path)
    out_audio = clip_water_path.replace("watermarked", "unwatermarked")

    # NOTE(review): "python" is resolved through PATH and may not be the
    # interpreter the notebook kernel runs on; confirm this is intended.
    # Output is captured so that a failure can be reported inside the notebook.
    result = run(
        [
            "python", "fish-speech-main/fish_speech/models/vqgan/inference.py",
            "-i", src_audio,
            "-o", out_audio,
            "--checkpoint-path", "checkpoints/fish-speech-1.5/firefly-gan-vq-fsq-8x1024-21hz-generator.pth",
            "-d", "cuda",
        ],
        capture_output=True,
        text=True,
        errors="replace",  # never fail on undecodable bytes in the child's output
    )

    if result.returncode != 0:
        print(f"Error processing {src_audio}")
        # Show the tail of stderr so the cause of the failure is visible
        if result.stderr:
            print(result.stderr.strip()[-2000:])

In [None]:
from IPython.display import Audio, display

# Play back a generated file. The path is relative instead of an absolute,
# machine-specific one so the notebook runs on other machines.
# Assumes the working directory is the folder that contains fish-speech-main/
# (the inference cell's relative script path relies on the same layout).
fake_audio_path = "fish-speech-main/fake.wav"
audio = Audio(filename=fake_audio_path)
display(audio)