In [None]:
import pandas as pd

# re-assign issue/article numbers, whoops
# Both columns are rebuilt from the row order of the CSV:
#   issue_number   - 1-based id per distinct issue_title, in order of first appearance
#   article_number - 1-based position of the row within its issue
df = pd.read_csv("./data/naut_all.csv").assign(
    issue_number=lambda frame: frame.issue_title.factorize()[0] + 1,
    # evaluated after issue_number, so the freshly assigned column can be grouped on
    article_number=lambda frame: frame.groupby("issue_number").cumcount() + 1,
)


In [None]:
# GCP cost?
def string_size_in_bytes(s):
    """Return how many bytes the string `s` occupies once encoded as UTF-8."""
    utf8_payload = s.encode("utf-8")
    return len(utf8_payload)


# Projected GCP bill in USD: total UTF-8 size of all article text, minus the
# 1,000,000 free bytes, priced at $0.000016 per byte.
total_article_bytes = df.article.apply(string_size_in_bytes).sum()  # 23 million bytes
(total_article_bytes - 1000000) * 0.000016
# ~353.800848 US for all text, less free month allowance


In [None]:
# coqui price estimates?
from pathlib import Path

from pydub import AudioSegment

# 10 TTS
# Every .mp3 located at least one directory below the tacotron2_ddc_ph output folder.
audio_files = [*Path("./data/tts_output/tacotron2_ddc_ph").rglob("*/*.mp3")]


def get_audio_length(filename):
    """Load the audio file at `filename` with pydub and return its length in milliseconds."""
    return len(AudioSegment.from_file(filename))


# Length of each generated clip in seconds (get_audio_length reports milliseconds).
audio_all = [get_audio_length(audio_file) / 1000.0 for audio_file in audio_files]

# hours > https://coqui.ai/pricing > $20/4 hours
sum(audio_all) / 60 / 60
# 2.2028605555555556, yikes


## Coqui model revisions

In [None]:
from TTS.api import TTS

# Instantiate Coqui's TTS API with the "tts_models/en/vctk/vits" model.
# NOTE(review): `tts_coqui_vctk` below builds its own TTS instance, and the name `tts`
# is rebound to a Tortoise `TextToSpeech` object in the Tortoise section.
tts = TTS(model_name="tts_models/en/vctk/vits")


In [None]:
# Speaker ids for the VCTK VITS model, in the same order as the id -> index mapping
# shown further down. The first entry ("ED\n") is skipped by the loop that uses
# this list (`speakers[1:]`).
speakers = [
    "ED\n", "p225", "p226", "p227", "p228", "p229", "p230", "p231", "p232", "p233",
    "p234", "p236", "p237", "p238", "p239", "p240", "p241", "p243", "p244", "p245",
    "p246", "p247", "p248", "p249", "p250", "p251", "p252", "p253", "p254", "p255",
    "p256", "p257", "p258", "p259", "p260", "p261", "p262", "p263", "p264", "p265",
    "p266", "p267", "p268", "p269", "p270", "p271", "p272", "p273", "p274", "p275",
    "p276", "p277", "p278", "p279", "p280", "p281", "p282", "p283", "p284", "p285",
    "p286", "p287", "p288", "p292", "p293", "p294", "p295", "p297", "p298", "p299",
    "p300", "p301", "p302", "p303", "p304", "p305", "p306", "p307", "p308", "p310",
    "p311", "p312", "p313", "p314", "p316", "p317", "p318", "p323", "p326", "p329",
    "p330", "p333", "p334", "p335", "p336", "p339", "p340", "p341", "p343", "p345",
    "p347", "p351", "p360", "p361", "p362", "p363", "p364", "p374", "p376",
]


In [None]:
import time

from tts import output_dir
from utils import logger, nautilus_editors_note


def tts_coqui_vctk(speaker_index, text, save_path):
    """Synthesize `text` with Coqui's "tts_models/en/vctk/vits" model into `save_path`.

    Args:
        speaker_index: Speaker identifier, forwarded unchanged to `tts_to_file` as its
            `speaker` argument (the loop in this notebook passes entries of `speakers`).
        text: Text to synthesize.
        save_path: Destination file, as a `str` or `pathlib.Path`. Missing parent
            directories are created.

    The elapsed synthesis time (model loading excluded) is logged via `logger.info`.
    """
    from pathlib import Path  # local import so the cell does not depend on earlier cells

    save_path = Path(save_path)
    # Same idiom as the Tortoise cell: no separate exists() check needed.
    save_path.parent.mkdir(parents=True, exist_ok=True)

    # Build the model once and keep it on the function object, so that calling this
    # function for many speakers does not re-instantiate the model every time.
    # `del tts_coqui_vctk._model` releases it again.
    model = getattr(tts_coqui_vctk, "_model", None)
    if model is None:
        model = TTS(model_name="tts_models/en/vctk/vits", gpu=True)
        tts_coqui_vctk._model = model

    # perf_counter is monotonic, unlike time.time(), so the difference is a safe duration.
    start = time.perf_counter()
    model.tts_to_file(text=text, file_path=save_path, speaker=speaker_index)
    elapsed = time.perf_counter() - start
    logger.info(f"Successfully Coqui TTS {save_path.name} in {elapsed} seconds")


# Walk over every speaker id except the first list entry ("ED\n").
# NOTE(review): the loop currently does nothing. A bare expression inside a `for`
# body is not displayed, and the synthesis call below is commented out.
for speaker in speakers[1:]:
    speaker
    # output_file = output_dir / f"vctk_{speaker}_nautilus_editors_note.mp3"
    # tts_coqui_vctk(speaker, nautilus_editors_note, output_file)


In [None]:
# Run the Coqui `tts` CLI with --list_speaker_idxs for the "tts_models/en/vctk/vits" model.
!tts --model_name "tts_models/en/vctk/vits" \
--list_speaker_idxs 

In [None]:
# Speaker id -> index mapping for the VCTK VITS model
# (presumably pasted from the `--list_speaker_idxs` output; verify before relying on it).
{
    "ED\n": 0, "p225": 1, "p226": 2, "p227": 3, "p228": 4,
    "p229": 5, "p230": 6, "p231": 7, "p232": 8, "p233": 9,
    "p234": 10, "p236": 11, "p237": 12, "p238": 13, "p239": 14,
    "p240": 15, "p241": 16, "p243": 17, "p244": 18, "p245": 19,
    "p246": 20, "p247": 21, "p248": 22, "p249": 23, "p250": 24,
    "p251": 25, "p252": 26, "p253": 27, "p254": 28, "p255": 29,
    "p256": 30, "p257": 31, "p258": 32, "p259": 33, "p260": 34,
    "p261": 35, "p262": 36, "p263": 37, "p264": 38, "p265": 39,
    "p266": 40, "p267": 41, "p268": 42, "p269": 43, "p270": 44,
    "p271": 45, "p272": 46, "p273": 47, "p274": 48, "p275": 49,
    "p276": 50, "p277": 51, "p278": 52, "p279": 53, "p280": 54,
    "p281": 55, "p282": 56, "p283": 57, "p284": 58, "p285": 59,
    "p286": 60, "p287": 61, "p288": 62, "p292": 63, "p293": 64,
    "p294": 65, "p295": 66, "p297": 67, "p298": 68, "p299": 69,
    "p300": 70, "p301": 71, "p302": 72, "p303": 73, "p304": 74,
    "p305": 75, "p306": 76, "p307": 77, "p308": 78, "p310": 79,
    "p311": 80, "p312": 81, "p313": 82, "p314": 83, "p316": 84,
    "p317": 85, "p318": 86, "p323": 87, "p326": 88, "p329": 89,
    "p330": 90, "p333": 91, "p334": 92, "p335": 93, "p336": 94,
    "p339": 95, "p340": 96, "p341": 97, "p343": 98, "p345": 99,
    "p347": 100, "p351": 101, "p360": 102, "p361": 103, "p362": 104,
    "p363": 105, "p364": 106, "p374": 107, "p376": 108,
}


In [None]:
from utils import nautilus_editors_note

# Show the value imported from the project's `utils` module; other cells pass it
# to the TTS and spaCy calls as the text to process.
nautilus_editors_note


In [None]:
# code equivalent for these models?
# Run the Coqui `tts` CLI on the inline sample text with the "tts_models/en/vctk/vits"
# model and speaker "p227", with --use_cuda True, writing the audio to spkr-out.wav.
!tts --text "Behold the humble nautilus. Just about a foot in diameter, it is a slow bottom-dweller with short tentacles that moves through the water with an unsteady wobble. It\'s also 500 million years old and, in its day, was the best and brightest, using its newly evolved depth control to lay waste to acre after acre of scuttling crustacean prey.\nWe became interested in it here at Nautilus because, well, we stole its name. But also because (for a mollusk) it represents a remarkable intersection of science, math, myth, and culture. Since that is exactly the kind of intersection we love to write about, we decided to put together a little "teaser" issue all about it.\nThere\'s the science. The nautilus has a beautiful, logarithmic, and fractal spiral in its shell. Benoit Mandelbrot, discoverer of the fractal, gives us a few words on that topic. One of the world\'s foremost nautilus experts, Peter Ward, tells us about nautilus evolution and biology, and about his life of nautilus research.\nThen, the myth: from Jules Verne\'s fictional submarine, to Oliver Wendell Holmes\' poem, to how and why we turn science into story.\nTwo chapters, one undersea creature. Welcome aboard.\nMichael Segal\n    Editor in Chief"\
--out_path spkr-out.wav --model_name "tts_models/en/vctk/vits" \
--use_cuda True \
--speaker_idx "p227"


## Tortoise Diffusion models

In [None]:
import sys
from pathlib import Path

import torch
import torchaudio

from tortoise.api import MODELS_DIR, TextToSpeech
from tortoise.utils.audio import load_voices

# NOTE(review): this entry is appended only after the `tortoise` imports above have
# already run, so it cannot affect them. Confirm whether it is still needed.
sys.path.append("../tortoise-tts/tortoise")


# Whitespace-separated voice names (looks like a pasted directory listing; TODO confirm).
voices = """angie                daniel  freeman  jlaw  myself  rainbow       tom           train_dotrice  train_grace     train_mouse
applejack            deniro  geralt   lj    pat     snakes        train_atkins  train_dreams   train_kennard   weaver
cond_latent_example  emma    halle    mol   pat2    tim_reynolds  train_daws    train_empire   train_lescault  william"""


# Folder that receives the Tortoise test clips.
output_dir = Path("./data/tts_output/dev/tortoise")
output_dir.mkdir(exist_ok=True, parents=True)

# Every whitespace-delimited token of `voices`, in reading order.
voices_formatted = voices.split()
# NOTE(review): despite its name, `model` holds a voice name (the first token, "angie").
model = voices_formatted[0]

tts = TextToSpeech(models_dir=MODELS_DIR)
voice_samples, conditioning_latents = load_voices([model])


In [None]:
import spacy
from utils import nautilus_editors_note

# Load spaCy's "en_core_web_md" pipeline and split the editor's note into sentence strings.
nlp = spacy.load("en_core_web_md")

sents = [sentence.text for sentence in nlp(nautilus_editors_note).sents]


In [None]:
# Synthesize each sentence on its own and store it as test_<index>.wav in `output_dir`.
for idx, sent in enumerate(sents):
    print(sent)
    wav_path = output_dir / f"test_{idx}.wav"
    gen, dbg_state = tts.tts_with_preset(
        sent,
        voice_samples=voice_samples,
        conditioning_latents=conditioning_latents,
        preset="fast",
        k=1,
        cvvp_amount=0.0,
        # fixed seed; the call also hands back the deterministic state (dbg_state)
        use_deterministic_seed=42,
        return_deterministic_state=True,
    )
    # 24000 is the sample rate handed to torchaudio.save
    torchaudio.save(str(wav_path), gen.squeeze(0).cpu(), 24000)


In [None]:
# picks?
# (Kept as comments: these are notes, not defined names. As bare identifiers they
# raised NameError and stopped "Run All" at this cell.)
#
# ddc
#
# en_au_neural2_a
# en_au_neural2_b
#
# en_au_neural2_c
# en_au_neural2_d


In [None]:
# Candidate model configurations, one dict per model.
# NOTE(review): all three entries are currently identical. They look like placeholders
# still waiting to be filled in with different models; confirm.
model_configurations = [
    {"type": "coqui", "name": "tts_models/en/ljspeech/tacotron2-DDC"},
    {"type": "coqui", "name": "tts_models/en/ljspeech/tacotron2-DDC"},
    {"type": "coqui", "name": "tts_models/en/ljspeech/tacotron2-DDC"},
]


In [None]:
from pathlib import Path

from tts import tts_coqui
from utils import to_snake_case

# Run the first 10 rows of `df` through `tts_coqui`, writing one .mp3 per article
# into one sub-folder per issue, all under a folder named after the model.
coqui_model = "tts_models/en/ljspeech/tacotron2-DDC_ph"
output_dir = Path("./data/tts_output/") / to_snake_case(Path(coqui_model).name)
# exist_ok=True replaces the separate exists() check (same idiom as the Tortoise cell).
output_dir.mkdir(parents=True, exist_ok=True)

for idx, row in df.head(10).iterrows():
    # <issue_number>_<issue_title> / <article_number>_<headline>.mp3
    issue_dir = output_dir / f"{row.issue_number}_{to_snake_case(row.issue_title)}"
    issue_dir.mkdir(parents=True, exist_ok=True)
    article_fp = issue_dir / f"{row.article_number}_{to_snake_case(row.headline)}.mp3"

    tts_coqui(coqui_model, row.article, article_fp)


In [None]:
# NOTE(review): relies on `issue_dir` and `row` left over from the last iteration of
# the loop in the previous cell. Unlike the files written there, this name has no
# `{row.article_number}_` prefix.
issue_dir / f"{to_snake_case(row.headline)}.mp3"
