This notebook takes a script string or file and generates TTS audio from it. The script is chunked by section, using markdown headers as delimiters. Each chunk of text is generated into audio using ElevenLabs.

Section titles are saved as their own chunks, separate from the section content, to allow for more natural pauses between sections.


In [None]:
import os, sys

sys.path.append(os.path.abspath('..'))

import json
from src.clients import elevenlabs
from src.utils import saveB64Audio, chunkTextForTTS, create_project_folder
from src.enums import ElevenLabsTTSModel

# TTS model passed to create_audio when generating audio.
# if not specified, will use default model from config/config.yaml
tts_model = ElevenLabsTTSModel.Turbo_v25.value

# Inline script text; when left empty, the script is read from script_location.
script = """"""
# Path of the text file used as the script when `script` is empty.
script_location = "input-test.txt"
# Index of a single chunk to generate; a negative value generates every chunk.
specific_chunk = -1


def create_audio(script,
                 audio_name='audio.mp3',
                 project_folder=None,
                 model=None):
	"""Generate speech for ``script`` and save it inside a project folder.

	The text is sent to ``elevenlabs.getSpeechB64`` together with ``model``,
	and the returned audio is written with ``saveB64Audio``.

	Args:
		script: Text to convert to speech.
		audio_name: File name of the audio file inside the project folder.
		project_folder: Destination folder; when ``None`` a folder is obtained
			from ``create_project_folder()``.
		model: TTS model forwarded to ``elevenlabs.getSpeechB64``.

	Returns:
		A ``(project_folder, audio_path)`` tuple, where ``audio_path`` is
		``audio_name`` joined onto the project folder.
	"""
	# TODO use request stitching to improve gaps
	# see https://elevenlabs.io/docs/api-reference/how-to-use-request-stitching
	target_folder = create_project_folder() if project_folder is None else project_folder

	encoded_audio = elevenlabs.getSpeechB64(script, model)
	destination = os.path.join(target_folder, audio_name)
	saveB64Audio(encoded_audio, destination)

	print(f"Audio {audio_name} created successfully in {target_folder}")
	return target_folder, destination


# --- Resolve the script text -------------------------------------------------
# An inline `script` takes precedence; otherwise the file at `script_location`
# is read. With neither available there is nothing to synthesize.
input_exists = os.path.exists(script_location)
if not script and not input_exists:
	raise SystemExit("Please provide a script or a script location")

if not script and script_location:
	with open(script_location, 'r') as f:
		script = f.read()

# --- Split the script and generate audio -------------------------------------
chunks = chunkTextForTTS(script)
project_folder = create_project_folder()

if specific_chunk >= 0:
	# Generate a single chunk only.
	if specific_chunk >= len(chunks):
		raise SystemExit(
		    f"specific_chunk {specific_chunk} is out of range; "
		    f"the script only has {len(chunks)} chunks")

	# The 'audio' key is otherwise only assigned by the full-run loop below, so
	# make sure every chunk carries a file name (same `<index>_<type>.mp3`
	# scheme) before one is looked up or the manifest is written.
	for i, chunk in enumerate(chunks):
		chunk.setdefault('audio', f"{i}_{chunk['type']}.mp3")

	chunk = chunks[specific_chunk]
	# Pass tts_model here as well so both modes use the same voice model.
	create_audio(chunk['content'], chunk['audio'], project_folder, tts_model)
else:
	# Generate every chunk, skipping files that already exist on disk.
	skipped = 0
	for i, chunk in enumerate(chunks):
		chunk_type = chunk['type']
		audio_name = f'{i}_{chunk_type}.mp3'
		chunk['audio'] = audio_name
		audio_exists = os.path.exists(os.path.join(project_folder, audio_name))

		if audio_exists:
			skipped += 1
			continue

		content = chunk['content']
		create_audio(content, audio_name, project_folder, tts_model)

	if skipped > 0:
		print(f"Skipped generating {skipped} audio files")

# --- Persist the chunk manifest ----------------------------------------------
# The manifest is written as JSON, so it must be read back as JSON: eval()
# fails on JSON literals such as null/true/false and would execute whatever
# the file contains.
chunks_name = os.path.join(project_folder, 'tts-chunks.json')
existing_chunks = None
if os.path.exists(chunks_name):
	with open(chunks_name, 'r') as f:
		try:
			existing_chunks = json.load(f)
		except json.JSONDecodeError:
			# An unreadable manifest is simply replaced below.
			existing_chunks = None

# Rewrite only when the manifest is missing, unreadable or out of date. The
# read handle is already closed here, so the file is never open twice.
if existing_chunks != chunks:
	with open(chunks_name, 'w') as f:
		f.write(json.dumps(chunks))

In [2]:
# elevenlabs.getModels()

In [None]:
import os
import json
import sys
from PyQt5.QtWidgets import QApplication
from src.gui.windows.chunk_manager import ChunkManager

# Open the ChunkManager window for the `chunks` and `project_folder` produced
# by the generation cell earlier in this notebook.
app = QApplication(sys.argv)
ex = ChunkManager(chunks, project_folder)
ex.show()
# Start the Qt event loop for the window.
app.exec_()
# IPython line magic (not plain Python syntax).
# NOTE(review): presumably meant to enable Qt event-loop integration in the
# kernel; it is placed after app.exec_() — confirm this ordering is intended.
%gui qt