Skip to content

Commit

Permalink
Merge pull request #30 from xtmu/dev
Browse files Browse the repository at this point in the history
feat: add Edge TTS provider
  • Loading branch information
p0n1 committed Jan 11, 2024
2 parents 702acc8 + 1be794c commit 7432dc7
Show file tree
Hide file tree
Showing 9 changed files with 524 additions and 76 deletions.
8 changes: 6 additions & 2 deletions .gitignore
Expand Up @@ -3,11 +3,12 @@ __pycache__/
*.pyc
*.pyo
*.pyd
*.pyc

# Virtual environment
venv/
.idea
.history/
.run/

# Temporary files
*.tmp
Expand All @@ -26,4 +27,7 @@ Thumbs.db
# audio files
audiobook_output/

private_examples/
private_examples/

# custom
scripts/
6 changes: 4 additions & 2 deletions README.md
@@ -1,6 +1,6 @@
# EPUB to Audiobook Converter

This project provides a command-line tool to convert EPUB ebooks into audiobooks. It now supports both the [Microsoft Azure Text-to-Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech) and the [OpenAI Text-to-Speech API](https://platform.openai.com/docs/guides/text-to-speech) to generate the audio for each chapter in the ebook. The output audio files are optimized for use with [Audiobookshelf](https://github.com/advplyr/audiobookshelf).
This project provides a command-line tool to convert EPUB ebooks into audiobooks. It now supports both the [Microsoft Azure Text-to-Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech) (alternatively [EdgeTTS](https://github.com/rany2/edge-tts)) and the [OpenAI Text-to-Speech API](https://platform.openai.com/docs/guides/text-to-speech) to generate the audio for each chapter in the ebook. The output audio files are optimized for use with [Audiobookshelf](https://github.com/advplyr/audiobookshelf).

*This project is developed with the help of ChatGPT.*

Expand Down Expand Up @@ -216,6 +216,9 @@ Check this [step by step guide](https://gist.github.com/p0n1/cba98859cdb6331cc1a
*Source: <https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech#prerequisites>*
## About Edge TTS
Edge TTS and Azure TTS are almost the same. The difference is that Edge TTS doesn't require an API key, because it is based on the Edge read-aloud functionality, and its parameters are somewhat restricted, such as [custom SSML](https://github.com/rany2/edge-tts#custom-ssml).
## How to Get Your OpenAI API Key?
Check https://platform.openai.com/docs/quickstart/account-setup. Make sure you check the [price](https://openai.com/pricing) details before use.
Expand Down Expand Up @@ -262,7 +265,6 @@ Here are some examples that demonstrate various option combinations:
```sh
python3 main.py "path/to/book.epub" "path/to/output/folder" --tts azure --chapter_start 5 --chapter_end 10 --break_duration "1500"
```
### Examples Using OpenAI TTS
1. **Basic conversion using OpenAI with default settings**
Expand Down
12 changes: 9 additions & 3 deletions audiobook_generator/book_parsers/epub_book_parser.py
Expand Up @@ -45,7 +45,12 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]:
for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
content = item.get_content()
soup = BeautifulSoup(content, "lxml")
title = soup.title.string if soup.title else ""
title = ""
title_levels = ['title', 'h1', 'h2', 'h3']
for level in title_levels:
if soup.find(level):
title = soup.find(level).text
break
raw = soup.get_text(strip=False)
logger.debug(f"Raw text: <{raw[:]}>")

Expand All @@ -67,7 +72,7 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]:
logger.debug(f"Cleaned text step 4: <{cleaned_text[:100]}>")

# fill in the title if it's missing
if not title:
if title == "":
title = cleaned_text[:60]
logger.debug(f"Raw title: <{title}>")
title = self._sanitize_title(title, break_string)
Expand All @@ -77,7 +82,8 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]:
soup.decompose()
return chapters

def _sanitize_title(self, title, break_string) -> str:
@staticmethod
def _sanitize_title(title, break_string) -> str:
# replace MAGIC_BREAK_STRING with a blank space
# strip in case the leading blank is missing
title = title.replace(break_string, " ")
Expand Down
6 changes: 6 additions & 0 deletions audiobook_generator/config/general_config.py
Expand Up @@ -23,5 +23,11 @@ def __init__(self, args):
# TTS provider: Azure specific arguments
self.break_duration = args.break_duration

# TTS provider: Edge specific arguments
self.voice_rate = args.voice_rate
self.voice_volume = args.voice_volume
self.voice_pitch = args.voice_pitch
self.proxy = args.proxy

def __str__(self):
return ', '.join(f"{key}={value}" for key, value in self.__dict__.items())
126 changes: 65 additions & 61 deletions audiobook_generator/core/audiobook_generator.py
Expand Up @@ -34,64 +34,68 @@ def __str__(self) -> str:
return f"{self.config}"

def run(self):
book_parser = get_book_parser(self.config)
tts_provider = get_tts_provider(self.config)

os.makedirs(self.config.output_folder, exist_ok=True)
chapters = book_parser.get_chapters(tts_provider.get_break_string())
# Filter out empty or very short chapters
chapters = [(title, text) for title, text in chapters if text.strip()]

logger.info(f"Chapters count: {len(chapters)}.")

# Check chapter start and end args
if self.config.chapter_start < 1 or self.config.chapter_start > len(chapters):
raise ValueError(
f"Chapter start index {self.config.chapter_start} is out of range. Check your input."
)
if self.config.chapter_end < -1 or self.config.chapter_end > len(chapters):
raise ValueError(
f"Chapter end index {self.config.chapter_end} is out of range. Check your input."
)
if self.config.chapter_end == -1:
self.config.chapter_end = len(chapters)
if self.config.chapter_start > self.config.chapter_end:
raise ValueError(
f"Chapter start index {self.config.chapter_start} is larger than chapter end index {self.config.chapter_end}. Check your input."
)

logger.info(f"Converting chapters from {self.config.chapter_start} to {self.config.chapter_end}.")

# Initialize total_characters to 0
total_characters = get_total_chars(chapters)
logger.info(f"✨ Total characters in selected book: {total_characters} ✨")
rough_price = tts_provider.estimate_cost(total_characters)
confirm_conversion(rough_price)

# Loop through each chapter and convert it to speech using the provided TTS provider
for idx, (title, text) in enumerate(chapters, start=1):
if idx < self.config.chapter_start:
continue
if idx > self.config.chapter_end:
break
logger.info(
f"Converting chapter {idx}/{len(chapters)}: {title}, characters: {len(text)}"
)

if self.config.output_text:
text_file = os.path.join(self.config.output_folder, f"{idx:04d}_{title}.txt")
with open(text_file, "w", encoding='utf-8') as file:
file.write(text)

if self.config.preview:
continue

output_file = os.path.join(self.config.output_folder,
f"{idx:04d}_{title}.{tts_provider.get_output_file_extension()}")

audio_tags = AudioTags(title, book_parser.get_book_author(), book_parser.get_book_title(), idx)
tts_provider.text_to_speech(
text,
output_file,
audio_tags,
)
try:
book_parser = get_book_parser(self.config)
tts_provider = get_tts_provider(self.config)

os.makedirs(self.config.output_folder, exist_ok=True)
chapters = book_parser.get_chapters(tts_provider.get_break_string())
# Filter out chapters whose text is empty or whitespace-only
chapters = [(title, text) for title, text in chapters if text.strip()]

logger.info(f"Chapters count: {len(chapters)}.")

# Check chapter start and end args
if self.config.chapter_start < 1 or self.config.chapter_start > len(chapters):
raise ValueError(
f"Chapter start index {self.config.chapter_start} is out of range. Check your input."
)
if self.config.chapter_end < -1 or self.config.chapter_end > len(chapters):
raise ValueError(
f"Chapter end index {self.config.chapter_end} is out of range. Check your input."
)
if self.config.chapter_end == -1:
self.config.chapter_end = len(chapters)
if self.config.chapter_start > self.config.chapter_end:
raise ValueError(
f"Chapter start index {self.config.chapter_start} is larger than chapter end index {self.config.chapter_end}. Check your input."
)

logger.info(f"Converting chapters from {self.config.chapter_start} to {self.config.chapter_end}.")

# Compute the total character count used for the cost estimate
total_characters = get_total_chars(chapters)
logger.info(f"✨ Total characters in selected book: {total_characters} ✨")
rough_price = tts_provider.estimate_cost(total_characters)
confirm_conversion(rough_price)

# Loop through each chapter and convert it to speech using the provided TTS provider
for idx, (title, text) in enumerate(chapters, start=1):
if idx < self.config.chapter_start:
continue
if idx > self.config.chapter_end:
break
logger.info(
f"Converting chapter {idx}/{len(chapters)}: {title}, characters: {len(text)}"
)

if self.config.output_text:
text_file = os.path.join(self.config.output_folder, f"{idx:04d}_{title}.txt")
with open(text_file, "w", encoding='utf-8') as file:
file.write(text)

if self.config.preview:
continue

output_file = os.path.join(self.config.output_folder,
f"{idx:04d}_{title}.{tts_provider.get_output_file_extension()}")

audio_tags = AudioTags(title, book_parser.get_book_author(), book_parser.get_book_title(), idx)
tts_provider.text_to_speech(
text,
output_file,
audio_tags,
)
except KeyboardInterrupt:
logger.info("Job stopped by user.")
exit()
6 changes: 5 additions & 1 deletion audiobook_generator/tts_providers/base_tts_provider.py
Expand Up @@ -4,6 +4,7 @@

TTS_AZURE = "azure"
TTS_OPENAI = "openai"
TTS_EDGE = "edge"


class BaseTTSProvider: # Base interface for TTS providers
Expand Down Expand Up @@ -33,7 +34,7 @@ def get_output_file_extension(self):

# Common support methods for all TTS providers
def get_supported_tts_providers() -> List[str]:
return [TTS_AZURE, TTS_OPENAI]
return [TTS_AZURE, TTS_OPENAI, TTS_EDGE]


def get_tts_provider(config) -> BaseTTSProvider:
Expand All @@ -43,5 +44,8 @@ def get_tts_provider(config) -> BaseTTSProvider:
elif config.tts == TTS_OPENAI:
from audiobook_generator.tts_providers.openai_tts_provider import OpenAITTSProvider
return OpenAITTSProvider(config)
elif config.tts == TTS_EDGE:
from audiobook_generator.tts_providers.edge_tts_provider import EdgeTTSProvider
return EdgeTTSProvider(config)
else:
raise ValueError(f"Invalid TTS provider: {config.tts}")

0 comments on commit 7432dc7

Please sign in to comment.