Merge pull request #30 from xtmu/dev

feat: add Edge TTS provider
p0n1 · Jan 11, 2024 · 7432dc7 · 7432dc7
2 parents 702acc8 + 1be794c
commit 7432dc7
Show file tree

Hide file tree

Showing 9 changed files with 524 additions and 76 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,11 +3,12 @@ __pycache__/
 *.pyc
 *.pyo
 *.pyd
-*.pyc
 
 # Virtual environment
 venv/
 .idea
+.history/
+.run/
 
 # Temporary files
 *.tmp
@@ -26,4 +27,7 @@ Thumbs.db
 # audio files
 audiobook_output/
 
-private_examples/
+private_examples/
+
+# custom
+scripts/
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 # EPUB to Audiobook Converter
 
-This project provides a command-line tool to convert EPUB ebooks into audiobooks. It now supports both the [Microsoft Azure Text-to-Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech) and the [OpenAI Text-to-Speech API](https://platform.openai.com/docs/guides/text-to-speech) to generate the audio for each chapter in the ebook. The output audio files are optimized for use with [Audiobookshelf](https://github.com/advplyr/audiobookshelf).
+This project provides a command-line tool to convert EPUB ebooks into audiobooks. It now supports both the [Microsoft Azure Text-to-Speech API](https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech) (alternatively [EdgeTTS](https://github.com/rany2/edge-tts)) and the [OpenAI Text-to-Speech API](https://platform.openai.com/docs/guides/text-to-speech) to generate the audio for each chapter in the ebook. The output audio files are optimized for use with [Audiobookshelf](https://github.com/advplyr/audiobookshelf).
 
 *This project is developed with the help of ChatGPT.*
 
@@ -216,6 +216,9 @@ Check this [step by step guide](https://gist.github.com/p0n1/cba98859cdb6331cc1a
 
 *Source: <https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-text-to-speech#prerequisites>*
 
+## About Edge TTS
+Edge TTS and Azure TTS are almost the same. The difference is that Edge TTS doesn't require an API key, because it is based on the Edge browser's read-aloud functionality, and its parameters are a bit more restricted, like [custom SSML](https://github.com/rany2/edge-tts#custom-ssml).
+
 ## How to Get Your OpenAI API Key?
 
 Check https://platform.openai.com/docs/quickstart/account-setup. Make sure you check the [price](https://openai.com/pricing) details before use.
@@ -262,7 +265,6 @@ Here are some examples that demonstrate various option combinations:
    ```sh
    python3 main.py "path/to/book.epub" "path/to/output/folder" --tts azure --chapter_start 5 --chapter_end 10 --break_duration "1500"
    ```
-
 ### Examples Using OpenAI TTS
 
 1. **Basic conversion using OpenAI with default settings**  

diff --git a/audiobook_generator/book_parsers/epub_book_parser.py b/audiobook_generator/book_parsers/epub_book_parser.py
@@ -45,7 +45,12 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]:
         for item in self.book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
             content = item.get_content()
             soup = BeautifulSoup(content, "lxml")
-            title = soup.title.string if soup.title else ""
+            title = ""
+            title_levels = ['title', 'h1', 'h2', 'h3']
+            for level in title_levels:
+                if soup.find(level):
+                    title = soup.find(level).text
+                    break
             raw = soup.get_text(strip=False)
             logger.debug(f"Raw text: <{raw[:]}>")
 
@@ -67,7 +72,7 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]:
                 logger.debug(f"Cleaned text step 4: <{cleaned_text[:100]}>")
 
             # fill in the title if it's missing
-            if not title:
+            if title == "":
                 title = cleaned_text[:60]
             logger.debug(f"Raw title: <{title}>")
             title = self._sanitize_title(title, break_string)
@@ -77,7 +82,8 @@ def get_chapters(self, break_string) -> List[Tuple[str, str]]:
             soup.decompose()
         return chapters
 
-    def _sanitize_title(self, title, break_string) -> str:
+    @staticmethod
+    def _sanitize_title(title, break_string) -> str:
         # replace MAGIC_BREAK_STRING with a blank space
         # strip incase leading bank is missing
         title = title.replace(break_string, " ")

diff --git a/audiobook_generator/config/general_config.py b/audiobook_generator/config/general_config.py
@@ -23,5 +23,11 @@ def __init__(self, args):
         # TTS provider: Azure specific arguments
         self.break_duration = args.break_duration
 
+        # TTS provider: Edge specific arguments
+        self.voice_rate = args.voice_rate
+        self.voice_volume = args.voice_volume
+        self.voice_pitch = args.voice_pitch
+        self.proxy = args.proxy
+
     def __str__(self):
         return ', '.join(f"{key}={value}" for key, value in self.__dict__.items())
diff --git a/audiobook_generator/core/audiobook_generator.py b/audiobook_generator/core/audiobook_generator.py
@@ -34,64 +34,68 @@ def __str__(self) -> str:
         return f"{self.config}"
 
     def run(self):
-        book_parser = get_book_parser(self.config)
-        tts_provider = get_tts_provider(self.config)
-
-        os.makedirs(self.config.output_folder, exist_ok=True)
-        chapters = book_parser.get_chapters(tts_provider.get_break_string())
-        # Filter out empty or very short chapters
-        chapters = [(title, text) for title, text in chapters if text.strip()]
-
-        logger.info(f"Chapters count: {len(chapters)}.")
-
-        # Check chapter start and end args
-        if self.config.chapter_start < 1 or self.config.chapter_start > len(chapters):
-            raise ValueError(
-                f"Chapter start index {self.config.chapter_start} is out of range. Check your input."
-            )
-        if self.config.chapter_end < -1 or self.config.chapter_end > len(chapters):
-            raise ValueError(
-                f"Chapter end index {self.config.chapter_end} is out of range. Check your input."
-            )
-        if self.config.chapter_end == -1:
-            self.config.chapter_end = len(chapters)
-        if self.config.chapter_start > self.config.chapter_end:
-            raise ValueError(
-                f"Chapter start index {self.config.chapter_start} is larger than chapter end index {self.config.chapter_end}. Check your input."
-            )
-
-        logger.info(f"Converting chapters from {self.config.chapter_start} to {self.config.chapter_end}.")
-
-        # Initialize total_characters to 0
-        total_characters = get_total_chars(chapters)
-        logger.info(f"✨ Total characters in selected book: {total_characters} ✨")
-        rough_price = tts_provider.estimate_cost(total_characters)
-        confirm_conversion(rough_price)
-
-        # Loop through each chapter and convert it to speech using the provided TTS provider
-        for idx, (title, text) in enumerate(chapters, start=1):
-            if idx < self.config.chapter_start:
-                continue
-            if idx > self.config.chapter_end:
-                break
-            logger.info(
-                f"Converting chapter {idx}/{len(chapters)}: {title}, characters: {len(text)}"
-            )
-
-            if self.config.output_text:
-                text_file = os.path.join(self.config.output_folder, f"{idx:04d}_{title}.txt")
-                with open(text_file, "w", encoding='utf-8') as file:
-                    file.write(text)
-
-            if self.config.preview:
-                continue
-
-            output_file = os.path.join(self.config.output_folder,
-                                       f"{idx:04d}_{title}.{tts_provider.get_output_file_extension()}")
-
-            audio_tags = AudioTags(title, book_parser.get_book_author(), book_parser.get_book_title(), idx)
-            tts_provider.text_to_speech(
-                text,
-                output_file,
-                audio_tags,
-            )
+        try:
+            book_parser = get_book_parser(self.config)
+            tts_provider = get_tts_provider(self.config)
+
+            os.makedirs(self.config.output_folder, exist_ok=True)
+            chapters = book_parser.get_chapters(tts_provider.get_break_string())
+            # Filter out empty or very short chapters
+            chapters = [(title, text) for title, text in chapters if text.strip()]
+
+            logger.info(f"Chapters count: {len(chapters)}.")
+
+            # Check chapter start and end args
+            if self.config.chapter_start < 1 or self.config.chapter_start > len(chapters):
+                raise ValueError(
+                    f"Chapter start index {self.config.chapter_start} is out of range. Check your input."
+                )
+            if self.config.chapter_end < -1 or self.config.chapter_end > len(chapters):
+                raise ValueError(
+                    f"Chapter end index {self.config.chapter_end} is out of range. Check your input."
+                )
+            if self.config.chapter_end == -1:
+                self.config.chapter_end = len(chapters)
+            if self.config.chapter_start > self.config.chapter_end:
+                raise ValueError(
+                    f"Chapter start index {self.config.chapter_start} is larger than chapter end index {self.config.chapter_end}. Check your input."
+                )
+
+            logger.info(f"Converting chapters from {self.config.chapter_start} to {self.config.chapter_end}.")
+
+            # Initialize total_characters to 0
+            total_characters = get_total_chars(chapters)
+            logger.info(f"✨ Total characters in selected book: {total_characters} ✨")
+            rough_price = tts_provider.estimate_cost(total_characters)
+            confirm_conversion(rough_price)
+
+            # Loop through each chapter and convert it to speech using the provided TTS provider
+            for idx, (title, text) in enumerate(chapters, start=1):
+                if idx < self.config.chapter_start:
+                    continue
+                if idx > self.config.chapter_end:
+                    break
+                logger.info(
+                    f"Converting chapter {idx}/{len(chapters)}: {title}, characters: {len(text)}"
+                )
+
+                if self.config.output_text:
+                    text_file = os.path.join(self.config.output_folder, f"{idx:04d}_{title}.txt")
+                    with open(text_file, "w", encoding='utf-8') as file:
+                        file.write(text)
+
+                if self.config.preview:
+                    continue
+
+                output_file = os.path.join(self.config.output_folder,
+                                           f"{idx:04d}_{title}.{tts_provider.get_output_file_extension()}")
+
+                audio_tags = AudioTags(title, book_parser.get_book_author(), book_parser.get_book_title(), idx)
+                tts_provider.text_to_speech(
+                    text,
+                    output_file,
+                    audio_tags,
+                )
+        except KeyboardInterrupt:
+            logger.info("Job stopped by user.")
+            exit()
diff --git a/audiobook_generator/tts_providers/base_tts_provider.py b/audiobook_generator/tts_providers/base_tts_provider.py
@@ -4,6 +4,7 @@
 
 TTS_AZURE = "azure"
 TTS_OPENAI = "openai"
+TTS_EDGE = "edge"
 
 
 class BaseTTSProvider:  # Base interface for TTS providers
@@ -33,7 +34,7 @@ def get_output_file_extension(self):
 
 # Common support methods for all TTS providers
 def get_supported_tts_providers() -> List[str]:
-    return [TTS_AZURE, TTS_OPENAI]
+    return [TTS_AZURE, TTS_OPENAI, TTS_EDGE]
 
 
 def get_tts_provider(config) -> BaseTTSProvider:
@@ -43,5 +44,8 @@ def get_tts_provider(config) -> BaseTTSProvider:
     elif config.tts == TTS_OPENAI:
         from audiobook_generator.tts_providers.openai_tts_provider import OpenAITTSProvider
         return OpenAITTSProvider(config)
+    elif config.tts == TTS_EDGE:
+        from audiobook_generator.tts_providers.edge_tts_provider import EdgeTTSProvider
+        return EdgeTTSProvider(config)
     else:
         raise ValueError(f"Invalid TTS provider: {config.tts}")