In [None]:
import gradio as gr
import tempfile
import os
from typing import Optional, Tuple
import time
from transformers import pipeline
import torch
import scipy.io.wavfile as wav
import numpy as np

# Load the text-rewriting pipeline. Both names start at their "unavailable"
# defaults and are only switched over once a model has actually loaded.
rewriter_pipe = None
REWRITER_AVAILABLE = False
try:
    # Preferred model: IBM Granite (fp16 + automatic device placement on CUDA,
    # fp32 with default placement otherwise).
    rewriter_pipe = pipeline(
        "text-generation",
        model="ibm-granite/granite-3.3-2b-instruct",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None
    )
except Exception as primary_error:
    print(f"Warning: Could not load IBM Granite model: {primary_error}")
    try:
        # Smaller stand-in model so the demo can still run.
        rewriter_pipe = pipeline("text-generation", model="microsoft/DialoGPT-small")
    except Exception as fallback_error:
        print(f"Warning: Could not load fallback model: {fallback_error}")
    else:
        REWRITER_AVAILABLE = True
else:
    REWRITER_AVAILABLE = True

# Module-level text-to-speech state. Everything starts out "unavailable";
# initialize_tts() overwrites these once a backend has been selected.
TTS_AVAILABLE = False   # True once any backend initialized successfully
TTS_METHOD = None       # backend identifier string, or None when nothing is loaded
# SpeechT5-specific objects (stay None for the other backends).
tts_processor = tts_model = vocoder = speaker_embeddings = None

# Try multiple TTS approaches
def initialize_tts():
    """Select the first usable text-to-speech backend and record it in module globals.

    Backends are tried in this order: SpeechT5 (transformers), gTTS, pyttsx3.
    On success ``TTS_AVAILABLE`` is set to True and ``TTS_METHOD`` to
    ``"speecht5"``, ``"gtts"`` or ``"pyttsx3"``. The SpeechT5 globals
    (``tts_processor``, ``tts_model``, ``vocoder``, ``speaker_embeddings``) are
    only assigned when the whole SpeechT5 setup succeeded.

    Returns:
        bool: True if a backend was initialized, False if none is usable.
    """
    global TTS_AVAILABLE, TTS_METHOD, tts_processor, tts_model, vocoder, speaker_embeddings

    # Method 1: Try SpeechT5 (Microsoft's TTS)
    try:
        from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
        from datasets import load_dataset

        print("Initializing SpeechT5 TTS...")
        # Build everything in locals first: if any step fails (e.g. the
        # speaker-embedding dataset cannot be loaded) the globals stay untouched
        # instead of holding a half-initialized, memory-hungry model.
        processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
        model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
        hifigan = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

        # Load speaker embeddings
        embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
        embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

    except Exception as e:
        print(f"SpeechT5 initialization failed: {e}")

    else:
        tts_processor = processor
        tts_model = model
        vocoder = hifigan
        speaker_embeddings = embeddings
        TTS_AVAILABLE = True
        TTS_METHOD = "speecht5"
        print("✅ SpeechT5 TTS initialized successfully!")
        return True

    # Method 2: Try gTTS (Google Text-to-Speech) - simpler alternative
    try:
        # Only gTTS itself is required; audio playback is handled by Gradio,
        # so no extra playback library is imported here.
        from gtts import gTTS

        # Test if gTTS works (constructing the object validates the arguments)
        gTTS(text="test", lang='en')
        TTS_AVAILABLE = True
        TTS_METHOD = "gtts"
        print("✅ gTTS initialized successfully!")
        return True

    except Exception as e:
        print(f"gTTS initialization failed: {e}")

    # Method 3: Try pyttsx3 (offline TTS)
    try:
        import pyttsx3

        # Test if pyttsx3 works (init raises when no speech driver is present)
        pyttsx3.init()
        TTS_AVAILABLE = True
        TTS_METHOD = "pyttsx3"
        print("✅ pyttsx3 initialized successfully!")
        return True

    except Exception as e:
        print(f"pyttsx3 initialization failed: {e}")

    print("❌ No TTS method available. Please install dependencies:")
    print("For SpeechT5: pip install transformers datasets soundfile")
    print("For gTTS: pip install gtts")
    print("For pyttsx3: pip install pyttsx3")
    return False

# Select a TTS backend at import time. The boolean return value is ignored here
# because the outcome is recorded in the TTS_AVAILABLE / TTS_METHOD globals.
initialize_tts()

class EchoVerseSystem:
    """Backend for the EchoVerse app: file reading, tone rewriting and speech synthesis.

    The methods rely on module-level state: ``rewriter_pipe`` and
    ``REWRITER_AVAILABLE`` for rewriting; ``TTS_AVAILABLE``, ``TTS_METHOD``,
    ``tts_processor``, ``tts_model``, ``vocoder`` and ``speaker_embeddings``
    for audio generation.
    """

    # Lower bound for the generation budget. The budget is derived from a word
    # count, which under-estimates tokens and is far too small for short inputs.
    MIN_NEW_TOKENS = 64

    def __init__(self):
        # Per-tone prompt material: "system" is used for chat-style models,
        # "prompt" is a completion-style template with a {text} placeholder.
        self.tone_prompts = {
            "Neutral": {
                "system": "You are a professional editor. Rewrite the text to be clear, balanced, and informative for audio narration.",
                "prompt": "Rewrite the following text in a clear, neutral, and informative tone suitable for audiobook narration. Maintain all key information while ensuring smooth flow for spoken delivery:\n\n{text}\n\nRewritten version:"
            },
            "Suspenseful": {
                "system": "You are a dramatic storyteller. Add mystery and tension while preserving information.",
                "prompt": "Rewrite the following text with a suspenseful, engaging, and dramatic tone. Add mystery and tension while preserving all important information for an audiobook:\n\n{text}\n\nSuspenseful version:"
            },
            "Inspiring": {
                "system": "You are a motivational speaker. Make the content uplifting and empowering.",
                "prompt": "Rewrite the following text with an inspiring, motivational, and uplifting tone. Make it energetic and empowering while keeping all essential content for audiobook narration:\n\n{text}\n\nInspiring version:"
            }
        }

    @staticmethod
    def _is_plain_completion_model(model) -> bool:
        """Return True when ``model`` is the DialoGPT fallback (raw prompt, no chat messages).

        The checkpoint name is checked in addition to the class name because the
        DialoGPT checkpoint is served by a generic GPT-2 class, so the class name
        alone never contains "DialoGPT".
        """
        config = getattr(model, "config", None)
        candidates = (
            getattr(model, "name_or_path", ""),
            getattr(config, "_name_or_path", ""),
            type(model),
        )
        return any("DialoGPT" in str(candidate) for candidate in candidates)

    @staticmethod
    def _write_to_temp_file(suffix: str, writer) -> str:
        """Create a temp file with ``suffix``, let ``writer(path)`` fill it, and return the path.

        The file handle is closed before ``writer`` runs so that libraries which
        reopen the file by name also work on platforms that lock open files.
        The file is removed again if ``writer`` raises; the exception propagates.
        """
        fd, path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        try:
            writer(path)
        except Exception:
            try:
                os.remove(path)
            except OSError:
                pass  # best effort: the original error is the one that matters
            raise
        return path

    def read_uploaded_file(self, file) -> str:
        """Read the text content of an uploaded file.

        Args:
            file: ``None``, a filesystem path (``str`` / ``os.PathLike``, which is
                what a file component configured with ``type="filepath"``
                delivers), or an object exposing the path as ``.name``.

        Returns:
            The file content decoded as UTF-8, ``""`` when ``file`` is None, or
            a message starting with ``"Error reading file:"`` on failure.
        """
        if file is None:
            return ""

        try:
            if isinstance(file, (str, os.PathLike)):
                path = file
            else:
                path = file.name
            with open(path, 'r', encoding='utf-8') as f:
                return f.read()
        except Exception as e:
            return f"Error reading file: {str(e)}"

    def rewrite_text_with_tone(self, original_text: str, tone: str, progress=gr.Progress()) -> Tuple[str, str]:
        """Rewrite ``original_text`` in the requested tone using the loaded text model.

        Args:
            original_text: Text to rewrite.
            tone: Key of ``self.tone_prompts``; unknown tones use the "Neutral" prompts.
            progress: Gradio progress callback.

        Returns:
            ``(rewritten_text, status_message)``. ``rewritten_text`` is ``""``
            when the input is empty, no model is available, or generation failed.
        """
        if not original_text or not original_text.strip():
            return "", "Please provide text to rewrite."

        if not REWRITER_AVAILABLE:
            return "", "❌ Text rewriting model is not available. Please install transformers and required models."

        progress(0.1, desc="Preparing prompt...")

        tone_config = self.tone_prompts.get(tone, self.tone_prompts["Neutral"])
        formatted_prompt = tone_config["prompt"].format(text=original_text)

        # Budget for *newly generated* tokens only, so it can never be smaller
        # than the prompt itself (which a total-length limit based on word
        # counts could be).
        max_new_tokens = max(self.MIN_NEW_TOKENS, len(original_text.split()) * 2)

        progress(0.3, desc="Generating rewrite...")

        try:
            # Simple text generation for fallback models
            if self._is_plain_completion_model(rewriter_pipe.model):
                response = rewriter_pipe(
                    formatted_prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=rewriter_pipe.tokenizer.eos_token_id
                )
                rewritten = response[0]['generated_text'].replace(formatted_prompt, "").strip()
            else:
                # For chat-based models like Granite
                messages = [
                    {"role": "system", "content": tone_config["system"]},
                    {"role": "user", "content": f"Rewrite this text in {tone.lower()} tone: {original_text}"}
                ]

                response = rewriter_pipe(
                    messages,
                    max_new_tokens=max_new_tokens,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=rewriter_pipe.tokenizer.eos_token_id
                )

                if isinstance(response, list) and len(response) > 0:
                    generated_text = response[0].get('generated_text', '')
                    if isinstance(generated_text, list):
                        # Chat output: a list of messages; the last one is the reply.
                        rewritten = generated_text[-1].get('content', str(generated_text[-1]))
                    else:
                        rewritten = generated_text.split(":")[-1].strip()
                else:
                    rewritten = str(response)

            progress(1.0, desc="Rewrite complete!")

            # Clean up the response
            rewritten = rewritten.replace(formatted_prompt, "").strip()
            if not rewritten or len(rewritten) < 10:
                rewritten = f"[{tone} tone] " + original_text  # Fallback

            return rewritten, f"✅ Text successfully rewritten in {tone} tone!"

        except Exception as e:
            return "", f"❌ Error during rewriting: {str(e)}"

    def generate_audio_speecht5(self, text: str, progress) -> Tuple[Optional[str], str]:
        """Synthesize ``text`` with SpeechT5 and write it to a temporary WAV file.

        Args:
            text: Text to speak.
            progress: Progress callback invoked as ``progress(fraction, desc=...)``.

        Returns:
            ``(wav_path, status_message)``; ``wav_path`` is None on failure.
        """
        import textwrap

        try:
            # Split long text into chunks of at most max_length characters,
            # breaking at whitespace so words are not cut in half.
            max_length = 400
            text_chunks = textwrap.wrap(text, width=max_length)
            if not text_chunks:
                return None, "❌ SpeechT5 error: no text to synthesize"

            all_audio = []

            for i, chunk in enumerate(text_chunks):
                progress(0.3 + (0.6 * i / len(text_chunks)), desc=f"Processing chunk {i+1}/{len(text_chunks)}...")

                # Process text
                inputs = tts_processor(text=chunk, return_tensors="pt")

                # Generate speech
                with torch.no_grad():
                    speech = tts_model.generate_speech(
                        inputs["input_ids"],
                        speaker_embeddings,
                        vocoder=vocoder
                    )

                all_audio.append(speech.cpu().numpy())

            # Combine all audio chunks
            combined_audio = np.concatenate(all_audio)

            # Sample rate written into the WAV header (presumably the model's
            # native output rate - confirm against the model card).
            sample_rate = 16000
            # Clip before scaling: samples outside [-1, 1] would otherwise wrap
            # around when cast to int16 and produce loud clicks.
            pcm = (np.clip(combined_audio, -1.0, 1.0) * 32767).astype(np.int16)

            audio_path = self._write_to_temp_file(
                ".wav", lambda path: wav.write(path, sample_rate, pcm)
            )
            return audio_path, f"🎵 Audio generated using SpeechT5! Duration: ~{len(combined_audio)/sample_rate:.1f}s"

        except Exception as e:
            return None, f"❌ SpeechT5 error: {str(e)}"

    def generate_audio_gtts(self, text: str, progress) -> Tuple[Optional[str], str]:
        """Synthesize ``text`` with gTTS and write it to a temporary MP3 file.

        Returns:
            ``(mp3_path, status_message)``; ``mp3_path`` is None on failure.
        """
        try:
            from gtts import gTTS

            progress(0.5, desc="Generating speech with gTTS...")

            tts = gTTS(text=text, lang='en', slow=False)

            audio_path = self._write_to_temp_file(".mp3", tts.save)
            return audio_path, "🎵 Audio generated using Google TTS!"

        except Exception as e:
            return None, f"❌ gTTS error: {str(e)}"

    def generate_audio_pyttsx3(self, text: str, progress) -> Tuple[Optional[str], str]:
        """Synthesize ``text`` with pyttsx3 and write it to a temporary WAV file.

        Returns:
            ``(wav_path, status_message)``; ``wav_path`` is None on failure.
        """
        try:
            import pyttsx3

            progress(0.5, desc="Generating speech with pyttsx3...")

            engine = pyttsx3.init()

            # Set properties
            rate = engine.getProperty('rate')
            engine.setProperty('rate', rate - 50)  # Slower speech

            voices = engine.getProperty('voices')
            if voices:
                engine.setProperty('voice', voices[0].id)  # Use first available voice

            def render(path):
                # save_to_file only queues the job; runAndWait performs it.
                engine.save_to_file(text, path)
                engine.runAndWait()

            audio_path = self._write_to_temp_file(".wav", render)
            return audio_path, "🎵 Audio generated using pyttsx3!"

        except Exception as e:
            return None, f"❌ pyttsx3 error: {str(e)}"

    def generate_audio(self, text: str, progress=gr.Progress()) -> Tuple[Optional[str], str]:
        """Generate audio from ``text`` with whichever TTS backend was initialized.

        Args:
            text: Text to speak.
            progress: Gradio progress callback, forwarded to the backend method.

        Returns:
            ``(audio_path, status_message)``; ``audio_path`` is None when the
            text is empty, no backend is available, or synthesis failed.
        """
        if not text or not text.strip():
            return None, "Please provide text to convert to audio."

        if not TTS_AVAILABLE:
            return None, """❌ Text-to-Speech functionality is not available.

Please install one of the following:

📦 For high-quality TTS (SpeechT5):
pip install transformers datasets soundfile torch

📦 For simple online TTS (gTTS):
pip install gtts

📦 For offline TTS (pyttsx3):
pip install pyttsx3

Then restart the application."""

        progress(0.1, desc="Preparing text for speech synthesis...")

        try:
            if TTS_METHOD == "speecht5":
                return self.generate_audio_speecht5(text, progress)
            elif TTS_METHOD == "gtts":
                return self.generate_audio_gtts(text, progress)
            elif TTS_METHOD == "pyttsx3":
                return self.generate_audio_pyttsx3(text, progress)
            else:
                return None, "❌ No TTS method available"

        except Exception as e:
            return None, f"❌ Error during audio generation: {str(e)}"

# Shared EchoVerseSystem instance; the Gradio event handlers defined inside
# create_interface() call its methods.
echoverse = EchoVerseSystem()

# Create Gradio interface
def create_interface():
    """Build the EchoVerse Gradio UI and wire up its event handlers.

    The layout has an input column (paste or upload text, tone selection,
    rewrite button) and a results column (original vs. rewritten text, audio
    generation, download). Availability of the rewriter and TTS backends is
    read from the module-level flags when the interface is built.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(
        title="🎧 EchoVerse - AI Audiobook Creator",
        theme=gr.themes.Soft(),
        css="""
        .main-container { max-width: 1200px; margin: 0 auto; }
        .step-container { border: 2px solid #e1e5e9; border-radius: 10px; padding: 20px; margin: 10px 0; }
        .tone-card { border: 1px solid #ddd; border-radius: 8px; padding: 15px; margin: 5px; }
        .status-info { background-color: #f8f9fa; padding: 15px; border-radius: 8px; margin: 10px 0; }
        """
    ) as demo:

        gr.HTML(f"""
        <div style="text-align: center; padding: 20px;">
            <h1>🎧 EchoVerse - AI Audiobook Creator</h1>
            <p>Transform your text into expressive, downloadable audiobooks with AI-powered tone adaptation</p>
            <p><em>Powered by IBM Granite 3.3B and advanced Text-to-Speech technology</em></p>
            <div class="status-info">
                <strong>System Status:</strong><br>
                🤖 Text Rewriter: {"✅ Available" if REWRITER_AVAILABLE else "❌ Not Available"}<br>
                🎵 Text-to-Speech: {"✅ Available (" + TTS_METHOD + ")" if TTS_AVAILABLE else "❌ Not Available"}
            </div>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=2):
                # Step 1: Text Input
                gr.HTML("<h3>📝 Step 1: Input Your Text</h3>")

                with gr.Tab("📋 Paste Text"):
                    input_text = gr.Textbox(
                        label="Enter your text",
                        placeholder="Paste your text here or upload a .txt file...",
                        lines=8,
                        max_lines=15,
                        value="Artificial intelligence has revolutionized the way we process information. Machine learning algorithms can analyze vast datasets and identify patterns that humans might miss. This technology continues to evolve, promising exciting developments in healthcare, education, and scientific research."  # Default example
                    )

                with gr.Tab("📁 Upload File"):
                    file_upload = gr.File(
                        label="Upload .txt file",
                        file_types=[".txt"],
                        type="filepath"
                    )
                    upload_btn = gr.Button("📖 Load File Content", variant="secondary")

                # Step 2: Tone Selection
                gr.HTML("<h3>🎭 Step 2: Choose Tone Style</h3>")
                tone_selector = gr.Radio(
                    choices=["Neutral", "Suspenseful", "Inspiring"],
                    value="Neutral",
                    label="Select tone for rewriting",
                    info="Choose how you want your audiobook to sound"
                )

                with gr.Row():
                    gr.HTML("""
                    <div style="display: flex; gap: 10px;">
                        <div class="tone-card">
                            <strong>📖 Neutral</strong><br>
                            Clear, balanced, informative
                        </div>
                        <div class="tone-card">
                            <strong>🎭 Suspenseful</strong><br>
                            Dramatic, mysterious, engaging
                        </div>
                        <div class="tone-card">
                            <strong>✨ Inspiring</strong><br>
                            Motivational, uplifting, energetic
                        </div>
                    </div>
                    """)

                # Disabled outright when no rewriting model could be loaded.
                rewrite_btn = gr.Button(
                    "🔄 Rewrite Text with AI",
                    variant="primary",
                    size="lg",
                    interactive=REWRITER_AVAILABLE
                )

            with gr.Column(scale=2):
                # Step 3: Display Results
                gr.HTML("<h3>📋 Original vs Rewritten Text</h3>")

                with gr.Row():
                    original_display = gr.Textbox(
                        label="📄 Original Text",
                        lines=6,
                        interactive=False
                    )
                    rewritten_display = gr.Textbox(
                        label="✨ AI Rewritten Text",
                        lines=6,
                        interactive=False
                    )

                rewrite_status = gr.Textbox(
                    label="Status",
                    interactive=False,
                    show_label=False
                )

                # Step 4: Audio Generation
                gr.HTML("<h3>🎵 Step 3: Generate Audio</h3>")

                # Starts disabled; perform_rewrite enables it once there is
                # rewritten text and a TTS backend.
                generate_audio_btn = gr.Button(
                    "🎤 Generate Audiobook",
                    variant="primary",
                    size="lg",
                    interactive=False
                )

                audio_status = gr.Textbox(
                    label="Audio Status",
                    interactive=False,
                    show_label=False
                )

                audio_output = gr.Audio(
                    label="🎧 Generated Audiobook",
                    type="filepath"
                )

                # Download section
                gr.HTML("<h3>💾 Download Your Audiobook</h3>")
                download_file = gr.File(
                    label="📥 Download Audio File",
                    interactive=False
                )

        # Installation instructions
        if not TTS_AVAILABLE or not REWRITER_AVAILABLE:
            gr.HTML("""
            <div style="background-color: #fff3cd; border: 1px solid #ffeaa7; border-radius: 8px; padding: 20px; margin: 20px 0;">
                <h4>📋 Installation Instructions:</h4>
                <p><strong>To enable all features, install these dependencies:</strong></p>
                <pre style="background-color: #f8f9fa; padding: 10px; border-radius: 4px;">
# For text rewriting (IBM Granite model)
pip install transformers torch

# For high-quality TTS (recommended)
pip install transformers datasets soundfile torch

# OR for simple online TTS
pip install gtts

# OR for offline TTS
pip install pyttsx3
                </pre>
            </div>
            """)

        # Event handlers
        def load_file_content(file):
            """Read the uploaded file and show it in both the input box and the original-text box."""
            content = echoverse.read_uploaded_file(file)
            return content, content

        def perform_rewrite(original_text, tone):
            """Rewrite the input text.

            Always returns four values, matching the four outputs wired to the
            rewrite button: original text, rewritten text, status message, and
            an update toggling the audio button.
            """
            if not original_text or not original_text.strip():
                # Nothing to rewrite: clear both text boxes and keep audio disabled.
                return "", "", "Please provide text to rewrite.", gr.update(interactive=False)

            rewritten, status = echoverse.rewrite_text_with_tone(original_text, tone)
            enable_audio = len(rewritten.strip()) > 0 and TTS_AVAILABLE

            return original_text, rewritten, status, gr.update(interactive=enable_audio)

        def perform_audio_generation(rewritten_text):
            """Synthesize the rewritten text; the same path feeds the player and the download box."""
            if not rewritten_text.strip():
                return None, "No rewritten text available for audio generation.", None

            audio_path, status = echoverse.generate_audio(rewritten_text)
            return audio_path, status, audio_path

        # Connect event handlers
        upload_btn.click(
            fn=load_file_content,
            inputs=[file_upload],
            outputs=[input_text, original_display]
        )

        rewrite_btn.click(
            fn=perform_rewrite,
            inputs=[input_text, tone_selector],
            outputs=[original_display, rewritten_display, rewrite_status, generate_audio_btn]
        )

        generate_audio_btn.click(
            fn=perform_audio_generation,
            inputs=[rewritten_display],
            outputs=[audio_output, audio_status, download_file]
        )

    return demo

# Launch the application
if __name__ == "__main__":
    # Build the UI, then start the server with the settings collected below.
    demo = create_interface()
    launch_options = {
        "server_name": "127.0.0.1",
        "server_port": 7860,
        "share": True,
        "show_error": True,
        "debug": True,
    }
    demo.launch(**launch_options)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/787 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/67.1M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/207 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/801 [00:00<?, ?B/s]

Device set to use cpu


Initializing SpeechT5 TTS...


Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/433 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/232 [00:00<?, ?B/s]

spm_char.model:   0%|          | 0.00/238k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/40.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/585M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/585M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/50.7M [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

cmu-arctic-xvectors.py: 0.00B [00:00, ?B/s]

SpeechT5 initialization failed: Dataset scripts are no longer supported, but found cmu-arctic-xvectors.py
pygame 2.6.1 (SDL 2.28.4, Python 3.12.11)
Hello from the pygame community. https://www.pygame.org/contribute.html
✅ gTTS initialized successfully!


model.safetensors:   0%|          | 0.00/50.6M [00:00<?, ?B/s]

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://bb8487b6ab54601376.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
!pip install transformers torch

In [None]:
!pip install transformers datasets soundfile torch

In [None]:
!pip install gtts

In [None]:
!pip install pyttsx3