In [None]:
# Install dependencies (uncomment and run once, then restart the kernel)
# %pip install openai python-dotenv pydub ipywidgets
# %conda install -c conda-forge ffmpeg

import os
import io
import tempfile
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
from pydub import AudioSegment
import ipywidgets as widgets
from IPython.display import display, Audio, Markdown
from threading import Thread

# Read settings from a local .env file (if present) into the environment, then
# build the shared OpenAI client. Fail fast when no API key is configured.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if api_key:
    client = OpenAI(api_key=api_key)
else:
    raise ValueError("OPENAI_API_KEY not found. Please set it in your .env file or environment variables.")

def _process_audio(data: bytes, ext: str):
    """Normalize uploaded audio to 16 kHz mono for playback and for API upload.

    Args:
        data: Raw bytes of the uploaded audio file.
        ext: Extension of the uploaded file, with or without the leading dot.
            It is handed to pydub as the input format hint.

    Returns:
        A ``(playback_buf, mp3_path)`` tuple: an in-memory WAV buffer rewound to
        its start, and the path of a temporary MP3 file. The caller owns that
        file and is responsible for deleting it.

    Raises:
        ValueError: If ``data`` is empty or missing.
        Exception: Whatever pydub raises while decoding or encoding. The
            temporary file is removed before the exception propagates.
    """
    if not data:
        raise ValueError("No audio data received from the upload.")

    # Decode with the extension as the format hint, then resample and downmix.
    audio = AudioSegment.from_file(io.BytesIO(data), format=ext.lstrip('.'))
    audio = audio.set_frame_rate(16000).set_channels(1)

    # In-memory WAV copy for the notebook's audio player.
    playback_buf = io.BytesIO()
    audio.export(playback_buf, format="wav")
    playback_buf.seek(0)

    # Export through the temp file's own handle: the path is never opened a
    # second time while the first handle is live (which fails on Windows), and
    # leaving the ``with`` block flushes and closes the file deterministically.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    try:
        with temp_file:
            audio.export(temp_file, format="mp3")
    except Exception:
        # Do not leave a partial file behind when the encode fails.
        if os.path.exists(temp_file.name):
            os.unlink(temp_file.name)
        raise

    return playback_buf, temp_file.name

def _whisper_translate(audio_path: str):
    """Translate the audio file at ``audio_path`` to English and display the text.

    The file is sent to the ``whisper-1`` translation endpoint through the
    module-level ``client``. Failures are rendered as a Markdown error message
    instead of being raised. The file is deleted before the function returns,
    whether or not the request succeeded.

    Args:
        audio_path: Path of the temporary audio file to translate.
    """
    try:
        display(Markdown("**🌐 Translating…**"))

        with open(audio_path, "rb") as source:
            result = client.audio.translations.create(file=source, model="whisper-1")

        english = result.text.strip()
        display(Markdown(f"**📝 English Translation:**\n\n{english}"))
    except Exception as err:
        display(Markdown(f"**❌ Error:** {err!s}"))
    finally:
        # The temporary file is owned by this function once it is handed over.
        if os.path.exists(audio_path):
            os.unlink(audio_path)

# Single-file upload control restricted to the supported audio extensions.
upload = widgets.FileUpload(
    description="Upload Audio",
    accept=".wav,.mp3,.m4a,.flac",
    multiple=False,
)

def _on_upload_change(change):
    """Handle a change of the upload widget's value.

    For every uploaded file: announce it, convert it with ``_process_audio``,
    show a player for the converted audio, and start ``_whisper_translate`` on
    a background thread. The upload widget is disabled while this handler runs
    and re-enabled when it finishes (the translation threads may still be
    running at that point).

    Args:
        change: Change notification from the widget; ``change.new`` holds the
            uploaded files, either as a dict keyed by file name or as a
            sequence of per-file dicts carrying a ``"name"`` entry.
    """
    uploaded = change.new
    if not uploaded:
        return

    upload.disabled = True
    try:
        # Normalize both value shapes to (file name, metadata) pairs.
        if isinstance(uploaded, dict):
            entries = uploaded.items()
        else:
            entries = ((item["name"], item) for item in uploaded)

        for filename, meta in entries:
            payload = meta.get("content") or meta.get("data")
            suffix = Path(filename).suffix.lower()

            display(Markdown(f"### Uploaded: {filename}"))
            preview, mp3_path = _process_audio(payload, suffix)
            display(Audio(data=preview.getvalue(), rate=16000))

            # Translate off the main thread so the notebook stays responsive.
            worker = Thread(target=_whisper_translate, args=(mp3_path,))
            worker.start()
    except Exception as err:
        display(Markdown(f"**❌ Processing Error:** {err!s}"))
    finally:
        upload.disabled = False

# React to new uploads, then show the heading followed by the upload control.
upload.observe(_on_upload_change, names="value")

display(
    Markdown("## 🎙️ Whisper Translate (Regional → English)"),
    upload,
)

## 🎙️ Whisper Translate (Regional → English)

FileUpload(value=(), accept='.wav,.mp3,.m4a,.flac', description='Upload Audio')

### Uploaded: tamil_voice.wav

**🌐 Translating…**

**📝 English Translation:**

Vanakkam. My name is Jai Prakash. What are you all doing? Have you eaten?

In [None]:
# Install dependencies (uncomment and run once, then restart the kernel)
# %pip install openai python-dotenv pydub ipywidgets
# %conda install -c conda-forge ffmpeg

import os
import io
import tempfile
from pathlib import Path
from dotenv import load_dotenv
from openai import OpenAI
from pydub import AudioSegment
import ipywidgets as widgets
from IPython.display import display, Audio, Markdown, HTML
from threading import Thread
import base64

# Read settings from a local .env file (if present) into the environment, then
# build the shared OpenAI client. Fail fast when no API key is configured.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if api_key:
    client = OpenAI(api_key=api_key)
else:
    raise ValueError("OPENAI_API_KEY not found. Please set it in your .env file or environment variables.")

# Shared state: the language detected for the most recent upload, used when
# translating English text back. Both entries stay None until a detection runs.
detected_language = dict(code=None, name=None)

def _process_audio(data: bytes, ext: str):
    """Normalize uploaded audio to 16 kHz mono for playback and for API upload.

    Args:
        data: Raw bytes of the uploaded audio file.
        ext: Extension of the uploaded file, with or without the leading dot.
            It is handed to pydub as the input format hint.

    Returns:
        A ``(playback_buf, mp3_path)`` tuple: an in-memory WAV buffer rewound to
        its start, and the path of a temporary MP3 file. The caller owns that
        file and is responsible for deleting it.

    Raises:
        ValueError: If ``data`` is empty or missing.
        Exception: Whatever pydub raises while decoding or encoding. The
            temporary file is removed before the exception propagates.
    """
    if not data:
        raise ValueError("No audio data received from the upload.")

    # Decode with the extension as the format hint, then resample and downmix.
    audio = AudioSegment.from_file(io.BytesIO(data), format=ext.lstrip('.'))
    audio = audio.set_frame_rate(16000).set_channels(1)

    # In-memory WAV copy for the notebook's audio player.
    playback_buf = io.BytesIO()
    audio.export(playback_buf, format="wav")
    playback_buf.seek(0)

    # Export through the temp file's own handle: the path is never opened a
    # second time while the first handle is live (which fails on Windows), and
    # leaving the ``with`` block flushes and closes the file deterministically.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    try:
        with temp_file:
            audio.export(temp_file, format="mp3")
    except Exception:
        # Do not leave a partial file behind when the encode fails.
        if os.path.exists(temp_file.name):
            os.unlink(temp_file.name)
        raise

    return playback_buf, temp_file.name

def _whisper_translate(audio_path: str):
    """Translate uploaded audio to English, detect its language, and show the reply UI.

    Steps:
      1. Send the file to the ``whisper-1`` translation endpoint and display
         the English text.
      2. Send the same file to the transcription endpoint with
         ``response_format="verbose_json"`` only to read the detected language.
      3. Store the normalized language in the module-level
         ``detected_language`` dict and display the English text box and the
         translate button.

    Failures are rendered as a Markdown error message instead of being raised.
    The file at ``audio_path`` is deleted before the function returns, whether
    or not the requests succeeded.

    Args:
        audio_path: Path of the temporary audio file to process.
    """
    # ISO 639-1 code -> display name for the languages this notebook knows.
    language_names = {
        "en": "English", "es": "Spanish", "fr": "French", "de": "German",
        "it": "Italian", "pt": "Portuguese", "nl": "Dutch", "ru": "Russian",
        "zh": "Chinese", "ja": "Japanese", "ar": "Arabic", "hi": "Hindi",
        "ko": "Korean", "tr": "Turkish", "pl": "Polish", "vi": "Vietnamese",
        "th": "Thai", "id": "Indonesian", "uk": "Ukrainian", "sv": "Swedish",
        "ta": "Tamil", "te": "Telugu", "bn": "Bengali", "mr": "Marathi",
        "ur": "Urdu", "gu": "Gujarati", "kn": "Kannada", "ml": "Malayalam",
        "pa": "Punjabi",
    }
    # Reverse index so a reported language *name* can be mapped back to a code.
    codes_by_name = {name.lower(): code for code, name in language_names.items()}

    try:
        display(Markdown("**🌐 Translating to English...**"))

        with open(audio_path, "rb") as audio_file:
            response = client.audio.translations.create(
                model="whisper-1",
                file=audio_file
            )

        text = response.text.strip()
        display(Markdown(f"**📝 English Translation:**\n\n{text}"))

        # Second pass over the same file, used only for language detection.
        with open(audio_path, "rb") as audio_file:
            detection = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="verbose_json"
            )

        # The verbose response reports the language as a lowercase English name
        # (e.g. "hindi") rather than an ISO code, so accept either form and
        # normalize to (code, display name). Unknown languages keep the raw
        # value as their code and a title-cased version as their name.
        raw_language = (detection.language or "").strip()
        if not raw_language:
            raise ValueError("Whisper did not report a source language.")

        key = raw_language.lower()
        code = key if key in language_names else codes_by_name.get(key, raw_language)
        name = language_names.get(code, raw_language.title())

        detected_language["code"] = code
        detected_language["name"] = name

        display(Markdown(f"**🔍 Detected Language:** {name} ({code})"))

        # Show the text input for reverse translation
        display(english_input)
        display(translate_button)

    except Exception as e:
        display(Markdown(f"**❌ Error:** {str(e)}"))
    finally:
        # The temporary file is owned by this function once it is handed over.
        if os.path.exists(audio_path):
            os.unlink(audio_path)

def _text_to_speech(text, target_language):
    """Translate English text into the target language and speak the result.

    The text is translated with the chat completions API (skipped when the
    target is English), synthesized with the ``tts-1`` speech model, played in
    the notebook, and offered as a downloadable MP3 link. Failures are rendered
    as a Markdown error message instead of being raised.

    Args:
        text: English text to translate and speak.
        target_language: Language identifier for the output. Both ISO codes
            ("hi") and lowercase language names ("hindi") are accepted.
    """
    # Normalize once; language detection may yield either a code or a name.
    language_key = (target_language or "").strip().lower()
    is_english = language_key in ("en", "english")
    # Snapshot the display name so a concurrent upload cannot change it midway.
    language_name = detected_language["name"] or target_language

    try:
        display(Markdown(f"**🌐 Translating to {language_name}...**"))

        if not is_english:
            messages = [
                {"role": "system", "content": f"You are a translator. Translate the following English text to {language_name} ({target_language}). Only respond with the translation, no explanations."},
                {"role": "user", "content": text}
            ]

            translation_response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=messages
            )

            # The message content can be None; treat that as an empty reply.
            translated_text = (translation_response.choices[0].message.content or "").strip()
            if not translated_text:
                raise ValueError("The translation model returned an empty response.")
            display(Markdown(f"**📝 {language_name} Translation:**\n\n{translated_text}"))
        else:
            translated_text = text
            display(Markdown(f"**📝 Source is already in English:**\n\n{text}"))

        # Preferred voice per language code; anything unlisted uses "alloy".
        voice_map = {
            "es": "alloy", "fr": "alloy", "de": "alloy", "it": "alloy",
            "pt": "alloy", "nl": "alloy", "ru": "alloy", "zh": "nova",
            "ja": "nova", "ar": "alloy", "hi": "nova", "ko": "nova",
            "tr": "alloy", "pl": "alloy", "vi": "alloy", "th": "alloy",
            "id": "alloy", "uk": "alloy", "sv": "alloy", "en": "nova"
        }
        voice = voice_map.get("en" if is_english else language_key, "alloy")

        response = client.audio.speech.create(
            model="tts-1",
            voice=voice,
            input=translated_text
        )

        # Read the MP3 bytes straight from the response. This avoids the
        # deprecated stream_to_file() helper and the temporary file whose
        # cleanup used to fail when an earlier step raised.
        audio_data = response.read()

        display(Markdown("**🔊 Generated Audio:**"))
        display(Audio(data=audio_data, autoplay=True))

        # Provide download link
        data_url = f"data:audio/mp3;base64,{base64.b64encode(audio_data).decode()}"
        download_html = f"""
        <a href="{data_url}" download="translation_{target_language}.mp3" 
           style="display: inline-block; padding: 10px 15px; background-color: #4CAF50; 
                  color: white; text-decoration: none; border-radius: 4px; 
                  font-weight: bold; margin-top: 10px;">
           💾 Download Audio File
        </a>
        """
        display(HTML(download_html))

    except Exception as e:
        display(Markdown(f"**❌ Error in text-to-speech:** {str(e)}"))

def _on_translate_click(b):
    """Handle a click on the translate button.

    Validates that a language has been detected and that the text box is not
    empty, then runs ``_text_to_speech`` on a background thread. The button
    stays disabled until that job finishes, so repeated clicks cannot start
    overlapping translations.

    Args:
        b: The button instance passed by ipywidgets (unused).
    """
    if not detected_language["code"]:
        display(Markdown("**❌ Error:** No language detected yet. Please upload an audio file first."))
        return

    text = english_input.value
    if not text.strip():
        display(Markdown("**❌ Error:** Please enter some text to translate."))
        return

    # Capture the code now so a later upload cannot change this job's target.
    target_code = detected_language["code"]

    def _run():
        try:
            _text_to_speech(text, target_code)
        finally:
            # Re-enable only once the background work has actually finished.
            translate_button.disabled = False

    translate_button.disabled = True
    try:
        Thread(target=_run).start()
    except Exception:
        # If the thread never started, nothing else would re-enable the button.
        translate_button.disabled = False
        raise

# Build the controls used by the UI.

# Single-file upload restricted to the supported audio extensions.
upload = widgets.FileUpload(description="Upload Audio", multiple=False, accept=".wav,.mp3,.m4a,.flac")

# Free-text box for the English reply that will be translated back.
english_input = widgets.Textarea(
    description="English:",
    placeholder="Enter English text to translate back...",
    layout=widgets.Layout(height="100px", width="100%"),
)

# Button that triggers the reverse translation and speech synthesis.
translate_button = widgets.Button(icon="microphone", button_style="primary", description="Translate & Speak")
translate_button.on_click(_on_translate_click)

def _on_upload_change(change):
    """Handle a change of the upload widget's value.

    Each uploaded file is announced, converted with ``_process_audio``, shown
    in an audio player, and passed to ``_whisper_translate`` on a background
    thread. The upload widget is disabled for the duration of this handler and
    re-enabled on exit (the translation threads may still be running then).

    Args:
        change: Change notification from the widget; ``change.new`` holds the
            uploaded files, either as a dict keyed by file name or as a
            sequence of per-file dicts carrying a ``"name"`` entry.
    """
    new_files = change.new
    if not new_files:
        return

    upload.disabled = True
    try:
        # Both value shapes are reduced to (file name, metadata) pairs.
        pairs = (
            new_files.items()
            if isinstance(new_files, dict)
            else ((entry["name"], entry) for entry in new_files)
        )
        for file_name, details in pairs:
            audio_bytes = details.get("content") or details.get("data")
            extension = Path(file_name).suffix.lower()

            display(Markdown(f"### Uploaded: {file_name}"))
            wav_buffer, upload_path = _process_audio(audio_bytes, extension)
            display(Audio(data=wav_buffer.getvalue(), rate=16000))

            # Run the API calls off the main thread so the notebook stays responsive.
            translation_job = Thread(target=_whisper_translate, args=(upload_path,))
            translation_job.start()
    except Exception as exc:
        display(Markdown(f"**❌ Processing Error:** {exc!s}"))
    finally:
        upload.disabled = False

# React to new uploads, then lay out the page: title, step 1 with the upload
# control, and the step 2 heading.
upload.observe(_on_upload_change, names="value")

display(
    Markdown("## 🎙️ Whisper Translate - Bidirectional"),
    Markdown("### 1️⃣ Upload audio in any language to translate to English"),
    upload,
    Markdown("### 2️⃣ After detection, enter English text to translate back and hear it"),
)
# The English text box and translate button are displayed later, once a
# language has been detected for an upload.

## 🎙️ Whisper Translate - Bidirectional

### 1️⃣ Upload audio in any language to translate to English

FileUpload(value=(), accept='.wav,.mp3,.m4a,.flac', description='Upload Audio')

### 2️⃣ After detection, enter English text to translate back and hear it

### Uploaded: Hindi_Voice.wav

**🌐 Translating to English...**

**📝 English Translation:**

The weather is very good today. It rained yesterday, but now it is hot. I hope it will rain at night.

**🔍 Detected Language:** hindi (hindi)

Textarea(value='', description='English:', layout=Layout(height='100px', width='100%'), placeholder='Enter Eng…

Button(button_style='primary', description='Translate & Speak', icon='microphone', style=ButtonStyle())

**🌐 Translating to hindi...**

**📝 hindi Translation:**

चावल उगाने के लिए आप लाल मिट्टी का उपयोग कर सकते हैं।

  response.stream_to_file(temp_audio_file.name)


**🔊 Generated Audio:**