In [5]:
# Install dependencies if needed. Prefer %pip so packages land in the kernel's environment.
# %pip install boto3
# %pip install pydub

Defaulting to user installation because normal site-packages is not writeable
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [17]:
import io
import json
import time
import uuid

import boto3
from pydub import AudioSegment
from pydub.playback import play

In [21]:
class Translator:
    """
    Text and voice translation helper built on AWS services.

    Combines an LLM on Amazon Bedrock (text translation), Amazon Polly
    (text-to-speech) and Amazon Transcribe backed by S3 (speech-to-text).
    Service errors are reported with ``print`` and signalled to the caller
    through empty/``None`` return values rather than exceptions.
    """

    def __init__(self, region_name='us-east-1', transcribe_bucket='your-s3-bucket-for-transcribe'):
        """
        Initializes the Translator with boto3 clients for Bedrock, Polly, Transcribe, and S3.

        Args:
            region_name: AWS region used for every client.
            transcribe_bucket: S3 bucket used to stage audio for Transcribe and to
                receive the transcript JSON. Replace the placeholder default with
                a bucket you own.
        """
        self.bedrock_runtime = boto3.client('bedrock-runtime', region_name=region_name)
        self.polly = boto3.client('polly', region_name=region_name)
        self.transcribe = boto3.client('transcribe', region_name=region_name)
        # Same region as Transcribe, since the job reads from and writes to this bucket.
        self.s3 = boto3.client('s3', region_name=region_name)
        self.transcribe_bucket = transcribe_bucket

    def translate_text(self, text: str, source_lang: str, target_lang: str) -> str:
        """
        Translates text from a source language to a target language using an LLM on Bedrock.

        Args:
            text: Text to translate.
            source_lang: Source language as it should appear in the prompt (e.g. "English").
            target_lang: Target language as it should appear in the prompt (e.g. "Spanish").

        Returns:
            The stripped model completion, or "" if the input is empty or the call fails.
        """
        # Nothing to translate: skip the model call entirely.
        if not text or not text.strip():
            return ""

        prompt = f"Translate the following text from {source_lang} to {target_lang}: {text}"

        body = json.dumps({
            "prompt": f"\n\nHuman: {prompt}\n\nAssistant:",
            "max_tokens_to_sample": 300,
            "temperature": 0.1,
            "top_p": 0.9,
        })

        model_id = 'anthropic.claude-v2'
        accept = 'application/json'
        content_type = 'application/json'

        try:
            response = self.bedrock_runtime.invoke_model(
                body=body, modelId=model_id, accept=accept, contentType=content_type
            )
            result = json.loads(response.get('body').read())
            return result.get('completion').strip()
        except Exception as e:
            # Best-effort: report and return an empty result the caller can test for.
            print(f"Error invoking model: {e}")
            return ""

    def text_to_speech(self, text: str, voice_id: str = 'Joanna') -> io.BytesIO:
        """
        Converts text to speech using Amazon Polly.

        Args:
            text: Text to synthesize.
            voice_id: Polly voice identifier.

        Returns:
            An in-memory buffer holding the MP3 audio, or None if synthesis fails.
        """
        try:
            response = self.polly.synthesize_speech(Text=text, OutputFormat='mp3', VoiceId=voice_id)
            return io.BytesIO(response['AudioStream'].read())
        except Exception as e:
            print(f"Error in text-to-speech conversion: {e}")
            return None

    def speech_to_text(self, audio_file_path: str, language_code: str,
                       poll_interval: float = 5.0, timeout: float = 900.0) -> str:
        """
        Transcribes speech from an audio file to text using Amazon Transcribe.

        The file is uploaded to ``self.transcribe_bucket``, a transcription job is
        started with its output directed to the same bucket, and the job is polled
        until it finishes or the timeout elapses.

        Args:
            audio_file_path: Local path of the MP3 file to transcribe.
            language_code: Transcribe language code (e.g. "en-US").
            poll_interval: Seconds to wait between job status checks.
            timeout: Maximum seconds to wait for the job to finish.

        Returns:
            The transcript text, or "" if the job fails, times out, or an error occurs.
        """
        # A random suffix keeps job names unique, so re-running on the same file
        # never collides with an earlier job of the same name.
        job_name = f"transcription-job-{uuid.uuid4().hex}"
        bucket_name = self.transcribe_bucket
        s3_key = f"audio/{job_name}.mp3"
        transcript_key = f"{job_name}.json"

        try:
            self.s3.upload_file(audio_file_path, bucket_name, s3_key)

            # Direct the transcript to our bucket; without OutputBucketName the
            # result goes to a service-managed location we cannot read via get_object.
            self.transcribe.start_transcription_job(
                TranscriptionJobName=job_name,
                Media={'MediaFileUri': f's3://{bucket_name}/{s3_key}'},
                MediaFormat='mp3',
                LanguageCode=language_code,
                OutputBucketName=bucket_name,
                OutputKey=transcript_key,
            )

            deadline = time.monotonic() + timeout
            while True:
                status = self.transcribe.get_transcription_job(TranscriptionJobName=job_name)
                job = status['TranscriptionJob']
                job_status = job['TranscriptionJobStatus']
                if job_status in ('COMPLETED', 'FAILED'):
                    break
                if time.monotonic() >= deadline:
                    print(f"Transcription job {job_name} timed out after {timeout} seconds.")
                    return ""
                # Sleep between polls instead of hammering the API in a tight loop.
                time.sleep(poll_interval)

            if job_status != 'COMPLETED':
                # Return "" (not a sentinel sentence) so callers never mistake the
                # failure message for a real transcript.
                print(f"Transcription job {job_name} failed: {job.get('FailureReason', 'unknown reason')}")
                return ""

            response = self.s3.get_object(Bucket=bucket_name, Key=transcript_key)
            result = json.loads(response['Body'].read().decode('utf-8'))
            return result['results']['transcripts'][0]['transcript']
        except Exception as e:
            print(f"Error in speech-to-text conversion: {e}")
            return ""

    def translate_voice(self, audio_file_path: str, source_lang_code: str, target_lang: str, target_lang_voice_id: str):
        """
        Performs voice-to-voice translation.

        Transcribes the input audio, translates the transcript, synthesizes the
        translation with Polly, and plays it. Stops early (after printing a
        message) if any stage produces no result.

        Args:
            audio_file_path: Local path of the MP3 file to translate.
            source_lang_code: Transcribe language code of the input (e.g. "en-US").
            target_lang: Target language as it should appear in the LLM prompt.
            target_lang_voice_id: Polly voice used for the translated speech.
        """
        # 1. Transcribe the input audio
        transcribed_text = self.speech_to_text(audio_file_path, source_lang_code)
        if not transcribed_text:
            print("Could not transcribe the audio.")
            return

        print(f"Transcribed Text: {transcribed_text}")

        # 2. Translate the transcribed text
        # Only the part before the first '-' of the code (e.g. "en" from "en-US") goes into the prompt.
        translated_text = self.translate_text(transcribed_text, source_lang_code.split('-')[0], target_lang)
        if not translated_text:
            print("Could not translate the text.")
            return

        print(f"Translated Text: {translated_text}")

        # 3. Convert the translated text to speech
        translated_audio = self.text_to_speech(translated_text, target_lang_voice_id)
        if translated_audio:
            print("Playing translated audio...")
            audio = AudioSegment.from_mp3(translated_audio)
            play(audio)

In [22]:
# Build the translator; its AWS clients are created for the us-east-1 region.
translator = Translator(region_name='us-east-1')

In [23]:
# --- Text-to-Text Translation Example ---
# Translate one English sentence into Spanish and show both versions.
source_language, target_language = "English", "Spanish"
text_to_translate = "Hello, how are you?"

translated_text = translator.translate_text(
    text=text_to_translate,
    source_lang=source_language,
    target_lang=target_language,
)

print(f"Original: {text_to_translate}")
print(f"Translated: {translated_text}")

# Visual separator before the next example's output.
print("-" * 20)

Error invoking model: An error occurred (AccessDeniedException) when calling the InvokeModel operation: You don't have access to the model with the specified model ID.
Original: Hello, how are you?
Translated: 
--------------------


In [None]:
# --- Voice-to-Voice Translation Example ---
# You will need an mp3 audio file for this part.
# For example, create a file named 'input.mp3' with a recording of "Hello, how are you?" in English.
input_audio_file = 'input.mp3'
source_language_code_transcribe = 'en-US'  # For Transcribe
target_language_polly = 'Spanish'          # For the LLM
target_voice_id_polly = 'Lucia'            # Spanish voice in Polly

# Create a dummy audio file for testing if you don't have one
try:
    with open(input_audio_file, 'wb') as f:
        # A very short, silent mp3 file for demonstration purposes.
        # Replace this with a real audio file.
        f.write(b'\xFF\xFB\x90\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00')
    print(f"Created a dummy audio file: {input_audio_file}")
    
    # Uncomment the line below to run the voice translation
    # translator.translate_voice(input_audio_file, source_language_code_transcribe, target_language_polly, target_voice_id_polly)

except Exception as e:
    print(f"Could not create dummy audio file. Please provide your own '{input_audio_file}'. Error: {e}")