In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [2]:
import sounddevice as sd
import soundfile as sf
import torch
import numpy as np
import time
from transformers import AutoProcessor, SeamlessM4Tv2Model, pipeline
from threading import Thread
from queue import Queue
import warnings
warnings.filterwarnings("ignore")

import replicate

In [None]:
class ContinuousTranslator:
    def __init__(self, target_language="spa", chunk_duration=5, sample_rate=16000):
        self.chunk_duration = chunk_duration
        self.sample_rate = sample_rate
        self.target_language = target_language
        self.is_running = False
        self.audio_queue = Queue()
        self.transcript = ""
        
        # Initialize ML models
        print("Loading ML models...")
        self.processor = AutoProcessor.from_pretrained("facebook/seamless-m4t-v2-large")
        self.model = SeamlessM4Tv2Model.from_pretrained("facebook/seamless-m4t-v2-large")
        print("Models loaded!")

    def record_audio_chunk(self):
        """Record a chunk of audio"""
        recording = sd.rec(
            int(self.sample_rate * self.chunk_duration),
            samplerate=self.sample_rate,
            channels=1,
            dtype='float32'
        )
        sd.wait()
        return torch.tensor(recording.T)  # Convert to tensor and transpose

    def translate_audio(self, audio):
        """Translate audio chunk using the ML model"""
        try:

            audio_inputs = self.processor(audios=audio, return_tensors="pt")
            sf.write("temp.mp3", audio_inputs, self.sample_rate)
            file_input = open("./temp.mp3", "rb")
            
            translated_text = replicate.run(
                "lucataco/seamless_communication:b61de43a89a30bb31baa14ba81647303accb8220975ea91268a447650f013298",
                input={
                    "task_name": "S2TT (Speech to Text translation)",
                    "input_audio": file_input,
                    "max_input_audio_length": 60,
                    "target_language_text_only": "Spanish",
                }
            )

            #audio_inputs into a file

            # audio_inputs = self.processor(audios=audio, return_tensors="pt")
            # output_tokens = self.model.generate(
            #     **audio_inputs,
            #     tgt_lang=self.target_language,
            #     generate_speech=False,
            # )
            # translated_text = self.processor.decode(
            #     output_tokens[0].tolist()[0],
            #     skip_special_tokens=True
            # )
            return translated_text
        except Exception as e:
            return f"Translation error: {str(e)}"

    def recording_worker(self):
        """Worker function to continuously record audio"""
        while self.is_running:
            audio = self.record_audio_chunk()
            self.audio_queue.put(audio)

    def translation_worker(self):
        """Worker function to process and translate audio chunks"""
        while self.is_running:
            if not self.audio_queue.empty():
                audio = self.audio_queue.get()
                translation = self.translate_audio(audio)
                print(f"\nTranslation: {translation}")
                self.transcript += translation + "\n"
            time.sleep(0.1)  # Small delay to prevent CPU overuse

    def start(self):
        """Start the continuous recording and translation process"""
        self.is_running = True
        
        # Create and start worker threads
        self.record_thread = Thread(target=self.recording_worker)
        self.translate_thread = Thread(target=self.translation_worker)
        
        self.record_thread.start()
        self.translate_thread.start()
        
        print(f"Started continuous translation to {self.target_language}")
        print("Press Ctrl+C to stop...")

    def stop(self):
        """Stop the continuous recording and translation process"""
        self.is_running = False
        self.record_thread.join()
        self.translate_thread.join()
        print("\nStopped continuous translation")

In [4]:
translator = ContinuousTranslator(
    target_language="spa",  # Spanish
    chunk_duration=5,       # 5 seconds
    sample_rate=16000      # 16kHz
)

try:
    translator.start()
    # Keep the main thread running
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    translator.stop()

Loading ML models...


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Models loaded!
Started continuous translation to spa
Press Ctrl+C to stop...


It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.



Translation: Translation error: dtype must be one of ['float32', 'float64', 'int16', 'int32'] and not 'str448'


It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.



Translation: Translation error: dtype must be one of ['float32', 'float64', 'int16', 'int32'] and not 'str448'

Stopped continuous translation
