In [1]:
import os
from groq import AsyncGroq
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment
# (the API keys read via os.environ further down are expected to come from it).
load_dotenv()

True

In [2]:
# Shared async Groq client used by the chat helpers below.
# Raises KeyError at this line if GROQ_API_KEY is not set in the environment.
client = AsyncGroq(api_key=os.environ['GROQ_API_KEY'])

async def chat(messages, model='llama3-8b-8192'):
    """Request a single (non-streamed) chat completion and return its text.

    Args:
        messages: Conversation payload, forwarded unchanged as the
            ``messages`` argument of the completion request.
        model: Model identifier forwarded to the completion request.

    Returns:
        The ``content`` of the message in the first returned choice.
    """
    completion = await client.chat.completions.create(
        model=model,
        messages=messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

async def chat_stream(messages, model='llama3-8b-8192'):
    """Request a streamed chat completion and yield its text piece by piece.

    Args:
        messages: Conversation payload, forwarded unchanged as the
            ``messages`` argument of the completion request.
        model: Model identifier forwarded to the completion request.

    Yields:
        Each non-empty ``delta.content`` value taken from the first choice of
        every streamed chunk, in arrival order.
    """
    response_stream = await client.chat.completions.create(
        stream=True,
        model=model,
        messages=messages,
    )
    async for event in response_stream:
        piece = event.choices[0].delta.content
        if not piece:
            # Nothing to emit for this chunk (content is None or empty).
            continue
        yield piece

In [3]:
async def translate(text):
    """Stream the model's English rendering of ``text`` to stdout.

    The text is sent as the user message alongside a fixed system prompt that
    asks for a Spanish-to-English translation. Pieces are printed as they
    arrive without a separator, followed by a blank line once the stream ends.

    Args:
        text: The message to translate.

    Returns:
        None. The translation is only printed.
    """
    system_prompt = 'You are a helpful real-time translator from Spanish to English. Every time you receive a message from the user, just output the same message in English'
    conversation = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': text},
    ]
    async for piece in chat_stream(conversation):
        print(piece, end='')
    # Terminate the streamed line and leave one empty line after it.
    print('\n')

In [4]:
from deepgram import (
    DeepgramClient,
    DeepgramClientOptions,
    LiveTranscriptionEvents,
    LiveOptions,
    Microphone,
)
import asyncio
import string
import re

In [7]:
async def transcribe_audio(on_utterance=None):
    """Stream microphone audio to Deepgram and print the live transcript.

    Interim results are printed in place (terminated with a carriage return);
    finalized segments are printed on their own line. The coroutine returns
    once an utterance's last finalized segment ends with "goodbye" or "bye",
    or right away if the connection cannot be started. Any ``Exception``
    raised along the way is printed and swallowed.

    The microphone and the Deepgram connection are always shut down once they
    have been started, including when an error or a cancellation interrupts
    the wait.

    Args:
        on_utterance: Optional async callable. When provided it is awaited
            with the full text of every completed utterance (all finalized
            segments joined with single spaces), for example
            ``transcribe_audio(on_utterance=translate)``. With the default
            ``None`` the transcript is only printed.

    Returns:
        None.
    """
    # Event to signal transcription is complete
    transcription_complete = asyncio.Event()

    try:
        config: DeepgramClientOptions = DeepgramClientOptions(
            options={'keepalive': 'true'}
        )
        # NOTE(review): an empty key presumably makes the SDK fall back to the
        # DEEPGRAM_API_KEY environment variable — confirm against the SDK docs.
        deepgram: DeepgramClient = DeepgramClient('', config)
        dg_connection = deepgram.listen.asynclive.v('1')
        # Finalized segments of the utterance currently being spoken.
        transcript_parts = []

        def is_transaction_complete(text):
            """Return True when ``text`` ends with the farewell keyword."""
            # Remove punctuation so "Goodbye." and "bye!" still match.
            text = text.translate(str.maketrans('', '', string.punctuation))
            text = text.strip().lower()
            # For Spanish input (language='es') match r'\b(adios|adiós)\b$' instead.
            return re.search(r'\b(goodbye|bye)\b$', text) is not None

        async def on_message(self, result, **kwargs):
            """Handle one transcript event from the live connection."""
            sentence = result.channel.alternatives[0].transcript
            if len(sentence) == 0:
                return

            if not result.is_final:
                # Interim result: overwrite the current console line.
                print(sentence, end='\r')
                return

            # Final segments are collected and concatenated once speech_final
            # marks the end of the utterance.
            transcript_parts.append(sentence)
            print(sentence)

            if not result.speech_final:
                return

            # Sufficient silence detected to consider this end of speech.
            full_transcript = ' '.join(transcript_parts)
            transcript_parts.clear()

            # Signal transcription is complete so the outer coroutine can exit.
            if is_transaction_complete(sentence):
                transcription_complete.set()

            if on_utterance is not None:
                await on_utterance(full_transcript)

        async def on_error(self, error, **kwargs):
            """Report an error event from the live connection."""
            print(f'Error: {error}')

        # UtteranceEnd events are not subscribed to; utterance boundaries are
        # taken from speech_final on the transcript events.
        dg_connection.on(LiveTranscriptionEvents.Transcript, on_message)
        dg_connection.on(LiveTranscriptionEvents.Error, on_error)

        # connect to websocket
        options: LiveOptions = LiveOptions(
            model='nova-2',
            # Use language='es' for Spanish input.
            language='en',
            # Apply smart formatting to the output
            smart_format=True,
            # Raw audio format details
            encoding='linear16',
            channels=1,
            sample_rate=16000,
            # To get UtteranceEnd, the following must be set:
            interim_results=True,
            utterance_end_ms='1000',
            vad_events=True,
            # Time in milliseconds of silence to wait for before finalizing speech
            endpointing=300,
        )
        addons = {
            # Prevent waiting for additional numbers
            'no_delay': 'true'
        }

        print('Start talking...\n')
        if await dg_connection.start(options, addons=addons) is False:
            print('Failed to connect to Deepgram')
            return

        microphone = None
        try:
            # Open a microphone stream on the default input device
            microphone = Microphone(dg_connection.send)
            microphone.start()

            # Wait for the transcription to complete
            await transcription_complete.wait()
        finally:
            # Always release the audio device and the websocket, even if the
            # wait above was interrupted by an error or a cancellation.
            try:
                if microphone is not None:
                    microphone.finish()
            finally:
                await dg_connection.finish()

    except Exception as e:
        print(f"Could not open socket: {e}")
        return

In [None]:
# Run the live transcription session from the notebook cell (top-level await).
await transcribe_audio()

In [91]:
import requests
import wave
import pyaudio

In [92]:
# Text-to-speech demo: request linear16 audio from Deepgram's speak endpoint
# and play it through the default output device while it downloads.
DEEPGRAM_URL = 'https://api.deepgram.com/v1/speak?model=aura-luna-en&encoding=linear16&sample_rate=24000'
headers = {
    # Double quotes outside so the nested single quotes also parse on
    # Python versions older than 3.12.
    'Authorization': f"Token {os.environ['DEEPGRAM_API_KEY']}",
    'Content-Type': 'application/json'
}
payload = {
    'text': 'I am deploying my first text to speech demo. I am really happy right now. I hope you enjoy it too!'
}

# Number of audio frames read from the response and written to the device per step.
FRAMES_PER_BUFFER = 1024

res = requests.post(DEEPGRAM_URL, headers=headers, json=payload, stream=True)
try:
    # Fail with the HTTP error instead of an opaque wave.Error when the API
    # rejects the request (bad key, bad parameters, ...).
    res.raise_for_status()

    with wave.open(res.raw, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream = p.open(
                format=p.get_format_from_width(wf.getsampwidth()),
                channels=wf.getnchannels(),
                rate=wf.getframerate(),
                frames_per_buffer=FRAMES_PER_BUFFER,
                output=True
            )
            try:
                while data := wf.readframes(FRAMES_PER_BUFFER):
                    stream.write(data)
                # Let the buffered tail finish playing; closing an active
                # stream discards whatever is still pending.
                stream.stop_stream()
            finally:
                stream.close()
        finally:
            p.terminate()
finally:
    # Release the streamed HTTP connection.
    res.close()