In [1]:
import asyncio
import logging
import openai
import signal
from dotenv import load_dotenv

load_dotenv()

from vocode import getenv
from vocode.streaming.streaming_conversation import StreamingConversation
from vocode.helpers import create_streaming_microphone_input_and_speaker_output
from vocode.streaming.transcriber import *
from vocode.streaming.agent import *
from vocode.streaming.synthesizer import *
from vocode.streaming.models.transcriber import *
from vocode.streaming.models.agent import *
from vocode.streaming.models.synthesizer import *
from vocode.streaming.models.message import BaseMessage

class LocalAgent(ChatGPTAgent):
    """ChatGPT-style agent whose requests go to a locally hosted model.

    The endpoint is read from the ``LOCAL_MODEL_BASE_URL`` setting and written
    to ``openai.api_base``. That attribute lives on the ``openai`` module, so
    it applies to everything in this process that uses the module, not only
    to this agent.
    """

    def __init__(
        self,
        agent_config: ChatGPTAgentConfig,
        logger: Optional[logging.Logger] = None,
        openai_api_key: Optional[str] = None,
    ):
        """Create the agent and point the ``openai`` module at the local server.

        Args:
            agent_config: Agent configuration forwarded to ``ChatGPTAgent``.
            logger: Optional logger forwarded to ``ChatGPTAgent``.
            openai_api_key: Accepted for signature compatibility only; it is
                not used. A placeholder key is always passed to the parent.

        Raises:
            ValueError: If ``LOCAL_MODEL_BASE_URL`` is not configured.
        """
        # Validate the endpoint up front. The previous check looked at
        # ``openai.api_key`` instead, so a missing URL was never reported and
        # ``openai.api_base`` could be overwritten with ``None``.
        base_url = getenv("LOCAL_MODEL_BASE_URL")
        if not base_url:
            raise ValueError("LOCAL_MODEL_BASE_URL must be set in environment or passed in")

        # A placeholder key is supplied because no real OpenAI key is needed
        # here -- presumably the parent refuses to start without one (confirm
        # against ChatGPTAgent).
        super().__init__(agent_config=agent_config, logger=logger, openai_api_key='fake')
        openai.api_base = base_url

        # NOTE(review): the parent constructor may already set these two
        # attributes; they are kept here to preserve the original behaviour.
        self.first_response = (
            self.create_first_response(agent_config.expected_first_prompt)
            if agent_config.expected_first_prompt
            else None
        )
        self.is_first_response = True

async def main():
    """Run a spoken conversation with the locally hosted TherapyBot agent.

    Builds a microphone/speaker pair, a Deepgram transcriber, a ``LocalAgent``
    and an ElevenLabs synthesizer, starts a ``StreamingConversation`` with
    them, and then feeds microphone audio into the conversation for as long
    as it reports itself active. SIGINT (Ctrl+C) calls
    ``conversation.terminate()``.
    """
    mic, speaker = create_streaming_microphone_input_and_speaker_output(
        use_default_devices=False,
    )

    # Components are built in the same order the conversation receives them.
    transcriber = DeepgramTranscriber(
        DeepgramTranscriberConfig.from_input_device(
            mic,
            endpointing_config=PunctuationEndpointingConfig(),
        )
    )

    agent_config = ChatGPTAgentConfig(
        prompt_preamble="The following is a chat between a human and an TherapyBot, a therapist AI.",
        initial_message=BaseMessage(text="Hi, I'm TherapyBot, your therapist. What can I help you with today?"),
        generate_responses=False,
        model=getenv("LOCAL_MODEL_NAME"),
        allowed_idle_time_seconds=60,
    )
    agent = LocalAgent(agent_config)

    synthesizer = ElevenLabsSynthesizer(
        ElevenLabsSynthesizerConfig.from_output_device(speaker)
    )

    conversation = StreamingConversation(
        output_device=speaker,
        transcriber=transcriber,
        agent=agent,
        synthesizer=synthesizer,
    )

    await conversation.start()
    print("Conversation started, press Ctrl+C to end")

    def _stop_on_sigint(_signum, _frame):
        # Signal handlers receive (signal number, stack frame); neither is needed.
        conversation.terminate()

    signal.signal(signal.SIGINT, _stop_on_sigint)

    # Pump captured audio into the conversation until it is no longer active.
    while conversation.is_active():
        audio = await mic.get_audio()
        conversation.receive_audio(audio)




In [None]:
# Top-level ``await`` works here because this is a notebook (IPython) cell;
# a plain Python script would need ``asyncio.run(main())`` instead.
await main()