In [None]:
import speech_recognition as sr
import pyaudio
import os
from openai import OpenAI

# Module-level recognizer shared by speech_to_text(): it performs the ambient-noise
# calibration, the microphone capture and the PocketSphinx decoding.
recognizer = sr.Recognizer()

def speech_to_text():
    """Record one utterance from the microphone and transcribe it with PocketSphinx.

    The shared ``recognizer`` is first calibrated against ambient noise for one
    second, then listens on the microphone (``timeout=5`` is passed to
    ``recognizer.listen``) and decodes the captured audio with
    ``recognizer.recognize_sphinx``. Progress and error messages are printed.

    Returns:
        str | None: The recognized text, or ``None`` when listening or
        recognition failed (the reason is printed).
    """
    # Bind the result up front: previously `text` was only assigned inside the
    # try block, so any failure ended in UnboundLocalError at the return.
    text = None
    with sr.Microphone() as source:
        print("Adjusting for ambient noise...")
        recognizer.adjust_for_ambient_noise(source, duration=1)
        print("Please speak...")
        try:
            audio_data = recognizer.listen(source, timeout=5)
            print("Recognizing with PocketSphinx...")
            text = recognizer.recognize_sphinx(audio_data)
            print("Recognized Text: " + text)
        except sr.UnknownValueError:
            print("Could not understand the audio.")
        except Exception as e:
            # Best-effort: report listen timeouts / engine errors and return None.
            print(f"Error: {e}")
    return text


In [None]:
def text_to_meaning(input, llm_name="Meta-Llama", temperature=0.9, top_p=0.9, max_tokens=2048):
    """Send a question to a Monster API hosted LLM and return its answer.

    Args:
        input (str): The user's question. (The name shadows the ``input``
            builtin; it is kept so existing keyword callers keep working.)
        llm_name (str): Key selecting the model; one of "Google-Gemma",
            "Mistral", "Microsoft-Phi" or "Meta-Llama" (default).
        temperature (float): Sampling temperature forwarded to the API.
        top_p (float): Nucleus-sampling parameter forwarded to the API.
        max_tokens (int): Upper bound on generated tokens forwarded to the API.

    Returns:
        The ``content`` of the first choice's message in the completion response.

    Raises:
        ValueError: If ``input`` is not a non-blank string or ``llm_name`` is
            not a supported key.
        RuntimeError: If the ``MONSTER_API_KEY`` environment variable is unset.
    """
    # Fail fast instead of spending a network round trip on an empty prompt.
    if not isinstance(input, str) or not input.strip():
        raise ValueError("text_to_meaning() requires a non-empty text prompt.")

    monster_ai_model_name = {
        "Google-Gemma": "google/gemma-2-9b-it",
        "Mistral": "mistralai/Mistral-7B-Instruct-v0.2",
        "Microsoft-Phi": "microsoft/Phi-3-mini-4k-instruct",
        "Meta-Llama": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    }
    if llm_name not in monster_ai_model_name:
        supported = ", ".join(sorted(monster_ai_model_name))
        raise ValueError(f"Unknown llm_name {llm_name!r}; expected one of: {supported}")

    monster_api_key = os.getenv('MONSTER_API_KEY')
    if not monster_api_key:
        # With api_key=None the OpenAI client falls back to OPENAI_API_KEY, which
        # would then be sent to this third-party endpoint. Refuse explicitly.
        raise RuntimeError("MONSTER_API_KEY environment variable is not set.")

    monster_client = OpenAI(
        base_url="https://llm.monsterapi.ai/v1/",
        api_key=monster_api_key
    )

    message = [
        {"role": "system", "content": "You are an AI assistant. Kindly answer the following question:"},
        {"role": "user", "content": input},
    ]

    # stream=False: the full completion is returned in a single response object.
    response = monster_client.chat.completions.create(
        model=monster_ai_model_name[llm_name],
        messages=message,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
        stream=False,
    )
    return response.choices[0].message.content

In [None]:
# Orchestrator: microphone -> transcript -> LLM answer.

input_text = speech_to_text()

# Only query the LLM when something was actually recognized; an empty or missing
# transcript would otherwise be sent as the question.
if input_text:
    output_text = text_to_meaning(input_text)
    print(output_text)
else:
    # Keep the name defined so cells that read output_text do not hit NameError.
    output_text = None
    print("No speech was recognized; skipping the LLM request.")
