In [1]:
import time
import pyaudio
import json
import requests
import whisper
import webrtcvad
import threading
import numpy as np
from dotenv import load_dotenv
load_dotenv()
import warnings
warnings.filterwarnings("ignore", category=UserWarning, module='whisper')

# Constants
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK_DURATION_MS = 30
CHUNK_SIZE = int(RATE * CHUNK_DURATION_MS / 1000)
KEY_PHRASE = "Hey Monkey"
SILENCE_TIMEOUT_MS = 2000  # Silence duration to stop recording
MAX_RECORD_DURATION_MS = 10000  # Maximum record duration
VAD = webrtcvad.Vad(1)

# Initialize PyAudio
audio = pyaudio.PyAudio()

# Load Whisper model
whisper_model = whisper.load_model("base")

# Global variables for conversation history
conversation_history = []

model = "llama2:7b"
def ask_llm(data):
    """
    Simulated function to interact with an LLM and manage conversation history.
    """
    global conversation_history
    conversation_history.append(f"User: {data}")
    print(f"Conversation history is: {conversation_history}")
    try:
        r = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
        full_response = json.loads(r.text)
        resp = json.loads(full_response["response"])
        # resp = (json.dumps(json.loads(full_response["response"]), indent=2))
        print(f"/n/n Response is: /n {resp}")
    except Exception as e:
        print(f"Error making request to LLM: {e}")
        resp = "Error in LLM response."

    conversation_history.append(f"GPT: {resp}")
    # Truncate history if it exceeds a certain size
    max_history = 5  # for example, keep the last 5 exchanges
    if len(conversation_history) > max_history * 2:
        conversation_history = conversation_history[-max_history * 2:]
    return resp

def ask_llm(prompt):
    """
    Simulated function to interact with an LLM and manage conversation history.
    """
    global conversation_history
    conversation_history.append(f"User: {prompt}")
    print(f"Conversation history is: {conversation_history}")
    r = requests.post('http://0.0.0.0:11434/api/generate',
                      json={
                          'model': "mistral",
                          'prompt': prompt,
                      },
                      stream=False)
    full_response = ""    
    for line in r.iter_lines():
        if line:
            decoded_line = line.decode('utf-8')
            json_line = json.loads(decoded_line)
            full_response += json_line.get("response", "")
            if json_line.get("done"):
                break

    print(full_response)
    conversation_history.append(f"GPT: {full_response}")
    # Truncate history if it exceeds a certain size
    max_history = 5  # for example, keep the last 5 exchanges
    if len(conversation_history) > max_history * 2:
        conversation_history = conversation_history[-max_history * 2:]
    return full_response

def is_speech(chunk):
    return VAD.is_speech(chunk, RATE)

def record_audio():
    stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK_SIZE)
    frames, start_time, last_speech_time = [], time.time(), time.time()

    while True:
        chunk = np.frombuffer(stream.read(CHUNK_SIZE), dtype=np.int16)
        frames.append(chunk)

        if is_speech(chunk.tobytes()):
            last_speech_time = time.time()

        current_duration = (time.time() - start_time) * 1000
        silence_duration = (time.time() - last_speech_time) * 1000

        if silence_duration > SILENCE_TIMEOUT_MS or current_duration > MAX_RECORD_DURATION_MS:
            break

    stream.stop_stream()
    stream.close()
    return np.concatenate(frames).tobytes()

def transcribe_audio(audio_data):
    """
    Transcribe audio data using Whisper.
    """
    # Convert from 16-bit integers to floating-point
    audio_np = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32)
    # Normalize the audio to the range of -1.0 to 1.0
    audio_normalized = audio_np / np.iinfo(np.int16).max
    
    return whisper_model.transcribe(audio_normalized)

def listen_and_transcribe():
    """
    Continuously listen for the key phrase and transcribe the speech.
    """
    try:
        while True:
            print("Listening for key phrase...")
            audio_data = record_audio()
            print("Transcribing audio...")
            transcription = transcribe_audio(audio_data)
            if KEY_PHRASE.lower() in transcription["text"].lower():
                question = transcription["text"]
                print("Question:", question)
                prompt = f"Answer the question as asked, precisely and succinctly. This is a conversation, not an essay. Question: {question}"
                response = ask_llm(prompt)
                print("Response:", response)
    except KeyboardInterrupt:
        print("Stopping...")

if __name__ == "__main__":
    thread = threading.Thread(target=listen_and_transcribe)
    thread.start()

Listening for key phrase...


Transcribing audio...
Question:  Hey monkey, what's going on?
Conversation history is: ["User: Answer the question as asked, precisely and succinctly. This is a conversation, not an essay. Question:  Hey monkey, what's going on?"]
 I'm just an artificial intelligence designed to answer questions. I don't have the ability to experience events or have a personal life like a monkey or any other living organism. If you have a specific question, feel free to ask and I'll do my best to provide an accurate and succinct response.
Response:  I'm just an artificial intelligence designed to answer questions. I don't have the ability to experience events or have a personal life like a monkey or any other living organism. If you have a specific question, feel free to ask and I'll do my best to provide an accurate and succinct response.
Listening for key phrase...
Transcribing audio...
Question:  Hey monkey, what's going on?
Conversation history is: ["User: Answer the question as asked, precisely an

Exception in thread Thread-4 (listen_and_transcribe):
Traceback (most recent call last):
  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/opt/homebrew/Cellar/python@3.11/3.11.6_1/Frameworks/Python.framework/Versions/3.11/lib/python3.11/threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "/var/folders/bl/nkj7dq8s6m16hf4zmr47vkfr0000gn/T/ipykernel_22526/303968499.py", line 131, in listen_and_transcribe
  File "/var/folders/bl/nkj7dq8s6m16hf4zmr47vkfr0000gn/T/ipykernel_22526/303968499.py", line 93, in record_audio
  File "/opt/homebrew/lib/python3.11/site-packages/pyaudio/__init__.py", line 639, in open
    stream = PyAudio.Stream(self, *args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/homebrew/lib/python3.11/site-packages/pyaudio/__init__.py", line 447, in __init__
    pa.start_stream(self._stream)
