In [None]:
import sounddevice as sd
import numpy as np
import scipy.io.wavfile as wav
import whisper
from AnthropicWrapper import ClaudeChatCV
from dotenv import load_dotenv, find_dotenv
import os
from openai import OpenAI
import pyaudio
import threading
import time
import keyboard

# Load environment variables from the .env file located by find_dotenv(),
# so that os.getenv("OPENAI_API_KEY") further down can pick up the key.
load_dotenv(find_dotenv())

In [None]:
# --- Settings ---
# Microphone capture
FS = 44_100              # Sample rate in Hz used for recording and for the saved WAV
CHUNK_SIZE = 1024        # Frames pulled from the input stream per read

# Silence detection (THRESHOLD is compared against the RMS in is_silent)
THRESHOLD = 50           # RMS level below which a chunk counts as silent (tune per microphone)
SILENCE_DURATION = 1.5   # Seconds of silence before stopping (not referenced in the visible cells)

# Playback
SPEAKING_SPEED = 1.1     # Multiplier applied to the TTS playback sample rate

In [None]:
# --- Globals ---
# Flag shared between the keyboard listener and the main loop:
#   "-" sets it to True  -> the current recording stops and the loop idles,
#   "+" sets it to False -> the loop starts the next recording.
pause_loop: bool = False

In [None]:
# Initialising the Claude chat wrapper (model name, system prompt, CV document path)
chat_model_name = "claude-3-5-sonnet-20240620"

system_prompt = (
    "You are a friendly enthusiastic chatbot. "
    "You are here to help me with whatever tasks I need or to have a nice conversation. "
    "Be sure to keep your answers short and concise. "
    "Do not delve into super deep details or lists unless asked for."
)

# The default is a raw string: the Windows path contains backslash sequences
# (\H, \O, \C, \J, \G) that are invalid escapes in a normal string literal and
# trigger a SyntaxWarning on recent Python versions. The resulting value is
# unchanged. Set the CV_PDF_PATH environment variable (e.g. in .env) to use a
# different document without editing the notebook.
pdf_path = os.getenv(
    "CV_PDF_PATH",
    r"D:\Hidden Desktop\OneDrive\Cross Device\Jobs Applications\Graduating\CV.pdf",
)
chat_model = ClaudeChatCV(chat_model_name, system_prompt, pdf_path)

In [None]:
# Initialising whisper
# Speech-to-text model: the "medium" checkpoint, placed on the CUDA device.
model = whisper.load_model(
    "medium",
    device="cuda",
)

In [None]:
# Initialising text2speech
# OpenAI client used by stream_tts; the key comes from the OPENAI_API_KEY
# environment variable (None if it is not set).
tts_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

def stream_tts(input_string):
    """Speak ``input_string`` through the default output device and block until done.

    The text is sent to the OpenAI speech endpoint (model "tts-1", voice
    "nova") and the raw PCM response is written to a PyAudio output stream
    chunk by chunk from a worker thread. The caller waits for that thread.

    Args:
        input_string: Text to speak. Empty or whitespace-only input (or None)
            is skipped without contacting the API.

    Raises:
        Exception: Any error raised inside the worker (audio device or API
            failure) is re-raised in the calling thread after the audio
            resources have been released, instead of leaving the caller
            waiting forever.
    """
    # Nothing to say: avoid an API round-trip and opening the audio device.
    if not input_string or not str(input_string).strip():
        return

    errors = []  # filled by the worker if playback fails

    def _stream_tts():
        try:
            p = pyaudio.PyAudio()
            try:
                # The sample rate must be an integer; the product with
                # SPEAKING_SPEED is a float, so truncate it explicitly.
                # NOTE(review): OpenAI documents its pcm output as 24 kHz;
                # the 24_005 base is kept from the original, confirm whether
                # 24_000 was intended.
                stream = p.open(format=pyaudio.paInt16,  # 16-bit samples (was the literal 8)
                                channels=1,
                                rate=int(24_005 * SPEAKING_SPEED),
                                output=True)
                try:
                    with tts_client.audio.speech.with_streaming_response.create(
                        model="tts-1",
                        voice="nova",
                        input=input_string,
                        response_format="pcm"
                    ) as response:
                        for chunk in response.iter_bytes(1024):
                            stream.write(chunk)
                finally:
                    # Always release the output stream, even on failure.
                    stream.stop_stream()
                    stream.close()
            finally:
                p.terminate()
        except Exception as exc:  # handed to the caller below
            errors.append(exc)

    thread = threading.Thread(target=_stream_tts)
    thread.start()
    # join() returns however the worker ends, so a failure can no longer
    # leave the caller blocked on an event that is never set.
    thread.join()

    if errors:
        raise errors[0]

In [None]:
# --- Functions ---
def is_silent(data, threshold=None):
    """Return True when the RMS level of ``data`` is below the silence threshold.

    Args:
        data: Array-like of audio samples. Audio in this notebook is captured
            as int16, so the samples are converted to float64 before
            squaring; squaring int16 values directly wraps around (for
            example 256**2 becomes 0) and yields a wrong RMS.
        threshold: RMS level below which the audio counts as silent.
            Defaults to the module-level ``THRESHOLD``.

    Returns:
        bool: True if the RMS is below the threshold. Empty input has an RMS
        of 0.0 and is therefore reported as silent.
    """
    if threshold is None:
        threshold = THRESHOLD

    samples = np.asarray(data, dtype=np.float64)
    if samples.size == 0:
        # np.mean of an empty array is NaN (with a warning); treat as silence.
        rms = 0.0
    else:
        rms = float(np.sqrt(np.mean(np.square(samples))))

    print("RMS: ", rms)
    return bool(rms < threshold)

In [None]:
# Recording speech from the microphone (no silence detection)
def record_speech(output_path="g97613g9f0g8.wav"):
    """Record mono int16 audio until ``pause_loop`` becomes True, then save it as WAV.

    Audio is read from the default input device in blocks of ``CHUNK_SIZE``
    frames at ``FS`` Hz. The global ``pause_loop`` flag is checked after
    every block, so at least one block is always recorded.

    Args:
        output_path: Destination WAV file. Defaults to the file name the
            notebook has always used.

    Returns:
        str: The path of the WAV file that was written.
    """
    print("Recording... Speak now!")
    chunks = []

    with sd.InputStream(samplerate=FS, channels=1, dtype='int16') as stream:
        while True:
            chunk, overflowed = stream.read(CHUNK_SIZE)
            if overflowed:
                print("Warning: Input overflowed!")
            # Collect flattened copies and join once at the end; appending to
            # a growing array re-copies the whole recording on every block.
            chunks.append(chunk.flatten())

            if pause_loop:
                break

    audio_data = np.concatenate(chunks)
    wav.write(output_path, FS, audio_data)

    return output_path


In [None]:
def keyboard_listener():
    """Register the "+" / "-" hotkeys and block until "esc" is pressed.

    "+" clears the global ``pause_loop`` flag and "-" sets it; every other
    key is ignored.
    """
    # key name -> (value to store in pause_loop, message to print)
    key_actions = {
        "+": (False, "Set to continue on next loop"),
        "-": (True, "Set to pause on next loop"),
    }

    def on_key_press(event):
        global pause_loop
        action = key_actions.get(event.name)
        if action is None:
            return
        pause_loop, message = action
        print(message)

    keyboard.on_press(on_key_press)
    keyboard.wait('esc')

# Run the hotkey listener in the background. It is a daemon thread because
# keyboard_listener blocks in keyboard.wait('esc'); a non-daemon thread would
# keep the process alive after the main thread ends until "esc" is pressed.
listener_thread = threading.Thread(target=keyboard_listener, daemon=True)
listener_thread.start()

In [None]:
# Main conversation loop: record -> transcribe -> chat -> speak.
# record_speech() only returns once pause_loop is True (the "-" key), so after
# each turn the loop idles in the guard below until "+" clears the flag again.
while True:
    if pause_loop:
        time.sleep(0.1)
        continue

    # --- Record Speech ---
    time.sleep(0.1)
    print("Recording speech...")
    wav_file = record_speech()

    # --- Speech to Text ---
    print("Converting speech to text...")
    text = model.transcribe(wav_file, language="en")
    user_text = text.get("text")
    print("You said: ", user_text)

    # Nothing was transcribed: do not send an empty message to the chat
    # model or to text-to-speech.
    if not user_text or not user_text.strip():
        print("No speech detected, skipping this turn.")
        continue

    # --- Chatbot ---
    print("Chatting...")
    response = chat_model.chat_with_history_doc(user_text)

    print("Chatbot: ", response)

    # --- Text to Speech ---
    print("Converting text to speech...")
    stream_tts(response)