In [None]:
# Sanity check: the last expression of the cell displays whether PyTorch
# reports a usable CUDA device in this environment.
import torch
torch.cuda.is_available()

### UNIT tests

In [None]:
import requests

def ask_llm(text:str):
    """Send one prompt to the local chat endpoint and return the reply text.

    The request body is a one-element JSON list ``[{"id": 1, "text": text}]``;
    the value returned is the ``"response"`` field of the first item of the
    JSON list sent back by the server.
    """
    payload = [{"id": 1, "text": text}]
    reply = requests.post("http://localhost:5001/chat", json=payload)
    first_item = reply.json()[0]
    return first_item["response"]


# Prompt used for this smoke test of the chat endpoint.
text = "Cosa è successo alla nazionale di calcio francese nel 2006?"
# Alternative prompts, kept commented out for manual experiments:
#"Immagina di essere un master che gioca una sessione di Dungeons and Dragons. Inventa una scena per presentare il mondo a dei giocatori che giocano la prima volta."#"Qual è la capitale della Francia?"

# Query the LLM; `answer` is reused by the TTS cell further down.
answer = ask_llm(text)

print(answer)


In [None]:
def say(text: str, language: str = "en"):
    """Ask the local TTS endpoint to synthesise `text` and report the outcome.

    @param text: sentence to synthesise.
    @param language: language code forwarded to the service (default "en").

    Nothing is returned: the function prints either the file path reported by
    the service (when its ``status`` is ``"success"``) or the error message.
    """
    payload = [{"id": 1, "text": text, "language": language}]
    reply = requests.post("http://localhost:5001/tts", json=payload).json()[0]

    succeeded = "status" in reply and reply["status"] == "success"
    if not succeeded:
        # any other (or missing) status is reported as an error
        print(f"[TTS] Error: {reply.get('error', 'Unknown error')}")
        return

    print(f"[TTS] Audio saved to: {reply['file']}")


# Send the LLM reply stored in `answer` to the TTS endpoint, requesting language "it".
say(answer, language="it")

In [None]:
from typing import Optional
import requests
import pyaudio
import wave
import io

def find_mic_index(p: "pyaudio.PyAudio", auto_select: bool = False) -> int:
    """Find the PyAudio device index of the microphone to record from.

       Useful in the docker or whenever the right index is not known.
       @param p: an initialised PyAudio instance used to enumerate devices.
       @param auto_select: if True, do not ask the user: silently return the
              first input device whose index is not 0, or index 0 when it is
              the only input device available.
       @return: the selected device index. In manual mode (or when auto
              selection finds no input device at all) this is the value typed
              by the user.
       @raise ValueError: if the value typed by the user is not an integer.
    """
    # (index, name) of every device that can capture audio, in index order
    input_devices = []
    for i in range(p.get_device_count()):
        info = p.get_device_info_by_index(i)
        if info['maxInputChannels'] <= 0:
            continue
        # auto mode: the first non-zero input device wins, without printing
        if auto_select and i != 0:
            return i
        input_devices.append((i, info['name']))

    # auto mode reaching this point means no non-zero input device exists;
    # fall back to index 0 if it can record instead of blocking on a prompt
    if auto_select and input_devices:
        return input_devices[0][0]

    # manual selection: show the candidates, then return what the user picked
    print("Available audio input devices:")
    for i, name in input_devices:
        print(f"  Index {i}: {name}")

    return int(input("Enter the device index for your digital mic: "))



def hear(duration: int = 5, device_index: Optional[int] = None):
    """Record audio from a microphone and return it as bytes in WAV format.

    @param duration: recording length in seconds.
    @param device_index: PyAudio input device index; when None the device is
           chosen with ``find_mic_index(p, auto_select=True)``.
    @return: the bytes of a complete WAV file (paInt32 samples, 48000 Hz)
             built in memory.

    The stream and the PyAudio instance are always released, even when
    opening the device or reading from it raises.
    """
    # Recording parameters
    CHUNK = 2048                  # frames read per stream.read() call
    FORMAT = pyaudio.paInt32
    CHANNELS = 1                  # upper bound, corrected per device below
    RATE = 48000
    n_chunks = int(RATE / CHUNK * duration)

    p = pyaudio.PyAudio()
    try:
        # select mic device if it was not provided
        if device_index is None:
            device_index = find_mic_index(p, auto_select=True)

        # never request more channels than the device offers
        device_info = p.get_device_info_by_index(device_index)
        channels = min(CHANNELS, device_info['maxInputChannels'])
        # queried now because `p` is terminated before the WAV is written
        sample_width = p.get_sample_size(FORMAT)

        stream = p.open(format=FORMAT,
                        channels=channels,
                        rate=RATE,
                        input=True,
                        input_device_index=device_index,
                        frames_per_buffer=CHUNK)
        try:
            print("Recording...")
            frames = [
                stream.read(CHUNK, exception_on_overflow=False)
                for _ in range(n_chunks)
            ]
            print("Recording finished.")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Create the WAV file in memory; the context manager finalises the header
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

    return wav_buffer.getvalue()


def transcribe(audio_data):
    """Upload WAV audio to the local STT endpoint and return its decoded JSON reply.

    `audio_data` is posted as a multipart file field named ``file`` with the
    filename ``audio.wav`` and content type ``audio/wav``.
    """
    upload = ("audio.wav", audio_data, "audio/wav")
    reply = requests.post("http://localhost:5001/stt", files={"file": upload})
    return reply.json()

# Record 4 seconds of audio; device_index=None makes hear() choose the microphone itself
audio_data = hear(duration=4, device_index=None)
# Send the recording to the STT endpoint and print the "text" field of its reply
transcription = transcribe(audio_data)
print("Transcription:", transcription["text"])



In [None]:
# Debug cell: print the device index that automatic microphone selection picks.
device_index = None
p = pyaudio.PyAudio()
try:
    if device_index is None:
        print(find_mic_index(p, True))
finally:
    # release the audio backend so it is not left initialised in the kernel
    p.terminate()

### FULL Conversation test

In [None]:
import tkinter as tk
from tkinter import font as tkfont

def create_record_window():
    """Build the Tk window holding the hold-to-record button for talking to Tiago.

    Only the widgets are created here: no callback is bound to the button and
    the event loop is not started. The configured root window is returned so
    the caller can run ``mainloop()`` on it.
    """
    root = tk.Tk()
    root.title("Tieni premuto per parlare con Tiago.")
    root.geometry("800x640")
    # ask the window manager to keep this window on top of the others
    root.wm_attributes("-topmost", -1)

    # appearance of the single recording button
    button_options = dict(
        text="Tieni premuto\nper registrare",
        font=("liberation sans", 8, "bold"),
        width=10,
        height=5,
    )
    record_btn = tk.Button(root, **button_options)
    # expand=True centres the button in the window
    record_btn.pack(expand=True)

    return root


# Build the recording window and hand control to Tk's event loop.
interaction_gui = create_record_window()
interaction_gui.mainloop()


