In [1]:
import sys
# Put the parent directory (relative to the current working directory) on the
# module search path. Presumably this is where the project packages imported
# in later cells (`modules`, `datatypes`) live — confirm.
sys.path.append("../")

# Load IPython's autoreload extension; mode 2 re-imports all modules (except
# those excluded with %aimport) before each cell runs, so edits to the project
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from modules.stt.whisper import FasterWhisperBackend
from modules.stt.stage import STTStage

# Speech-to-text backend.
# NOTE(review): the positional arguments look like model name, device and
# compute/quantization type — confirm against FasterWhisperBackend's signature.
stt_backend = FasterWhisperBackend("large-v3", "cuda", "int8")
# Wrap the backend in a pipeline stage so it can be connected to other stages.
stt_stage = STTStage(backend=stt_backend)


In [3]:
from modules.LLM.llama_cpp import LlamaCPPServerBackend
from modules.LLM.stage import LLMStage

# LLM backend pointed at http://localhost:8080.
# NOTE(review): presumably a llama.cpp server must already be listening on
# that address before this notebook is used — confirm.
llm = LlamaCPPServerBackend("http://localhost:8080")
# Wrap the backend in a pipeline stage; `llm` itself is kept around because
# later cells call it directly (reset / process / history).
llm_stage = LLMStage(backend=llm)

In [4]:
# Link the STT stage to the LLM stage. raw_audio_input_pipeline below starts a
# pipeline at stt_stage and reads two outputs from it (shown as "STT: ..." and
# "LLM: ..."), which relies on this connection.
stt_stage.connect(llm_stage)

In [16]:
from jupyter_utility_widgets.chat.chat_interface import ChatWidget
from jupyter_utility_widgets.chat.message import ContentType, TextMessage, Message, RawAudioMessage
from tpipes.data import Data
from tpipes.pipeline import run_pipeline
from datatypes.content_types import RawAudioContent, StringContent

def add_llm_response(data):
    """Post an LLM reply to the chat widget.

    Reads the reply text from ``data.content.value``, prefixes it with
    "LLM: " and adds it to the module-level ``chat`` as a message whose
    source is ``chat.ASSISTANT_KEY``.
    """
    reply_text = "LLM: " + data.content.value
    reply = TextMessage(content=reply_text, source=chat.ASSISTANT_KEY)
    chat.add_message(reply)

def raw_audio_input_pipeline(message: RawAudioMessage):
    """Run a recorded audio message through the pipeline starting at ``stt_stage``.

    The message's raw samples and sample rate are wrapped in a ``Data``
    object and fed to ``run_pipeline``. Exactly two outputs are consumed:
    the first is posted to the chat with an "STT: " prefix, the second is
    handed to ``add_llm_response``.

    Raises:
        The ``content`` of any output whose ``error`` flag is truthy
        (assumed to be an exception instance — TODO confirm against tpipes).
    """
    audio = RawAudioContent(
        value=message.content.data,
        sample_rate=message.content.sample_rate,
    )
    outputs = run_pipeline(stt_stage, input_data=Data(content=audio, metadata={}))

    # First output: shown in the chat as the transcription.
    transcript = next(outputs)
    if transcript.error:
        raise transcript.content
    stt_text = "STT: " + transcript.content.value
    chat.add_message(TextMessage(content=stt_text, source=chat.ASSISTANT_KEY))

    # Second output: shown in the chat as the LLM's answer.
    llm_reply = next(outputs)
    if llm_reply.error:
        raise llm_reply.content
    add_llm_response(llm_reply)

def text_input_pipeline(message: TextMessage):
    """Send a typed chat message through the pipeline starting at ``llm_stage``.

    The message text is wrapped in a ``Data`` object, the first pipeline
    output is read, and it is handed to ``add_llm_response``.

    Raises:
        The ``content`` of the output if its ``error`` flag is truthy
        (assumed to be an exception instance — TODO confirm against tpipes).
    """
    prompt = StringContent(value=message.content)
    outputs = run_pipeline(llm_stage, input_data=Data(content=prompt, metadata={}))

    llm_reply = next(outputs)
    if llm_reply.error:
        raise llm_reply.content
    add_llm_response(llm_reply)

def test_pipeline(message: Message):
    """Dispatch an incoming chat message to the matching pipeline.

    Raw-audio messages go to ``raw_audio_input_pipeline`` and text messages
    to ``text_input_pipeline``. Messages of any other type are ignored.
    """
    kind = message.type
    if kind == ContentType.RAW_AUDIO:
        raw_audio_input_pipeline(message)
        return
    if kind == ContentType.TEXT:
        text_input_pipeline(message)

# NOTE(review): presumably clears the backend's conversation history so each
# run of this cell starts a fresh chat — confirm in LlamaCPPServerBackend.
llm.reset()
# `chat` is the module-level widget the helper functions above post into.
chat = ChatWidget()
# Register test_pipeline as the widget's listener; presumably it is invoked
# with each submitted message — confirm in ChatWidget.listen.
chat.listen(test_pipeline)
# Last expression of the cell: renders the widget as the cell output.
chat

ChatWidget(children=(ChatDisplay(layout=Layout(border_bottom='1px solid black', border_left='1px solid black',…

In [6]:

# Call the LLM backend directly (no pipeline stages, no chat widget) as a
# quick check; the reply string is shown as the cell output.
llm.process("hello!")

' Hello! How can I assist you today? \n'

In [14]:
# Inspect the conversation history kept by the backend; the output below is a
# list of {'user': ..., 'assistant': ...} dicts, one per exchange.
llm.history

[{'user': 'remember yes=no',
  'assistant': " I do not store any personal data or retain information about individual users' preferences. Each time you interact with me, it's a new conversation, and I don't retain any context from previous conversations. I'm designed to provide helpful and informative responses based on my training data.\n"},
 {'user': '',
  'assistant': ' I can provide information on a wide range of topics, including but not limited to: general knowledge, science, history, technology, entertainment, and more. My training data includes a vast amount of text from various sources, which I use to generate responses.\n'},
 {'user': 'I want you to answer the opposite of what you mean.',
  'assistant': " I can try to do that, but it's worth noting that my primary goal is to provide accurate and helpful information. While I can attempt to provide the opposite of a statement, my responses are ultimately based on my understanding of the topic and the information available to me

In [None]:
# TODO: Fix submit.
# TODO: Add TTS.
# TODO: Make everything available on GitHub.
# TODO: Move to the Jetson.
# TODO: Ensure VS Code can connect to the Jetson.

