In [None]:
!pip3 install llama-cpp-python==0.2.82 huggingface_hub==0.23.4 langchain==0.1.16

In [None]:
from huggingface_hub import hf_hub_download
from llama_cpp import LlamaGrammar
from langchain.llms.llamacpp import LlamaCpp
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

In [None]:
# Fetch the quantized GGUF model file from the Hugging Face Hub.
MODEL_REPO_ID = "cstr/Spaetzle-v60-7b-GGUF"
MODEL_FILENAME = "Spaetzle-v60-7b-q4-k-m.gguf"

# force_download=False: do not force a fresh download of the file.
model_path = hf_hub_download(
    filename=MODEL_FILENAME,
    repo_id=MODEL_REPO_ID,
    force_download=False,
)

In [None]:
# Build the GBNF grammar that restricts the LLM output to CFR syntax.
#
# Shape of an accepted output, as defined by the rules below:
#   - one or more commands chained with "#":  ACTION(...)#ACTION(...)
#   - ACTION is one of MOTION / SEARCHING / TAKING / PLACING / BRINGING
#   - the parentheses hold one or more comma-separated arguments of the form
#     <argument-name>:"<text>", where argument-name is one of
#     theme / goal / source / path / ground / beneficiary
#   - <text> is a double-quoted string with backslash escapes
#     (\" \\ \/ \b \f \n \r \t and \uXXXX); control characters are excluded
#   - optional whitespace (ws) is accepted only right after a closing quote
#
# A raw string (r"""...""") is used so the backslashes reach the grammar
# parser exactly as written.
grammar_string = r"""
root ::= command

command ::= single-command | composed-command
composed-command ::= single-command "#" command
single-command ::= action "(" arguments ")"

action ::= "MOTION" | "SEARCHING" | "TAKING" | "PLACING" | "BRINGING"
arguments ::= argument | argument "," arguments
argument ::= argument-name ":" string
argument-name ::= "theme" | "goal" | "source" | "path" | "ground" | "beneficiary"

string  ::=
  "\"" (
    [^"\\\x7F\x00-\x1F] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws
ws  ::= ([ \t\n] ws)?
"""

# Parse the grammar text into the LlamaGrammar object passed to the LLM below.
grammar = LlamaGrammar.from_string(grammar_string)

In [None]:
# Instantiate the llama.cpp-backed LLM; generation is constrained by `grammar`.
llm_settings = dict(
    model_path=model_path,
    grammar=grammar,
    # generation stops if the model starts a new instruction block
    stop=["### Instruction:\n"],
    n_ctx=4096,
    max_tokens=4096,
    n_gpu_layers=33,
    temperature=0.0,
    streaming=False,
)

llm = LlamaCpp(**llm_settings)

In [None]:
# Configure the embedding model used to index and query the examples.
model_name = "mixedbread-ai/mxbai-embed-large-v1"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)

embeddings_model = HuggingFaceBgeEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

In [None]:
# Create the vector store, expose it as a retriever and index the examples.
db = Chroma(embedding_function=embeddings_model)

retriever = db.as_retriever()

# Read one example per line; the tab in each line is rewritten to " = ".
# NOTE(review): assumes each line is "<text>\t<CFR>" - confirm against the dataset.
# The `with` block guarantees the file handle is closed, and blank lines are
# skipped so that empty strings are never indexed as examples.
with open("dataset/interpretations", "r") as example_file:
  examples = [
    line.rstrip("\n").replace('\t', ' = ')
    for line in example_file
    if line.strip()
  ]

db.add_texts(examples)

In [None]:
# Create the prompt template.
# Adjacent string literals are concatenated by Python with nothing in between,
# so every separator (space / newline) has to be written inside the literals.
# Template variables:
#   {examples} - the retrieved example lines
#   {prompt}   - the text to convert
template = (
  "You are an AI to convert text into CFR (Command Frame Representation). "
  "Here you have some examples of converting text into CFR:\n"
  "{examples}"

  "### Instruction:\n"
  "Convert the following text into CFR: {prompt}\n\n"

  "### Response:\n"
)

prompt = PromptTemplate.from_template(template)

In [None]:
# create the chain
# The output parser is the last stage of the chain assembled at the end of this cell.
output_parser = StrOutputParser()


def format_docs(docs):
    """Render documents as a dash-prefixed list, one document per line.

    Each document's ``page_content`` is stripped of surrounding whitespace,
    prefixed with "- " and terminated with a newline. An empty iterable
    yields an empty string.
    """
    return "".join(f"- {doc.page_content.strip()}\n" for doc in docs)


# First stage: turn the incoming text into the two prompt variables.
#   examples - documents returned by the retriever, rendered by format_docs
#   prompt   - the incoming text, passed through RunnablePassthrough
setup_and_retrieval = RunnableParallel(
    examples=retriever | format_docs,
    prompt=RunnablePassthrough(),
)

# Full pipeline: prompt variables -> prompt template -> LLM -> output parser.
chain = (
    setup_and_retrieval
    | prompt
    | llm
    | output_parser
)

In [None]:
# Run the chain on a sample instruction and show the generated CFR.
instruction = "go to the bedroom take all the clothes from the bed and put them in the table"
cfr_output = chain.invoke(instruction)
print(cfr_output)