In [None]:
!pip3 install llama-cpp-python==0.2.82 huggingface_hub==0.23.4 langchain==0.1.16

In [None]:
from huggingface_hub import hf_hub_download
from langchain.llms.llamacpp import LlamaCpp
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain_core.runnables import RunnableParallel, RunnablePassthrough

In [None]:
# Fetch the quantized GGUF weights from the Hugging Face Hub and keep the
# returned local path; force_download=False does not force a fresh download.
gguf_repo = "cstr/Spaetzle-v60-7b-GGUF"
gguf_file = "Spaetzle-v60-7b-q4-k-m.gguf"
model_path = hf_hub_download(repo_id=gguf_repo, filename=gguf_file, force_download=False)

In [None]:
# Build the llama.cpp-backed LLM from the downloaded weights.
llm_settings = {
    "model_path": model_path,
    # Stop generating once the model starts writing a new instruction header.
    "stop": ["### Instruction:\n"],
    "n_ctx": 2048,
    "max_tokens": 2048,
    "temperature": 0.0,
    "streaming": True,
    "n_batch": 512,
}
llm = LlamaCpp(**llm_settings)

In [None]:
# create the embeddings
model_name = "mixedbread-ai/mxbai-embed-large-v1"
model_kwargs = {"device": "cpu"}
encode_kwargs = {"normalize_embeddings": True}
# The BGE wrapper prepends an instruction to every query before embedding it.
# Set it explicitly to the retrieval prompt recommended for this mxbai model
# instead of relying on the wrapper's BGE-specific default.
# NOTE(review): confirm the prompt text against the model card.
query_instruction = "Represent this sentence for searching relevant passages: "

embeddings_model = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    query_instruction=query_instruction,
)

In [None]:
# create the vector db and the retriever
seed_texts = ["harrison worked at kensho",
              "bears like to eat honey"]
# Stable ids make this cell safe to re-run: the same ids are written again
# instead of new random ones, so a reused in-memory collection does not
# accumulate duplicate copies of the seed texts.
seed_ids = [f"seed-{i}" for i in range(len(seed_texts))]

db = Chroma(embedding_function=embeddings_model)
db.add_texts(seed_texts, ids=seed_ids)

# Ask for no more results than there are stored documents; the retriever's
# default result count is larger than this tiny corpus.
retriever = db.as_retriever(search_kwargs={"k": len(seed_texts)})

In [None]:
# create the prompt
# Instruction-style template: retrieved context first, then the question under
# "### Instruction:", then a "### Response:" header for the model to complete.
# Both headers use the same "### <Name>:" form, matching the stop sequence
# given to the LLM (the original response header had a stray colon after ###).
template = """You are an AI assistant with the following context: 
{context}

### Instruction:
Answer the question: {question}

### Response:
"""

prompt = PromptTemplate.from_template(template)

In [None]:
# create the chain
# Output parser used as the final step of the chain assembled below; it is
# applied to whatever the LLM step produces.
output_parser = StrOutputParser()


def format_docs(docs):
    """Render retrieved documents as a bulleted block of text.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        A string with one ``- <page_content>`` line per document, each ending
        in a newline; the empty string when ``docs`` is empty.
    """
    return "".join(f"- {doc.page_content}\n" for doc in docs)


# Fan the incoming question out into the two prompt variables: "context" is
# the retriever's results rendered by format_docs, "question" is the input
# passed through unchanged.
setup_and_retrieval = RunnableParallel(
    context=retriever | format_docs,
    question=RunnablePassthrough(),
)

# Full pipeline: build prompt inputs -> fill the template -> run the LLM ->
# parse the output.
chain = (
    setup_and_retrieval
    | prompt
    | llm
    | output_parser
)

In [None]:
# Run the chain on a question that the stored texts can answer and print the
# resulting text.
question = "Where did harrison work?"
print(chain.invoke(question))