In [None]:
from langchain.globals import set_debug
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_community.vectorstores.chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

Adding helper functions and turning debug mode on

In [None]:
# Switch on LangChain's global debug flag (set_debug comes from langchain.globals)
# so the cells below run with debug mode enabled.
set_debug(True)


def format_docs(docs):
    """Merge the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

In [None]:
# Model name handed to HuggingFaceEmbeddings when the vector store is built.
embeddings_model_id = "NbAiLab/nb-bert-large"
# Checkpoint name used to load both the causal language model and its tokenizer.
model_id = "RuterNorway/Llama-2-13b-chat-norwegian-GPTQ"

Loading data from PDF

In [None]:
# Location of the source PDF, relative to the notebook's working directory.
pdf_path = "./data/my-cv.pdf"

# Read the PDF into LangChain documents; `data` is split into chunks in the next step.
loader = PyPDFLoader(pdf_path)
data = loader.load()

Splitting text into chunks

In [None]:
# Splitter settings: chunk size 512 with an overlap of 24 between neighbouring
# chunks, keeping the separators in the chunk text.
splitter_options = {
    "chunk_size": 512,
    "chunk_overlap": 24,
    "keep_separator": True,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded documents into the chunks that will be embedded.
splits = text_splitter.split_documents(data)

Creating vectorstore and indexing the embeddings into ChromaDB

In [None]:
# Embedding function backed by the configured embeddings model.
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_id)

# Build a Chroma vector store from the chunks, using that embedding function.
vectorstore = Chroma.from_documents(
    embedding=embeddings,
    documents=splits,
)

Creating the retriever based on the vectorstore

In [None]:
# Wrap the vector store as a retriever; no arguments are passed, so the
# library's default search settings apply.
retriever = vectorstore.as_retriever()

Creating a custom prompt template

In [None]:
# Norwegian instruction prompt with two placeholders: {question} receives the
# user's task and {context} receives the retrieved document text.
template_text = (
    "Du er en assistent for en IT konsulent. "
    "Bruk følgende informasjon for å besvare oppgaven. Hvis du ikke vet svaret, "
    "så si at du ikke vet det. Svar så presist som mulig."
    "\nOppgave: {question}\nInformasjon: {context}\nSvar:"
)
prompt = PromptTemplate(
    template=template_text,
    input_variables=['context', 'question'],
)

Creating the llm

In [None]:
# Load the causal language model and its tokenizer from the same checkpoint.
llm = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Generation options forwarded to the text-generation pipeline.
generation_kwargs = {
    "do_sample": True,
    "max_new_tokens": 512,
    "temperature": 0.8,
    "top_p": 0.92,
    "repetition_penalty": 1.13,
}
pipe = pipeline(
    "text-generation",
    model=llm,
    tokenizer=tokenizer,
    **generation_kwargs,
)

# Wrap the transformers pipeline so it can be used as a step in a LangChain chain.
hf = HuggingFacePipeline(pipeline=pipe)

Chaining everything together via LangChain Expression Language (LCEL)

In [None]:
# Retrieval branch: fetch documents for the question and flatten them to text.
context_branch = retriever | format_docs

# The incoming question feeds both prompt variables: it is routed through the
# retrieval branch for "context" and passed through unchanged for "question".
chain_inputs = {"context": context_branch, "question": RunnablePassthrough()}

# Fill the prompt, run the model, and return the generation as a plain string.
rag_chain = chain_inputs | prompt | hf | StrOutputParser()

Querying the chain

In [None]:
# The question lives in its own variable on purpose: assigning it to `prompt`
# would overwrite the PromptTemplate defined earlier, so re-running the
# chain-building cell afterwards would pipe a plain string into the chain.
# Another example question to try: "Nevn alle språk Mikkel kan i en liste."
question = "Hvilket programmeringsspråk kan Mikkel best?"

# Run retrieval + generation for the question and show the model's answer.
result = rag_chain.invoke(question)
print(result)