## Example of querying a set of documents with sources using Caikit-TGIS with Llama2, Langchain and a custom prompt

### Set the Inference server url (replace with your own address) and the model-id

In [1]:
inference_server_url = "http://ollama.redis.svc.cluster.local:11434"
model_id = "Llama-2-7b-chat-hf"

# If your endpoint is using a self-signed certificate, export the certificate chain as a .pem file and provide its path
# Example: certificate_chain = "certificate.pem"
# Adjust llm instantiation to use this parameter or not
certificate_chain_file = "certificate.pem"

In [2]:
# Optional, requirements if they are not already present
# !pip -q install grpcio grpcio-reflection

### Load some data from the folder where we have stored the PDF documentation

In [3]:
from langchain.document_loaders import PyPDFDirectoryLoader

pdf_folder_path = 'rhods-doc'

loader = PyPDFDirectoryLoader(pdf_folder_path)
docs = loader.load()

### Split the data in chunks large enough to have meaningful answers, and some overlap not to miss anything

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1024, chunk_overlap = 40)
all_splits = text_splitter.split_documents(docs)

### Store the data as embeddings in a vector database (Chroma)

In [5]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

vectorstore = Chroma.from_documents(documents=all_splits, embedding=HuggingFaceEmbeddings())

Unable to connect optimized C data functions [No module named '_testbuffer'], falling back to pure Python


IndexError: list index out of range

### Test data retrieving

In [None]:
question = "How do I create a Data Science Project"
docs = vectorstore.similarity_search(question)
docs

### Create the chain

In [None]:
from langchain.chains import RetrievalQA
import caikit_tgis_langchain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate

# NOTE: This template syntax is specific to Llama2
template="""<s>[INST] <<SYS>>
You are a helpful, respectful and honest assistant.
You will be given a question you need to answer, and a context to provide you with information. You must answer the question based as much as possible on this context.
Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.

If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>

Question: {question}
Context: {context} [/INST]
"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

llm = caikit_tgis_langchain.CaikitLLM(
    inference_server_url=inference_server_url,
    model_id=model_id,
    certificate_chain=certificate_chain_file,
    streaming=True
)

qa_chain = RetrievalQA.from_chain_type(llm,
                                       retriever=vectorstore.as_retriever(),
                                       chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
                                       return_source_documents=True)

### Launch the query

In [None]:
question = "How do I create a Data Science Project?"
result = qa_chain({"query": question, "min_new_tokens": 100, "max_new_tokens": 1024}, callbacks=[StreamingStdOutCallbackHandler()])

### Print the sources

In [None]:
def remove_duplicates(input_list):
    unique_list = []
    for item in input_list:
        if item.metadata['source'] not in unique_list:
            unique_list.append(item.metadata['source'])
    return unique_list

results = remove_duplicates(result['source_documents'])

for s in results:
    print(s)