# v4.1 - Same as v4, but using HuggingFaceInstructEmbeddings for the embeddings

In [1]:
# imports
import os
from dotenv import load_dotenv
from genai.schemas import GenerateParams
from genai.credentials import Credentials
from langchain.vectorstores import FAISS
from genai.extensions.langchain import LangChainInterface
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain.document_loaders import PDFMinerLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import RetrievalQA

In [2]:
# InstructorEmbedding 
from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings

  from tqdm.autonotebook import trange


## Indexing - Starts here!

In [3]:
# Paths used by the indexing cells
pdf_folder_path = './data'  # walked recursively by generateDB() for *.pdf files
db_folder_path = './db_v4.1'  # folder the FAISS index is saved to and loaded from

In [4]:
# define embedding function
def initEmbedFunc(model_name="hkunlp/instructor-xl", device="cpu"):
    """Build the Instructor embedding function used for indexing and querying.

    Args:
        model_name: Hugging Face model identifier handed to
            HuggingFaceInstructEmbeddings. Defaults to the model this
            notebook version was built around.
        device: Value stored under the "device" key of ``model_kwargs``.
            Defaults to "cpu".

    Returns:
        A HuggingFaceInstructEmbeddings instance configured as requested.

    Note:
        Use the same model for building an index and for loading it again;
        vectors produced by different models are not comparable.
    """
    return HuggingFaceInstructEmbeddings(
        model_name=model_name,
        model_kwargs={"device": device},
    )

In [5]:
# populate FAISS vector store
def generateDB():
    """Index every PDF under ``pdf_folder_path`` into a FAISS store and save it.

    Walks ``pdf_folder_path`` recursively, loads each file whose name ends in
    ``.pdf`` with PDFMinerLoader, splits it into chunks, embeds the chunks with
    the function returned by ``initEmbedFunc()`` and saves the resulting index
    to ``db_folder_path``.

    Returns:
        The FAISS vector store that was built.

    Raises:
        ValueError: If no chunks were produced, i.e. no PDF content was found
            under ``pdf_folder_path``.
    """
    # The splitter settings are identical for every file, so build it once
    # instead of once per PDF.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        separators=["\n"],
    )

    docs = []
    for root, _dirs, files in os.walk(pdf_folder_path):
        for file in files:
            if not file.endswith(".pdf"):
                continue

            print(f'Reading File: {file}')

            # read the PDF and append its chunks to the running list
            documents = PDFMinerLoader(os.path.join(root, file)).load()
            docs.extend(text_splitter.split_documents(documents))

    # Fail fast with a clear message rather than loading the embedding model
    # and handing FAISS an empty document list.
    if not docs:
        raise ValueError(
            f"No PDF content found under {pdf_folder_path!r}; nothing to index."
        )

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # use LangChain to embed the chunks and build the FAISS index
    db = FAISS.from_documents(documents=docs, embedding=embedding_function)

    # persist the index so later runs can load it instead of re-embedding
    db.save_local(db_folder_path)

    return db

In [None]:
db = None

# Build the index when the DB folder is missing, empty, or holds only hidden
# files. Checking isdir() first matters on a first run: the folder may not
# exist yet, and os.listdir() would raise FileNotFoundError.
if not os.path.isdir(db_folder_path) or all(
    f.startswith('.') for f in os.listdir(db_folder_path)
):
    print("FAISS DB is empty. Generating indexes...")

    # build the FAISS index from the PDFs and save it
    db = generateDB()
else:
    print("FAISS DB is not empty.")

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # load the saved FAISS vector store into memory
    # NOTE(review): FAISS.load_local is understood to unpickle the stored
    # docstore, so only load folders this notebook wrote itself; newer
    # LangChain releases may require an explicit opt-in flag — confirm
    # against the installed version.
    db = FAISS.load_local(db_folder_path, embedding_function)

FAISS DB is empty. Generating indexes...
Reading File: 9.0 administrators_guide.pdf
Reading File: Trigger_Developer_Guide_v9.pdf
Reading File: op9_solutions_guide.pdf
Reading File: op_user_guide.pdf
load INSTRUCTOR_Transformer
max_seq_length  512


## RAG - Starts here!

In [None]:
# Read the watsonx.ai API key and endpoint from the environment
# (load_dotenv() is called first so values from a .env file are picked up)
load_dotenv()
api_key = os.environ.get("GENAI_KEY")
api_url = os.environ.get("GENAI_API")
creds = Credentials(api_key, api_endpoint=api_url)

In [None]:
# Model selection.
# The scores look like informal answer-quality ratings (out of 5) from earlier
# runs — TODO confirm; models listed without a score have no rating recorded here.
# ibm/mpt-7b-instruct -> 3/5
# meta-llama/llama-2-7b -> 3/5
# ibm/granite-13b-sft -> 3/5
# google/ul2 -> 3.5/5
# google/flan-ul2
# google/flan-t5-xxl
model_id = 'google/ul2'

In [None]:
# LLM generation parameters: greedy decoding, 1 to 200 new tokens, no streaming
params = GenerateParams(
    repetition_penalty=1.5,
    stream=False,
    max_new_tokens=200,
    min_new_tokens=1,
    decoding_method='greedy',
)

# params = GenerateParams(
#     decoding_method="sample",
#     max_new_tokens=200,
#     min_new_tokens=1,
#     stream=False,
#     temperature=0.55,
#     top_k=50,
#     top_p=1,
#     repetition_penalty=1.5
# )

In [None]:
# Wrap the selected model as a LangChain LLM for use with retrieved content
langchain_model = LangChainInterface(
    credentials=creds,
    params=params,
    model=model_id,
)

In [None]:
# Retrieval QA chain with the default prompt: fetch the 7 most similar chunks,
# "stuff" them into one prompt, and return the source documents with the answer
qa = RetrievalQA.from_chain_type(
    retriever=db.as_retriever(search_kwargs={"k": 7}, search_type="similarity"),
    llm=langchain_model,
    return_source_documents=True,
    chain_type="stuff",
)

In [None]:
# generate response
def generateResponse(query, qa):
    """Run *query* through the chain *qa* and return only the answer.

    Args:
        query: Question passed to the chain.
        qa: Callable chain; its return value is indexed with 'result'.

    Returns:
        The value stored under the 'result' key of the chain output.
    """
    return qa(query)['result']

## Testing - Starts here!

In [None]:
%%time
# Baseline run: each cell in this section sends one question through qa
# (the chain built with the default prompt) via generateResponse()
query = "Provide the steps to configure Watson Assistant in OpenPages?"
generateResponse(query, qa)

In [None]:
%%time
query = "What is FastMap?"
generateResponse(query, qa)

In [None]:
%%time
query = "What is the purpose of Reporting Periods?"
generateResponse(query, qa)

In [None]:
%%time
query = "What is a Role Template?"
generateResponse(query, qa)

In [None]:
%%time
# NOTE(review): the qa1 run of this question is worded "different types of
# access controls"; the two answers are therefore not a like-for-like comparison
query = "What are the different access controls available for non-participants for a standard stage within a workflow?"
generateResponse(query, qa)

In [None]:
%%time
query = "What is the purpose of Object Reset?"
generateResponse(query, qa)

In [None]:
%%time
query = "What are the features of Operational Risk Management in OpenPages?"
generateResponse(query, qa)

In [None]:
%%time
query = "What is the difference between PRE and POST position in Triggers?"
generateResponse(query, qa)

In [None]:
from langchain import PromptTemplate

# Prompt text: ask for a short answer grounded in the retrieved context, with
# an explicit fallback reply when the model is not sure
template = """Answer the question based on the context below. Keep the answer short and concise. Respond "Unsure about answer" if not sure about the answer.

Context: {context}

Question: {question}

Answer: """

# Wrap the text in a PromptTemplate that exposes its two placeholders
prompt_template = PromptTemplate(
    template=template,
    input_variables=["context", "question"],
)

In [None]:
# Retrieval QA chain that uses the custom prompt instead of the default one;
# same retriever settings as qa (7 most similar chunks, "stuff" chain)
qa1 = RetrievalQA.from_chain_type(
    chain_type_kwargs={"prompt": prompt_template},
    retriever=db.as_retriever(search_kwargs={"k": 7}, search_type="similarity"),
    chain_type="stuff",
    llm=langchain_model,
)

In [None]:
%%time
# Custom-prompt run: each cell in this section sends one question through qa1
query = "Provide the steps to configure Watson Assistant in OpenPages?"
qa1.run(query)

In [None]:
%%time
query = "What is FastMap?"
qa1.run(query)

In [None]:
%%time
query = "What is the purpose of Reporting Periods?"
qa1.run(query)

In [None]:
%%time
query = "What is a Role Template?"
qa1.run(query)

In [None]:
%%time
# NOTE(review): the baseline (qa) run of this question omits "types of";
# align the wording before comparing the two answers
query = "What are the different types of access controls available for non-participants for a standard stage within a workflow?"
qa1.run(query)

In [None]:
%%time
query = "What is the purpose of Object Reset?"
qa1.run(query)

In [None]:
%%time
query = "What are the features of Operational Risk Management in OpenPages?"
qa1.run(query)

In [None]:
%%time
query = "What is the difference between PRE and POST position in Triggers?"
qa1.run(query)

In [None]:
%%time
# NOTE(review): this question has no counterpart in the baseline (qa) section
query = "List the user administration permissions that can be delegated."
qa1.run(query)