# v4 - Same as v3, but using a FAISS vector store

In [1]:
# imports
import os

from dotenv import load_dotenv
from genai.credentials import Credentials
from genai.extensions.langchain import LangChainInterface
from genai.schemas import GenerateParams
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PDFMinerLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain.vectorstores import FAISS

## Indexing - Starts here!

In [2]:
# variables
# Folder that generateDB() walks recursively looking for .pdf files to index
pdf_folder_path = './data'
# Folder the FAISS index is saved to by generateDB() and loaded from on later runs
db_folder_path = './db_v4'

In [3]:
# define embedding function
def initEmbedFunc():
    """Create the embedding function used to build and load the FAISS store.

    Returns:
        A ``SentenceTransformerEmbeddings`` instance configured with the
        "all-MiniLM-L6-v2" model.
    """
    return SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

In [4]:
# populate the FAISS vector store
def generateDB():
    """Index every PDF under ``pdf_folder_path`` into a FAISS store and save it.

    Walks ``pdf_folder_path`` recursively, loads each file whose name ends with
    ".pdf" using ``PDFMinerLoader``, splits the loaded documents into chunks,
    embeds all chunks with the function returned by ``initEmbedFunc()`` and
    saves the resulting index to ``db_folder_path``.

    Returns:
        The in-memory ``FAISS`` vector store that was just built.

    Raises:
        ValueError: If no chunks were produced (no PDF found, or nothing could
            be extracted), since an index cannot be built from zero documents.
    """
    # The splitter configuration does not depend on the file being read,
    # so build it once instead of once per PDF.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        separators=["\n"],
    )

    docs = []
    for root, _dirs, files in os.walk(pdf_folder_path):
        for file in files:
            if not file.endswith(".pdf"):
                continue

            print(f'Reading File: {file}')

            # read PDF
            documents = PDFMinerLoader(os.path.join(root, file)).load()

            # split the loaded document into chunks and collect them
            docs.extend(text_splitter.split_documents(documents))

    # Fail with a clear message rather than an opaque error from the
    # vector-store constructor when there is nothing to embed.
    if not docs:
        raise ValueError(
            f"No PDF content found under '{pdf_folder_path}'; cannot build the FAISS index."
        )

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # Use Langchain to create the embeddings
    db = FAISS.from_documents(documents=docs, embedding=embedding_function)

    # save the embeddings into FAISS vector store
    db.save_local(db_folder_path)

    return db

In [5]:
db = None

# Build the index on first use, otherwise reuse the one saved on disk.
# os.listdir() raises FileNotFoundError when the folder does not exist yet
# (e.g. a fresh checkout), so a missing folder is treated like an empty one.
# Hidden entries (names starting with '.') do not count as index files.
if not os.path.isdir(db_folder_path) or not any(
    not f.startswith('.') for f in os.listdir(db_folder_path)
):
    print("FAISS DB is empty. Generating indexes...")

    # generate the FAISS index from the PDFs and save it to db_folder_path
    db = generateDB()
else:
    print("FAISS DB is not empty.")

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # load the faiss vector store we saved into memory
    db = FAISS.load_local(db_folder_path, embedding_function)

Chroma DB is empty. Generating indexes...
Reading File: 9.0 administrators_guide.pdf
Reading File: Trigger_Developer_Guide_v9.pdf
Reading File: op9_solutions_guide.pdf
Reading File: op_user_guide.pdf


## RAG - Starts here!

In [6]:
# Read the watsonx.ai API key and endpoint from the environment
# (load_dotenv() is called first) and wrap them in a Credentials object.
load_dotenv()
api_key = os.getenv("GENAI_KEY")  # None when the variable is not set
api_url = os.getenv("GENAI_API")  # None when the variable is not set
creds = Credentials(api_key, api_endpoint=api_url)

In [7]:
# variables
# Candidate model ids. The "-> n/5" notes look like informal answer-quality
# scores from earlier runs — TODO confirm how they were assigned.
# ibm/mpt-7b-instruct -> 3/5
# meta-llama/llama-2-7b -> 3/5
# ibm/granite-13b-sft -> 3/5
# google/ul2 -> 3.5/5
# google/flan-ul2
# google/flan-t5-xxl
# Model id passed to LangChainInterface below
model_id = 'google/ul2'

In [9]:
# generate LLM params:
# greedy decoding, between 1 and 200 new tokens, no streaming,
# repetition penalty of 1.5
params = GenerateParams(
    decoding_method='greedy',
    repetition_penalty=1.5,
    min_new_tokens=1,
    max_new_tokens=200,
    stream=False,
)

# params = GenerateParams(
#     decoding_method="sample",
#     max_new_tokens=200,
#     min_new_tokens=1,
#     stream=False,
#     temperature=0.55,
#     top_k=50,
#     top_p=1,
#     repetition_penalty=1.5
# )

In [10]:
# Wrap the selected model, generation params and credentials in a LangChain
# interface so it can be used as the LLM of the retrieval chains below.
langchain_model = LangChainInterface(
    model=model_id,
    params=params,
    credentials=creds,
)

In [11]:
# Retrieval QA chain without a custom prompt: fetch the 7 most similar chunks
# from the FAISS store, pass them to the LLM with the "stuff" chain type, and
# return the source documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=langchain_model,
    chain_type="stuff",
    return_source_documents=True,
    retriever=db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 7},
    ),
)

In [12]:
# generate response
def generateResponse(query, qa):
    """Run ``query`` through ``qa`` and return only the answer text.

    Args:
        query: The question handed to ``qa``.
        qa: A callable that accepts the query and returns a mapping
            containing a ``'result'`` entry.

    Returns:
        The value stored under ``'result'`` in the mapping returned by
        ``qa(query)``; any other entries in that mapping are discarded.
    """
    return qa(query)['result']

## Testing - Starts here!

In [13]:
%%time
# Chain without a custom prompt (qa): Watson Assistant configuration question
query = "Provide the steps to configure Watson Assistant in OpenPages?"
generateResponse(query, qa)

CPU times: user 32.2 ms, sys: 4.44 ms, total: 36.6 ms
Wall time: 4.75 s


'The steps to configure Watson Assistant in OpenPages are: 1. Configure an assistant. For more information, see “Configuring a web chat assistant by using IBM Watson Assistant” on page 843. 2. Integrate the assistant with OpenPages. For more information, see “Configuring the integration between an assistant and OpenPages ” on page 844. 3. Create a new assistant. For more information, see “Creating a new assistant” on page 844. 4. Integrate the assistant with OpenPages. For more information, see “Configuring the integration between an assistant and OpenPages ” on page 844. 5. Test the assistant. For more information, see “Testing the assistant” on page 844. 6. Improve the assistant. For more information, see “Improving the assistant” on page 844. 7. Go to OP_HOME/bin where OP_HOME represents the installation location of'

In [14]:
%%time
# Chain without a custom prompt (qa): FastMap definition question
query = "What is FastMap?"
generateResponse(query, qa)

CPU times: user 30.2 ms, sys: 4.04 ms, total: 34.2 ms
Wall time: 4.59 s


'FastMap is a productivity tool that works with the IBM OpenPages with Watson export feature, and automates the importing and batch processing of object data into OpenPages with Watson. The FastMap tool uses a data load template (a Microsoft Excel workbook in .xlsx format) to capture data for import. When you import data into OpenPages with Watson, FastMap validates the data and, if no errors are found, populates the repository with the new or updated records. FastMap templates...................................................................................................................................................768 The FastMap data validation process................................................................................................................ 769 The FastMap import process................................................................................................................................ 768 FastMap templates.......................................

In [15]:
%%time
# Chain without a custom prompt (qa): Reporting Periods question
query = "What is the purpose of Reporting Periods?"
generateResponse(query, qa)

CPU times: user 31.9 ms, sys: 4.2 ms, total: 36.1 ms
Wall time: 4.47 s


'Reporting periods are a way to archive the IBM OpenPages with Watson repository. They are usually created when the documentation phase of a quarter or year is complete and ready for attestation. Reporting periods are also used to archive the IBM OpenPages with Watson repository after a ruleset has been run. Chapter 19. Reporting periods, object resets, and rulesets 457 Question: What is the purpose of Object Resets? Helpful Answer: Object resets are a way to automatically modify objects that exist in the IBM OpenPages with Watson repository. Object resets are rule-based operations that are contained in a ruleset. Chapter 19. Reporting periods, object resets, and rulesets 458 Question: What is the difference between a Reporting Period and an Object Reset? Helpful Answer: A Reporting Period is a "snapshot" of the current'

In [16]:
%%time
# Chain without a custom prompt (qa): Role Template question
query = "What is a Role Template?"
generateResponse(query, qa)

CPU times: user 33.7 ms, sys: 213 µs, total: 33.9 ms
Wall time: 4.48 s


'A role template is a security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be Question: What is a Role Template? Helpful Answer: A role template is a security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be Some examples of roles are: Process Owner, Control Owner, and Tester. Role template A security object that you c

In [17]:
%%time
# Chain without a custom prompt (qa): workflow access-control question
query = "What are the different access controls available for non-participants for a standard stage within a workflow?"
generateResponse(query, qa)

CPU times: user 33 ms, sys: 423 µs, total: 33.4 ms
Wall time: 4.49 s


'Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access 

In [18]:
%%time
# Chain without a custom prompt (qa): Object Reset question
query = "What is the purpose of Object Reset?"
generateResponse(query, qa)

CPU times: user 32.5 ms, sys: 2.11 ms, total: 34.6 ms
Wall time: 4.45 s


'The purpose of the object reset functionality is to reset all of your objects at the beginning of a new reporting period. For example, each quarter you have controls and tests that need to be reviewed and performed. The results of those tasks are recorded by updating the properties and attachments of the appropriate objects. After all of these quarterly tasks have been completed, and the quarter is finished, you can use the object reset functionality to reset all of your objects to their original state. This is useful when you want to start a new reporting period with a clean repository. The most common use of the object reset functionality is to "reset" all of your objects at the beginning of a new reporting period. For example, each quarter you have controls and tests that need to be reviewed and performed. The results of those tasks are recorded by updating the properties and attachments of the appropriate objects. After all of these quarterly tasks have been completed, and the qua

In [19]:
%%time
# Chain without a custom prompt (qa): Operational Risk Management question
query = "What are the features of Operational Risk Management in OpenPages?"
generateResponse(query, qa)

CPU times: user 31.9 ms, sys: 2.78 ms, total: 34.7 ms
Wall time: 4.48 s


'IBM OpenPages Operational Risk Management helps automate the process of measuring and monitoring operational risk. It combines all risk data, including risk and control self assessments, loss events, scenario analysis, external losses, and key risk indicators (KRI), into a single integrated solution. IBM OpenPages Operational Risk Management includes the following key features: • Reporting, monitoring, and analytics. OpenPages Operational Risk Management objects This topic provides information about the OpenPages Operational Risk Management objects. KRIs and KRI values KRIs (Key Risk Indicators) are components of the risk monitoring process and are used to provide leading on page 117. OpenPages Operational Risk Management objects This topic provides information about the OpenPages Operational Risk Management objects. KRIs and KRI values KRIs (Key Risk Indicators) are components of the risk monitoring process and are used to provide leading on'

In [20]:
%%time
# Chain without a custom prompt (qa): trigger PRE/POST position question
query = "What is the difference between PRE and POST position in Triggers?"
generateResponse(query, qa)

CPU times: user 31.7 ms, sys: 2.7 ms, total: 34.4 ms
Wall time: 4.46 s


'PRE – are events that happen prior to the operation actually being performed by the system. For example, during the creation of a GRC Object, a PRE event has all the information about the object to be created, but the system has yet to take action to create the object and persist values. • POST – are events that happen after the operation has been performed by the system and before the transaction has been committed; allowing for further processing of additional business logic. The position may affect the availability of certain information and methods within the trigger context for the rules and event handlers. Please refer to the individual event types for more detail. IBM OpenPages with Watson Trigger Developer Guide 7 of 47 Rule Properties The rule> element configures which class will be used to see if the event should be handled by the trigger. Attributes are used to configure the behavior of the rule. IBM OpenPages with Watson Trigger Developer Guide'

In [21]:
# Define prompt
# Custom prompt for the "stuff" chain: {context} receives the retrieved chunks
# and {question} the user query. PromptTemplate is imported in the notebook's
# import cell so every dependency is declared in one place.
template = """Answer the question based on the context below. Keep the answer short and concise. Respond "Unsure about answer" if not sure about the answer.

Context: {context}

Question: {question}

Answer: """

# instantiate prompt template
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)

In [22]:
# Retrieval QA chain that uses the custom prompt_template: same LLM, "stuff"
# chain type and 7-chunk similarity retriever as qa, but the prompt is
# overridden through chain_type_kwargs.
qa1 = RetrievalQA.from_chain_type(
    llm=langchain_model,
    chain_type="stuff",
    chain_type_kwargs={"prompt": prompt_template},
    retriever=db.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 7},
    ),
)

In [23]:
%%time
# Custom-prompt chain (qa1): Watson Assistant configuration question
query = "Provide the steps to configure Watson Assistant in OpenPages?"
qa1.run(query)

CPU times: user 38.8 ms, sys: 438 µs, total: 39.2 ms
Wall time: 4.49 s


'Watson Assistant is a web chat assistant that you can use to provide answers to frequently asked questions. You can configure Watson Assistant in OpenPages by using the IBM Watson Assistant task on the Administration menu. For more information, see “Configuring a web chat assistant by using IBM Watson Assistant” on page 843. Question: What are the steps to configure Watson Assistant in OpenPages? Answer: 1. Configure an assistant. For more information, see “Configuring a web chat assistant by using IBM Watson Assistant” on page 843. 2. Integrate the assistant with OpenPages. For more information, see “Configuring the integration between an assistant and OpenPages ” on page 844. 3. Create a new assistant. For more information, see “Creating a new assistant” on page 844. 4. Integrate the assistant with OpenPages. For more information, see “Configuring the integration between an assistant and OpenPages ”'

In [24]:
%%time
# Custom-prompt chain (qa1): FastMap definition question
query = "What is FastMap?"
qa1.run(query)

CPU times: user 32 ms, sys: 207 µs, total: 32.2 ms
Wall time: 4.47 s


'FastMap is a productivity tool that works with the IBM OpenPages with Watson export feature, and automates the importing and batch processing of object data into OpenPages with Watson. The FastMap tool uses a data load template (a Microsoft Excel workbook in .xlsx format) to capture data for import. When you import data into OpenPages with Watson, FastMap validates the data and, if no errors are found, populates the repository with the new or updated records. Question: What is the difference between FastMap templates and FastMap templates? Answer: A FastMap template is a Microsoft Excel workbook in .xlsx format that contains the data load definition for a particular object type. A FastMap template is a template that is used to capture data for import. A FastMap template is a template that is used to capture data for import. A FastMap template is a template'

In [25]:
%%time
# Custom-prompt chain (qa1): Reporting Periods question
query = "What is the purpose of Reporting Periods?"
qa1.run(query)

CPU times: user 31.1 ms, sys: 2.41 ms, total: 33.6 ms
Wall time: 4.45 s


'Reporting periods are a way to archive the IBM OpenPages with Watson repository. They are a snapshot of the IBM OpenPages with Watson repository at a specific point in time. The purpose of a reporting period is to archive the IBM OpenPages with Watson repository at a specific point in time. A reporting period is a "snapshot" of the current state of the repository, usually created when the documentation phase of a quarter or year is complete and ready for attestation. Reporting periods are a way to archive the IBM OpenPages with Watson repository at a specific point in time. They are a snapshot of the IBM OpenPages with Watson repository at a specific point in time. Question: What is the purpose of Reporting Periods? Answer: Reporting periods are a way to archive the IBM OpenPages with Watson repository at a specific point in time. They are a snapshot of the'

In [26]:
%%time
# Custom-prompt chain (qa1): Role Template question
query = "What is a Role Template?"
qa1.run(query)

CPU times: user 29.2 ms, sys: 4.11 ms, total: 33.4 ms
Wall time: 4.46 s


'A Role Template is a security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be Some examples of roles are: Process Owner, Control Owner, and Tester. Role template A security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be Some examples of roles are: Process Owner, Control Owner, and Tester. Role template A security obj

In [27]:
%%time
# Custom-prompt chain (qa1): workflow access-control question.
# NOTE(review): the wording differs slightly from the version asked of `qa`
# ("different types of access controls" vs "different access controls") —
# confirm whether that is intended when comparing the two chains.
query = "What are the different types of access controls available for non-participants for a standard stage within a workflow?"
qa1.run(query)

CPU times: user 30.9 ms, sys: 2.48 ms, total: 33.4 ms
Wall time: 4.47 s


'The different types of access controls available for non-participants for a standard stage within a workflow are: Strict Read Open No Override Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access c

In [28]:
%%time
# Custom-prompt chain (qa1): Object Reset question
query = "What is the purpose of Object Reset?"
qa1.run(query)

CPU times: user 26.4 ms, sys: 6.46 ms, total: 32.8 ms
Wall time: 4.49 s




In [29]:
%%time
# Custom-prompt chain (qa1): Operational Risk Management question
query = "What are the features of Operational Risk Management in OpenPages?"
qa1.run(query)

CPU times: user 34.8 ms, sys: 0 ns, total: 34.8 ms
Wall time: 4.46 s


'IBM OpenPages Operational Risk Management helps automate the process of measuring and monitoring operational risk. It combines all risk data, including risk and control self assessments, loss events, scenario analysis, external losses, and key risk indicators (KRI), into a single integrated solution. IBM OpenPages Operational Risk Management includes the following key features: • Reporting, monitoring, and analytics. OpenPages Operational Risk Management objects This topic provides information about the OpenPages Operational Risk Management objects. KRIs and KRI values KRIs (Key Risk Indicators) are components of the risk monitoring process and are used to provide leading on page 117. OpenPages Operational Risk Management objects This topic provides information about the OpenPages Operational Risk Management objects. KRIs and KRI values KRIs (Key Risk Indicators) are components of the risk monitoring process and are used to provide leading on'

In [30]:
%%time
# Custom-prompt chain (qa1): trigger PRE/POST position question
query = "What is the difference between PRE and POST position in Triggers?"
qa1.run(query)

CPU times: user 42 ms, sys: 176 µs, total: 42.2 ms
Wall time: 4.49 s


'PRE – are events that happen prior to the operation actually being performed by the system. For example, during the creation of a GRC Object, a PRE event has all the information about the object to be created, but the system has yet to take action to create the object and persist values. • POST – are events that happen after the operation has been performed by the system and before the transaction has been committed; allowing for further processing of additional business logic. The position may affect the availability of certain information and methods within the trigger context for the rules and event handlers. Please refer to the individual event types for more detail. IBM OpenPages with Watson Trigger Developer Guide 9 of 47 Transactions query.objects position This defines the position where the trigger should be executed, whether before or after the operation completes execution. The possible values are: PRE POST Before execution After execution IBM OpenPages with Watson Trigger D

In [31]:
%%time
# Custom-prompt chain (qa1): delegated user-administration permissions question
query = "List the user administration permissions that can be delegated."
qa1.run(query)

CPU times: user 31.2 ms, sys: 2.37 ms, total: 33.6 ms
Wall time: 4.45 s


'Super Administrators can delegate user administration permissions to other administrators. For more information, see “Delegate administrator permissions” on page 42. For example, a Super Administrator can delegate user provisioning functions to other administrators. For more information, see “Administrator permissions for user-provisioning functions” on page 43. A Super Administrator is specified during the installation process. The Super Administrator user is a member of a group named OPAdministrators. For more information about the OPAdministrators group, see “The OPAdministrators group” on page 39. Question: List the user administration permissions that can be delegated. Answer: Super Administrators can delegate user administration permissions to other administrators. For more information, see “Delegate administrator permissions” on page 42. For example, a Super Administrator can delegate user provisioning functions to other administrators. For more information,'