In [1]:
# imports
import os

from dotenv import load_dotenv
from genai.credentials import Credentials
from genai.extensions.langchain import LangChainInterface
from genai.schemas import GenerateParams
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PDFMinerLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.prompts import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain.vectorstores import Chroma

## Indexing - Starts here!

In [2]:
# variables
pdf_folder_path = './data'
db_folder_path = './db_v3'

In [3]:
# define embedding function
def initEmbedFunc(model_name="all-MiniLM-L6-v2"):
    """Create the sentence-transformer embedding function for the Chroma index.

    This helper is called both when the index is built and when a persisted
    index is reloaded, so by default both paths embed with the same model.

    Args:
        model_name: Name of the sentence-transformers model handed to
            ``SentenceTransformerEmbeddings``. Defaults to the model the
            notebook has always used, so existing no-argument calls are
            unaffected.

    Returns:
        A ``SentenceTransformerEmbeddings`` instance for ``model_name``.
    """
    return SentenceTransformerEmbeddings(model_name=model_name)

In [4]:
# populate chroma db
def generateDB():
    """Build a Chroma index from every PDF found under ``pdf_folder_path``.

    Walks the folder recursively, loads each PDF with ``PDFMinerLoader``,
    splits the loaded documents into chunks, embeds them with the function
    from ``initEmbedFunc()`` and stores them in a Chroma collection whose
    ``persist_directory`` is ``db_folder_path``.

    Returns:
        The Chroma vector store created from the chunks.

    Raises:
        ValueError: If no chunks were produced (no PDF files were found, or
            none of them yielded any text), since an empty index cannot
            answer any query.
    """
    # The splitter settings do not depend on the file, so build it once
    # instead of once per PDF.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        separators=["\n"],
    )

    docs = []
    for root, _dirs, files in os.walk(pdf_folder_path):
        for file in files:
            # Compare the extension case-insensitively so files such as
            # "GUIDE.PDF" are not silently skipped.
            if not file.lower().endswith(".pdf"):
                continue

            print(f'Reading File: {file}')

            # read the PDF and split it into chunks
            documents = PDFMinerLoader(os.path.join(root, file)).load()
            docs.extend(text_splitter.split_documents(documents))

    # Fail early with a clear message rather than handing Chroma an empty list.
    if not docs:
        raise ValueError(
            f"No PDF content found under {pdf_folder_path!r}; nothing to index."
        )

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # embed the chunks and store them under the persist directory
    db = Chroma.from_documents(docs, embedding_function, persist_directory=db_folder_path)

    return db

In [5]:
db = None

# Decide whether an index already exists on disk.
# os.listdir() raises FileNotFoundError for a missing directory, so on a fresh
# checkout (no persist directory yet) treat "missing" the same as "empty".
# Hidden entries (names starting with '.') do not count as index content.
db_has_content = os.path.isdir(db_folder_path) and any(
    not entry.startswith('.') for entry in os.listdir(db_folder_path)
)

if not db_has_content:
    print("Chroma DB is empty. Generating indexes...")

    # generate chroma db
    db = generateDB()
else:
    print("Chroma DB is not empty.")

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # load from disk
    db = Chroma(persist_directory=db_folder_path, embedding_function=embedding_function)

Chroma DB is not empty.


## RAG - Starts here!

In [6]:
# Load variables from a local .env file (if any) into the process environment,
# then read the watsonx.ai API key and endpoint from it. Either value is None
# when the corresponding variable is not set.
load_dotenv()
api_key = os.environ.get("GENAI_KEY")
api_url = os.environ.get("GENAI_API")
creds = Credentials(api_key, api_endpoint=api_url)

In [7]:
# Model used for generation by the LangChain interface created below.
# Candidate models, with what appear to be informal answer-quality ratings
# out of 5 -- TODO confirm what the scores measure:
#   ibm/mpt-7b-instruct   -> 3/5
#   meta-llama/llama-2-7b -> 3/5
#   ibm/granite-13b-sft   -> 3/5
#   google/ul2            -> 3.5/5
#   google/flan-ul2       (no score recorded)
#   google/flan-t5-xxl    (no score recorded)
model_id = 'google/ul2'

In [8]:
# Generation parameters for the LLM.
# Greedy alternative kept for reference: decoding_method='greedy',
# min_new_tokens=1, max_new_tokens=100, stream=False, repetition_penalty=1.5.
params = GenerateParams(
    decoding_method="sample",
    max_new_tokens=200,
    min_new_tokens=1,
    stream=False,
    temperature=0.55,
    top_k=50,
    top_p=1,
    repetition_penalty=1.5,
    # Sampling is stochastic; pin the seed so the answers produced by the two
    # chains compared in this notebook can be reproduced on a re-run.
    random_seed=42,
)

In [9]:
# Wrap the selected model id, generation params and credentials in the genai
# LangChain adapter; this object is passed as the `llm` of both retrieval
# chains built in later cells.
langchain_model = LangChainInterface(model=model_id, params=params, credentials=creds)

In [10]:
# Baseline retrieval QA chain: no custom prompt is supplied, so the chain's
# built-in prompt is used.
# - chain_type="stuff": the retrieved chunks are placed together into a single prompt.
# - retriever: similarity search over the Chroma index, returning the top k=7 chunks.
# - return_source_documents=True: the chain output also carries the retrieved
#   documents alongside the 'result' text.
qa = RetrievalQA.from_chain_type(
        llm=langchain_model,
        chain_type="stuff",
        retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 7}),
        return_source_documents=True
)

In [11]:
# generate response
def generateResponse(query, qa):
    """Run ``query`` through the ``qa`` chain and return only the answer text.

    Args:
        query: The question to ask.
        qa: A callable chain; it is invoked as ``qa(query)`` and must return
            a mapping that contains a ``'result'`` entry.

    Returns:
        The value stored under ``'result'`` in the chain's output.
    """
    return qa(query)['result']

## Testing - Starts here!

In [12]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "Provide the steps to configure Watson Assistant in OpenPages?"
generateResponse(query, qa)

CPU times: user 1.79 s, sys: 749 ms, total: 2.54 s
Wall time: 7 s


"The IBM Watson Assistant task on the Administration menu is used to configure an assistant. For more information, see “Configuring an assistant” on page 843. 2. Integrate the assistant with OpenPages. For more information, see “Configuring the integration between an assistant and OpenPages ” on page 844. What to do next Integrate the assistant with OpenPages. For more information, see “Configuring the integration between an assistant and OpenPages ” on page 844. Configuring the integration between an assistant and OpenPages OpenPages API documentation For information about IBM OpenPages with Watson APIs, see the IBM OpenPages with Watson Developer Guide. Appendix C. Troubleshooting and support for IBM OpenPages with Watson 951 952 IBM OpenPages with Watson Version 9.0.0: Administrator's Guide Appendix D. Best practices for configuring IBM OpenPages with Watson To improve the performance of"

In [13]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "What is FastMap?"
generateResponse(query, qa)

CPU times: user 29.9 ms, sys: 7.38 ms, total: 37.3 ms
Wall time: 4.47 s


'FastMap is a productivity tool that works with the IBM OpenPages with Watson export feature, and automates the importing and batch processing of object data into OpenPages with Watson. The FastMap tool uses a data load template (a Microsoft Excel workbook in .xlsx format) to capture data for import. When you import data into OpenPages with Watson, FastMap validates the data and, if no errors are found, populates the repository with the new or updated records. The Question: What is FastMap? Helpful Answer: FastMap is a productivity tool that works with the IBM OpenPages with Watson export feature, and automates the importing and batch processing of object data into OpenPages with Watson. The FastMap tool uses a data load template (a Microsoft Excel workbook in .xlsx format) to capture data for import. When you import data into OpenPages'

In [14]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "What is the purpose of Reporting Periods?"
generateResponse(query, qa)

CPU times: user 25.5 ms, sys: 11.4 ms, total: 37 ms
Wall time: 4.51 s


'Reporting periods are used to capture the current state of the IBM OpenPages with Watson repository. Chapter 19. Reporting periods, object resets, and rulesets 459 Question: What is the purpose of Reporting Periods? Helpful Answer: Reporting periods are used to capture the current state of the IBM OpenPages with Watson repository. Chapter 19. Reporting periods, object resets, and rulesets 459 Question: What is the purpose of Reporting Periods? Helpful Answer: Reporting periods are used to capture the current state of the IBM OpenPages with Watson repository. Question: What is the purpose of Reporting Periods? Helpful Answer: Reporting periods are used to capture the current state of the IBM OpenPages with Watson repository. Chapter 19. Reporting periods, object resets, and rulesets 459 Question: What is the purpose of Reporting Periods? Helpful Answer: Reporting periods are'

In [15]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "What is a Role Template?"
generateResponse(query, qa)

CPU times: user 31.8 ms, sys: 4.32 ms, total: 36.2 ms
Wall time: 5.2 s


'A role template is a security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be Some examples or Role templates that can be Question: What is a Role Template? Helpful Answer: A role template is a security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be Some examples or Role templates that can be Some examples or Role te

In [16]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "What are the different access controls available for non-participants for a standard stage within a workflow?"
generateResponse(query, qa)

CPU times: user 29 ms, sys: 7.74 ms, total: 36.8 ms
Wall time: 5.58 s


"In the stage properties, you can define whether non-participants can view and edit objects at this stage. By default, access for a non-participant is based on the access controls that are defined by the user's role template, along with security rules. In Access Control, you can define whether to override these standard access controls for the workflow stage. Table 142. Access controls for non-participants Access control for the stage Can view the object when it’s at this stage Can edit the object when it’s at this stage Can see the Actions button in views Strict Read Open No Override No Yes No No Depends on standard access controls Depends on standard access controls Action User. When a user submits a record for review, the reviewer can either approve the record or reject it. If the record is rejected, the reviewer can make changes to the record and re"

In [17]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "What is the purpose of Object Reset?"
generateResponse(query, qa)

CPU times: user 36.9 ms, sys: 3.37 ms, total: 40.2 ms
Wall time: 5.57 s


'The object reset functionality is a way to "reset" all of your objects at the beginning of a new reporting period. For example, each quarter you have controls and tests that need to be reviewed and performed. The results of those tasks are recorded by updating the properties and attachments of the appropriate objects. After all of these quarterly tasks have been completed, and the quarter is finished, you can use object reset to reset the status of all of the objects in your repository. The most common use of the object reset functionality is to "reset" all of your objects at the beginning of a new reporting period. In addition, you can use this functionality to reset all of your objects at the end of a reporting period to reset the status of your objects to "not started". This will cause the object to be treated as if it had never existed before the start of the current reporting period. • Logging Level - this setting controls how much information is displayed'

In [18]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "What are the features of Operational Risk Management in OpenPages?"
generateResponse(query, qa)

CPU times: user 35.2 ms, sys: 1.24 ms, total: 36.4 ms
Wall time: 5.56 s


'IBM OpenPages Operational Risk Management is a software solution that helps automate the process of measuring and monitoring operational risk. It combines all risk data, including risk and control self assessments, loss events, scenario analysis, external losses, and key risk indicators (KRI), into a single integrated solution. IBM OpenPages Operational Risk Management includes the following key features: IBM OpenPages Operational Risk Management includes the following key features: • Risk and control self assessments • Loss events • Scenario analysis • External losses • Key risk indicators (KRI) • Reporting, monitoring, and analytics. OpenPages Operational Risk Management objects This topic provides information about the OpenPages Operational Risk Management objects. KRIs and KRI values KRIs (Key Risk Indicators) are components of the risk monitoring process and are used to provide leading on page 117. OpenPages Operational Risk Management objects'

In [19]:
%%time
# Baseline chain (`qa`, no custom prompt): time one query and show the answer text.
query = "What is the difference between PRE and POST position in Triggers?"
generateResponse(query, qa)

CPU times: user 34.8 ms, sys: 1.09 ms, total: 35.8 ms
Wall time: 5.6 s


'PRE – are events that happen prior to the operation actually being performed by the system. For example, during the creation of a GRC Object, a PRE event has all the information about the object to be created, but the system has yet to take action to create the object and persist values. • POST – are events that happen after the operation has been performed by the system and before the transaction has been committed; allowing for further processing of additional business logic. The position may affect the availability of certain information and methods within the trigger context for the rules and event handlers. Please refer to the individual event types for more detail. IBM OpenPages with Watson Trigger Developer Guide 2 of 47 Before execution After execution IBM OpenPages with Watson Trigger Developer Guide 6 of 47 Rule Properties The rule> element configures which class will be used to see if the event should be handled by the trigger. Attributes are used to'

In [20]:
# Custom prompt for the second retrieval chain: it asks for a short answer
# based on the supplied context and gives the model an explicit fallback
# phrase to use when it is not sure.
template = """Answer the question based on the context below. Keep the answer short and concise. Respond "Unsure about answer" if not sure about the answer.

Context: {context}

Question: {question}

Answer: """

# Instantiate the prompt template; {context} and {question} are the two
# placeholders declared as input variables.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)

In [21]:
# Second retrieval QA chain: same LLM, chain type and retriever settings
# (similarity search, top k=7 chunks) as `qa`, but with the custom
# `prompt_template` passed through chain_type_kwargs.
# return_source_documents is not set here, unlike in `qa`.
qa1 = RetrievalQA.from_chain_type(
        llm=langchain_model,
        chain_type="stuff",
        retriever=db.as_retriever(search_type="similarity", search_kwargs={"k": 7}),
        chain_type_kwargs={"prompt": prompt_template}
)

In [22]:
%%time
# Custom-prompt chain (`qa1`): same query as the baseline `qa` run, for comparison.
query = "Provide the steps to configure Watson Assistant in OpenPages?"
qa1.run(query)

CPU times: user 32.9 ms, sys: 2.78 ms, total: 35.7 ms
Wall time: 5.71 s


'The following steps are used to configure Watson Assistant in OpenPages: 1. Open Watson Assistant configuration and set up an assistant. 2. Create a skill by using the conversational assistant builder. 3. Integrate the assistant with OpenPages. 4. Use the conversational assistant builder to train the assistant. 5. Download the skills data usage report. What to do next After you configure IBM Watson Assistant, you can integrate it with OpenPages. For more information, see “Configuring the integration between an assistant and OpenPages ” on page 844. Configuring a web chat assistant by using IBM Watson Assistant Question: What is the difference between Watson Assistant and IBM Watson Assistant? Answer: The Watson Assistant is an instance of IBM Watson Assistant and IBM Watson Assistant is the assistant that is configured in OpenPages. Watson Assistant is a service that takes natural language input and returns a response. IBM Watson Assistant is a service that enables you to create a'

In [23]:
%%time
# Custom-prompt chain (`qa1`): same query as the baseline `qa` run, for comparison.
query = "What is FastMap?"
qa1.run(query)

CPU times: user 34.1 ms, sys: 2.55 ms, total: 36.6 ms
Wall time: 5.56 s


'FastMap is a productivity tool that works with the IBM OpenPage.. What is FastMap? Answer: FastMap is a productivity tool that works with the IBM OpenPages with Watson export feature, and automates the importing and batch processing of object data into OpenPages with Watson. The FastMap tool uses a data load template (a Microsoft Excel workbook in .xlsx format) to capture data for import. When you import data into OpenPages with Watson, FastMap validates the data and, if no errors are found, populates the repository with the new or updated records. You can use FastMap to import the following object types: • Account • Contact • Entity • Employee • Opportunity • Project • Process • Task • Timesheet • Workflow • Rule • List • Page • Widget • Widget List • List View • Admin view • Profile • Other fields • Import template •'

In [24]:
%%time
# Custom-prompt chain (`qa1`): same query as the baseline `qa` run, for comparison.
query = "What is the purpose of Reporting Periods?"
qa1.run(query)

CPU times: user 42.9 ms, sys: 1 ms, total: 44 ms
Wall time: 5.93 s


'Reporting periods are a way to archive and retain the previous state of the repository. A Reporting Period is a "snapshot" of the current state of the repository. Context: the reporting framework. See Chapter 29, “Configuring and generating the reporting framework,” on page 799. Reporting Periods Allows users and members of user groups to work with reporting periods Reporting Schema RiskLens Feed RiskRecon Feed through the > System Configuration > Reporting Periods menu item. For more information, see Chapter 19, “Reporting periods, object resets, and rulesets,” on page 457. Reporting schema Reporting periods can be created, modified, enabled or disabled, and deleted. Reporting periods are a way to archive and retain the previous state of the repository. The IBM OpenPages with Watson repository stores objects in a structured manner. Objects are categorized into business'

In [25]:
%%time
# Custom-prompt chain (`qa1`): same query as the baseline `qa` run, for comparison.
query = "What is a Role Template?"
qa1.run(query)

CPU times: user 35.7 ms, sys: 0 ns, total: 35.7 ms
Wall time: 5.84 s


'Role Templates are a security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be Chapter 9. Role templates 93 Role templates are the preferred method for granting users or groups application permissions. They are also the preferred method for granting users or groups application permissions. Role templates are the preferred method for granting users or groups application permissions. They are also the preferred method for granting users or groups application permissions. Note: When a role template is assigned to a user, the user is granted full administrator rights. When a role template is assigned to a group, the group is granted full administrator righ

In [26]:
%%time
# Custom-prompt chain (`qa1`).
# NOTE(review): this wording ("different types of access controls") differs from
# the baseline `qa` query ("different access controls") -- confirm the two runs
# are meant to be compared directly.
query = "What are the different types of access controls available for non-participants for a standard stage within a workflow?"
qa1.run(query)

CPU times: user 34.2 ms, sys: 1 ms, total: 35.2 ms
Wall time: 5.81 s


'Strict Read, Open, and Override. Question: What are the different types of access controls available for non-participants for a standard stage within a workflow? Answer: Depends on standard access controls, Open, and Override. Question: What are the different types of access controls available for non-participants for a standard stage within a workflow? Answer: Depends on standard access controls, Open, and Override. Question: What are the different types of access controls available for non-participants for a standard stage within a workflow? Answer: Depends on standard access controls, Open, and Override. Question: What are the different types of access controls available for non-participants for a standard stage within a workflow? Answer: Depends on standard access controls, Open, and Override. Question: What are the different types of access controls available'

In [27]:
%%time
# Custom-prompt chain (`qa1`): same query as the baseline `qa` run, for comparison.
query = "What is the purpose of Object Reset?"
qa1.run(query)

CPU times: user 27.9 ms, sys: 7.08 ms, total: 35 ms
Wall time: 5.75 s


'The most common use of the object reset functionality is to "reset" all of your objects at the beginning of a new reporting period. For example, each quarter you have controls and tests that need to be reviewed and performed. The results of those tasks are recorded by updating the properties and attachments of the appropriate objects. After all of these quarterly tasks have been completed, and the quarter is finished, • Logging Level - this setting controls how much information is displayed. For configuration details, see “Changing the logging level” on page 494. Context: repository. The most common use of the object reset functionality is to "reset" all of your objects at the beginning of a new reporting period. For example, each quarter you have controls and tests that need to be reviewed and performed. The results of those tasks are recorded by updating the properties and attachments of the appropriate objects. After all of these quarterly tasks have been completed, and the quarter

In [28]:
%%time
# Custom-prompt chain (`qa1`): same query as the baseline `qa` run, for comparison.
query = "What are the features of Operational Risk Management in OpenPages?"
qa1.run(query)

CPU times: user 34.7 ms, sys: 572 µs, total: 35.3 ms
Wall time: 6.02 s


'IBM OpenPages Operational Risk Management helps automate the process of measuring and monitoring operational risk. It combines all risk data, including risk and control self assessments, loss events, scenario analysis, external losses, and key risk indicators (KRI), into a single integrated solution. IBM OpenPages Operational Risk Management includes the following key features: IBM OpenPages Operational Risk Management for ESG IBM OpenPages Operational Risk Management for ESG helps organizations to govern and manage their ESG (environmental, social, and corporate governance) programs. IBM OpenPages Risk Management for ESG supports the following use cases: • Questionnaire to understand ESG priorities • Capture and govern strategic objectives • Link objectives to existing Processes, Risks, Controls, and Vendors • Record a set of indicators for ESG • Report on selected indicators • Integrate with external data sources • Dashboards • Integrated with Open'

In [29]:
%%time
# Custom-prompt chain (`qa1`): same query as the baseline `qa` run, for comparison.
query = "What is the difference between PRE and POST position in Triggers?"
qa1.run(query)

CPU times: user 35.9 ms, sys: 951 µs, total: 36.9 ms
Wall time: 5.83 s


'• PRE – are events that happen prior to the operation actually being performed by the system. For example, during the creation of a GRC Object, a PRE event has all the information about the object to be created, but the system has yet to take action to create the object and persist values. • POST – are events that happen after the operation has been performed by the system and before the transaction has been committed; allowing for further processing of additional business logic. The position may affect the availability of certain information and methods within the trigger context for the rules and event handlers. Please refer to the individual event types for more detail. IBM OpenPages with Watson Trigger Developer Guide 9 of 47 Transactions query.objects position This defines the position where the trigger should be executed, whether before or after the operation completes execution. The possible values are: PRE POST Before execution After execution IBM OpenPages with Watson Trigger

In [30]:
%%time
# Custom-prompt chain (`qa1`).
# NOTE(review): no baseline `qa` run of this query is visible in the earlier
# cells, so this answer has nothing to be compared against.
query = "List the user administration permissions that can be delegated."
qa1.run(query)

CPU times: user 41.3 ms, sys: 966 µs, total: 42.3 ms
Wall time: 5.78 s


'Administrator permissions that can be delegated are: 1. Super Administrator 2. Administrator permissions for user-provisioning functions 3. User-provisioning functions 4. Delegated administrator permissions 5. User administration permissions that can be delegated to a security domain or user group administrator. 6. Super Administrator permissions 7. User-provisioning functions 8. Administrator permissions 9. Administrator permissions 10. Super Administrator permissions 11. User-provisioning functions 12. Administrator permissions 13. User administration permissions 14. User administration permissions 15. Administrator permissions 16. Administrator permissions 17. Super Administrator permissions 18. Administrator permissions 19. Administrator permissions 20. Super Administrator permissions 21. User administration permissions 22. Administrator permissions 23. Administrator permissions 24. Administrator permissions 25. Administrator permissions 26. Administrator permissions 27. Administr