In [1]:
# imports
import os
from dotenv import load_dotenv
from genai.schemas import GenerateParams
from genai.credentials import Credentials
from langchain.vectorstores import Chroma
from genai.extensions.langchain import LangChainInterface
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter, TokenTextSplitter
from langchain.document_loaders import PDFMinerLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA

The class ModelType is being deprecated.
Please replace any reference to ModelType by its model id string equivalent.
Example :
  ModelType.FLAN_T5 becomes "google/flan-t5-xxl"[0m

  from genai.schemas.models import ModelType


## Indexing - Starts here!

In [2]:
# variables
pdf_folder_path = './data'
db_folder_path = './db_new'

In [3]:
# define embedding function
def initEmbedFunc():
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
    return embedding_function

In [4]:
# populate chroma db
def generateDB():
    docs = []
    for root, dirs, files in os.walk(pdf_folder_path):
        for file in files:
            if file.endswith(".pdf"):
                print(f'Reading File: {file}')
                
                # read PDF
                loader = PDFMinerLoader(os.path.join(root, file))
                documents = loader.load()

                # load the document and split it into chunks
                text_splitter = RecursiveCharacterTextSplitter(
                                    chunk_size=1000, 
                                    chunk_overlap=100, 
                                    separators=["\n"]
                )
                temp = text_splitter.split_documents(documents)
                
                # append to docs
                docs += temp

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # save to disk
    db = Chroma.from_documents(docs, embedding_function, persist_directory=db_folder_path)
    
    return db

In [5]:
db = None

if [f for f in os.listdir(db_folder_path) if not f.startswith('.')] == []:
    print("Chroma DB is empty. Generating indexes...")
    
    # generate chroma db
    db = generateDB()
else:
    print("Chroma DB is not empty.")

    # create the open-source embedding function
    embedding_function = initEmbedFunc()

    # load from disk
    db = Chroma(persist_directory=db_folder_path, embedding_function=embedding_function)

Chroma DB is empty. Generating indexes...
Reading File: 9.0 administrators_guide.pdf
Reading File: Trigger_Developer_Guide_v9.pdf
Reading File: op9_solutions_guide.pdf
Reading File: op_user_guide.pdf


## RAG - Starts here!

In [6]:
# retrieve the watsonx.ai credentials
load_dotenv()
api_key = os.getenv("GENAI_KEY", None)
api_url = os.getenv("GENAI_API", None)
creds = Credentials(api_key, api_endpoint=api_url)

In [72]:
# variables
# ibm/mpt-7b-instruct -> 3/5
# meta-llama/llama-2-7b -> 3/5
# ibm/granite-13b-sft -> 3/5
# google/ul2 -> 3.5/5
# google/flan-ul2
# google/flan-t5-xxl
model_id = 'google/ul2'

In [73]:
# # generate LLM params
# params = GenerateParams(
#             decoding_method='greedy', 
#             min_new_tokens=1,
#             max_new_tokens=100,
#             stream=False,
#             repetition_penalty=1.5)

params = GenerateParams(
    decoding_method="sample",
    max_new_tokens=200,
    min_new_tokens=1,
    stream=False,
    temperature=0.55,
    top_k=50,
    top_p=1,
    repetition_penalty=1.5
)

In [74]:
# create a langchain interface to use with retrieved content
langchain_model = LangChainInterface(model=model_id, params=params, credentials=creds)

In [75]:
# create retrieval QA
retriever = db.as_retriever()
qa = RetrievalQA.from_chain_type(
        llm=langchain_model,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True
)

In [76]:
# generate response
def generateResponse(query, qa):    
    generated_text = qa(query)
    answer = generated_text['result']
    return answer   

## Testing - Starts here!

In [77]:
%%time
query = "Provide the steps to configure Watson Assistant in OpenPages?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 38.6 ms, sys: 622 µs, total: 39.3 ms
Wall time: 14.3 s


'To configure Watson Assistant in OpenPages, you need to install the Watson Assistant package. Then you can follow the steps below: • From the OpenPages menu, select Solution Configuration > Profiles > Watson Assistant. • Select the Watson Assistant profile. • Click Configure Watson Assistant. • Enter the following information: • Watson Assistant URL • Username • Password • Deployment ID • Space ID • API key • API secret • Callback URL • Voice name • Voice description. • Click Save. • Click Done. Question: How do I configure Watson Assistant? Helpful Answer: To configure Watson Assistant, you need to install the Watson Assistant package. Then you can follow the steps below: • Launch the Watson Assistant application. • Click the Watson Assistant icon. • Click the Install button. • On the Install Watson Assistant page, enter the following information: • Watson Assistant URL • Username • Password • Deployment ID • Space ID'

In [78]:
%%time
query = "What is FastMap?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 30.3 ms, sys: 9.13 ms, total: 39.5 ms
Wall time: 14.3 s


'FastMap is a data import tool that allows you to import data into the application. FastMap templates are available for import. FastMap templates are spreadsheets that contain data mappings that are stored on the server. FastMap templates are downloaded and imported to the application. When FastMap templates are imported, the data is validated against the profile of the logged-on user. If the FastMap template is localized according to the locale of the end user, the FastMap template is also localized. FastMap templates are encrypted. FastMap templates are imported from the FastMap Import tab in the application. FastMap templates can be imported manually or automatically. FastMap templates are customizable to meet your process needs. FastMap templates can be multivalued. FastMap templates can be encrypted. FastMap templates are not available in the FastMap Import dialog box. FastMap templates are not available in the FastMap Validate and Import dialog box. FastMap templates'

In [79]:
%%time
query = "What is the purpose of Reporting Periods?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 33.7 ms, sys: 4.97 ms, total: 38.7 ms
Wall time: 14.2 s


'Reporting periods are used to archive data and reports from the previous reporting period. They provide a snapshot of the data at the end of the reporting period and can be viewed at any time. Chapter 19. Reporting periods, object resets, and rulesets 453 for the new quarter. You can "reset" all of your objects at the beginning of a new reporting period. This allows you to start the new quarter with a clean set of objects and properties and attachments. You can also reset a reporting period, which will cause all objects and properties to be reset. Then, when you start the new quarter, you can use the object reset functionality to "reset" all of the objects and properties and attachments to the state they were in before you archived them into a reporting period. When you enable a reporting period, the objects and properties of all objects in the reporting period are reset to the state they were in when'

In [80]:
%%time
query = "What is a Role Template?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 31.2 ms, sys: 7.41 ms, total: 38.6 ms
Wall time: 13.9 s


'A Role Template is a security object that you can use to define all aspects of application security for various groups and users within a business unit. It contains access control definitions on folder structures for object types and application permissions. Role templates generally reflect the usual or expected function that a user or group plays within an organization. Some examples or Role templates that can be defined are Process Owner, Control Owner, and Tester. The template can then be applied to different Users/Groups for a specific security context. Security context point role An instance of a role template that is applied to a set of Users/Groups for a specific security context. Roles are granted to Users/Groups which allows them access to objects with certain permissions. Some examples of roles are: Process Owner, Control Owner, and Tester. Role template A security object that you can use to define all aspects of application security for various groups and users within '

In [81]:
%%time
query = "What are the different access controls available for non-participants for a standard stage within a workflow?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 38.6 ms, sys: 355 µs, total: 39 ms
Wall time: 14.2 s


'Can view the object when it’s at this stage Can edit the object when it’s at this stage Can see the Actions button in views Strict Read Open No Override No Yes No No Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depends on standard access controls Depen

In [82]:
%%time
query = "What is the purpose of Object Reset?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 38.5 ms, sys: 679 µs, total: 39.2 ms
Wall time: 14.1 s


'Object Reset is used to update object types and their properties to a new state. This is done before a new reporting period is turned on. The reset session can be configured to either update all objects or only the objects that meet the specified criteria. The Object Reset feature is used to update, delete, and create object types, property values, and rulesets for a new reporting period. The purpose of this feature is to reset objects for a new reporting period. The Object Reset feature can be used to update, delete, and create object types, property values, and rulesets for a new reporting period. You can configure the Object Reset feature to: • OBEY ACL restrictions • IGNORE LOCK RESTRAINING SETTINGS • CONTINUE ON ERROR • LOG ERRORS • RESET - This option resets all objects within the scope of the'

In [83]:
%%time
query = "What are the features of Operational Risk Management in OpenPages?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 40.8 ms, sys: 3.99 ms, total: 44.8 ms
Wall time: 13.6 s


'• Loss Events to track, assess, and manage internal and external events that might result in operational loss. • Risk and Control Self Assessments (RCSA) to identify, measure, and mitigate risk. • Key Risk Indicators (KRIs) and Key Performance Indicators (KPIs), which can track performance metrics to potentially show the presence or state of a risk condition or trend. • Scenario Analysis, which is an assessment technique that is used to identify and measure specific kinds of risks, in particular, low frequency, high-severity events. • External Loss Events provide the ability to import loss data from IBM FIRST Risk Case Studies, ORX, and ORIC loss databases into OpenPages Operational Risk Management for scenario analysis, benchmarking, and reports generation. You can also export loss data to analytic tools or capital allocation applications. • Issue Management and Remediation (IMR), which includes the following activities'

In [84]:
%%time
query = "What is the difference between PRE and POST position in Triggers?"
generateResponse(query, qa)

  self._read_ready.notifyAll()


CPU times: user 35.9 ms, sys: 3.61 ms, total: 39.5 ms
Wall time: 14.6 s


'- PRE - are events that happen prior to the operation actually being performed by the system. For example, during the creation of a GRC Object, a PRE event has all the information about the object to be created, but the system has yet to take action to create the object and persist values. - POST - are events that happen after the operation has been performed by the system and before the transaction has been committed; allowing for further processing of additional business logic. The position may affect the availability of certain information and methods within the trigger context for the rules and event handlers. Please refer to the individual event types for more detail. IBM OpenPages with Watson Trigger Developer Guide 11 of 47 Transactions All triggers on operations are executed within the same transaction of the original system operation. If an error occurs in a trigger, whether system or business logic, the framework will roll back the transaction. In other words, the original o