In [16]:
# imports
import os
from dotenv import load_dotenv
from genai.schemas import GenerateParams
from genai.credentials import Credentials
from langchain.vectorstores import Chroma
from genai.extensions.langchain import LangChainInterface
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings

In [17]:
# Read the watsonx.ai API key and endpoint from the environment
# (load_dotenv() first pulls in any values defined in a local .env file).
load_dotenv()
api_key = os.environ.get("GENAI_KEY")
api_url = os.environ.get("GENAI_API")
credentials = Credentials(api_key, api_endpoint=api_url)

In [18]:
# Text-generation settings passed to the model on every request.
# NOTE(review): temperature is usually only relevant when sampling; with
# decoding_method='greedy' it is presumably ignored — confirm against the API docs.
params = GenerateParams(
    # decoding strategy
    decoding_method='greedy',
    temperature=0.7,
    repetition_penalty=2,
    # bounds on the number of generated tokens
    min_new_tokens=1,
    max_new_tokens=150,
    # return the full completion in one response rather than streaming it
    stream=False,
)

In [19]:
# Notebook configuration.
#
# Models tried so far, with the score noted for each
# (presumably an informal answer-quality rating out of 5 — confirm):
#   ibm/mpt-7b-instruct   -> 3/5
#   meta-llama/llama-2-7b -> 3/5
#   ibm/granite-13b-sft   -> 3/5
#   google/ul2            -> 3.5/5
model_id = "google/ul2"

# folder holding the source PDFs, and folder where the Chroma DB is persisted
pdf_folder_path = "./data"
db_folder_path = "./db"

In [20]:
# Wrap the selected model, its generation params and the credentials in a
# LangChain interface so it can be plugged into a LangChain QA chain.
langchain_model = LangChainInterface(
    model=model_id,
    params=params,
    credentials=credentials,
)

In [21]:
# populate chroma db
def generateDB():
    """Build a Chroma vector store from the PDFs and persist it to disk.

    Loads every PDF under ``pdf_folder_path``, splits the text into chunks,
    embeds the chunks with the ``all-MiniLM-L6-v2`` sentence-transformer model
    and stores them in a Chroma DB persisted under ``db_folder_path``.

    Returns:
        The Chroma vector store created from the documents.
    """
    # load PDFs from folder
    loader = PyPDFDirectoryLoader(pdf_folder_path)
    documents = loader.load()

    # split the documents on newlines into chunks (chunk_size=1000, no overlap)
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0, separator='\n')
    docs = text_splitter.split_documents(documents)

    # create the open-source embedding function
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # save to disk; use the configured folder rather than a hard-coded path so
    # the DB is written to the same place the notebook checks for it
    db = Chroma.from_documents(docs, embedding_function, persist_directory=db_folder_path)

    return db

In [22]:
# Build the Chroma DB on first use, otherwise load the persisted one.
db = None

# os.listdir() raises FileNotFoundError when the folder does not exist yet
# (e.g. on a fresh checkout), so treat a missing folder as an empty DB.
# Hidden entries (names starting with '.') are not counted as DB content.
if os.path.isdir(db_folder_path):
    db_files = [f for f in os.listdir(db_folder_path) if not f.startswith('.')]
else:
    db_files = []

if not db_files:
    print("Chroma DB is empty. Populating it.")

    # generate chroma db
    db = generateDB()
else:
    print("Chroma DB is not empty.")

    # create the open-source embedding function
    # (must be the same model that was used to populate the DB)
    embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")

    # load from disk, from the same configured folder that was checked above
    db = Chroma(persist_directory=db_folder_path, embedding_function=embedding_function)

Chroma DB is not empty.


In [23]:
# Build a question-answering chain of type "stuff" on top of the LangChain model.
chain = load_qa_chain(
    langchain_model,
    chain_type="stuff",
)

In [24]:
# generate response
def generateResponse(query, db):
    """Answer ``query`` using documents retrieved from ``db``.

    Args:
        query: The question text.
        db: An object exposing ``similarity_search(query)`` (here, the Chroma store).

    Returns:
        The ``"output_text"`` entry of the QA chain's result.
    """
    # look up the documents most similar to the question
    matched_docs = db.similarity_search(query)

    # run the module-level QA chain over those documents and return its answer text
    chain_inputs = {"input_documents": matched_docs, "question": query}
    return chain(chain_inputs)["output_text"]

In [25]:
# Sample question: Operational Risk Management features
query = "What are the features of Operational Risk Management in OpenPages?"
generateResponse(query=query, db=db)

  self._read_ready.notifyAll()


'IBM OpenPages Operational Risk Management includes the following key features: • IBM OpenPages Operational Risk Management (ORM) provides a fully integrated operational risk solution, including risk control self-assessments (RCSAs), key risk indicators, (KRIs), loss event data management, and advanced reporting and business intelligence with IBM Cognos ® finance integrated risk management. Dashboard components are available to provide an enterprise-wide view of risk across the business and manage Basel II AMA compliance in the banking industry. • IBM OpenPages Operational Risk Management (ORM) provides a fully integrated operational risk solution, including risk control self-assessments (RCSAs'

In [26]:
# Sample question: configuring Watson Assistant
query = "List the steps to configure Watson Assistant in OpenPages"
generateResponse(query=query, db=db)

  self._read_ready.notifyAll()


"Watson Assistant is a cognitive assistant that helps users to find the right information. Watson Assistant is a cognitive assistant that helps users to find the right information. Watson Assistant is a cognitive assistant that helps users to find the right information. IBM OpenPages with Watson Version 9.0.0: Administrator's Guide IBM OpenPages with Watson Version 9.0.0 Solutions Guide IBM Question: List the steps to configure Watson Assistant in OpenPages Helpful Answer: Watson Assistant is a cognitive assistant that helps users to find the right information. Watson Assistant is a cognitive assistant that helps users to find the right information. IBM OpenPages with Watson Version 9.0.0: Administrator's Guide IBM OpenPages with Watson Version 9.0.0 Solutions Guide IBM"

In [27]:
# Sample question: FastMap
query = "What is FastMap?"
generateResponse(query=query, db=db)

  self._read_ready.notifyAll()


'FastMap is a tool that allows you to import data into the application using a template. The template is a spreadsheet that contains columns for each object type and field in the application. You can use FastMap to import data from an external system into the application. FastMap uses the profile of the logged-on user to determine which object types and fields are valid. For example, if an object type or certain object fields are included in a data load template but are excluded in a user’s profile, then that object type or those object fields will be excluded from the data imported by FastMap. You can override the default profile used by FastMap by explicitly specifying a profile in the Definition'

In [28]:
# Sample question: Reporting Period
query = "What is the purpose of Reporting Period in OpenPages?"
generateResponse(query=query, db=db)

  self._read_ready.notifyAll()


'Reporting period is a field that allows you to define a reporting period for a report. The reporting period is a date range that can be used to filter the data in a report. For example, you can define a reporting period of January 1, 2015 to December 31, 2015. When you run a report, you can select a reporting period from the drop-down list. If you do not select a reporting period, the report runs for the current time period. Reporting period is a field that allows you to define a reporting period for a report. The reporting period is a date range that can be used to filter the data in a report. For example, you can define '

In [29]:
# Sample question: OpenPages solutions
query = "List the solutions that OpenPages offers?"
generateResponse(query=query, db=db)

  self._read_ready.notifyAll()


'OpenPages offers the following solutions: • OpenPages Operational Risk Management • OpenPages Third Party Risk Management • OpenPages Compliance Management • OpenPages Governance, Risk, and Compliance • OpenPages Risk Management for ESG • OpenPages Vendor Risk Management • OpenPages Third Party Vendor Management • OpenPages Vendor Risk Management • OpenPages Operational Risk Management • OpenPages Operational Risk Management • OpenPages Operational Risk Management • OpenPages Operational Risk Management • OpenPages Operational Risk Management • OpenPages Operational Risk Management • OpenPages Operational Risk Management • OpenPages Operational Risk'

In [30]:
# Sample question: PRE vs POST trigger positions
query = "What is the difference between PRE and POST position in Triggers?"
generateResponse(query=query, db=db)

  self._read_ready.notifyAll()


'PRE – Events that happen prior to the operation actually being performed by the system For example, during the creation of a GRC Object, a PRE event has all the information about the object to be created, but the system has yet to take action to create the object and persist values. PRE is required for deletes, associations, and disassociations. POST - Events that happen after the operation has been performed by the system and before the transaction has been committed ; allowing for further processing of additional business logic POST is required for creates and updates. For more information, see the OpenPages 8.3 trigger development guide .position="PRE" objectType> The name of the'