In [None]:
# https://smart.com.ph/About/docs/corporate-governance/conflictofinterestpolicy.pdf

In [3]:
import os

import langchain
import requests
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer


class MiniLML6V2EmbeddingFunctionLangchain(langchain.embeddings.openai.Embeddings):
    """LangChain embedding adapter backed by the ``all-MiniLM-L6-v2`` SentenceTransformer."""

    # Instantiated once when the class body executes; every instance shares it.
    MODEL = SentenceTransformer("all-MiniLM-L6-v2")

    def embed_documents(self, texts):
        """Encode ``texts`` with the shared model and return the encoding via ``.tolist()``."""
        encoded = MiniLML6V2EmbeddingFunctionLangchain.MODEL.encode(texts)
        return encoded.tolist()

    def embed_query(self, query):
        """Encode ``query`` as a one-item batch and return the first (only) entry of the result."""
        encoded = MiniLML6V2EmbeddingFunctionLangchain.MODEL.encode([query])
        return encoded.tolist()[0]


# --- IBM Cloud IAM: exchange the API key for a bearer token -------------------
# Read the key from the environment so a real credential never has to be saved
# in the notebook; "xxx" is kept as the placeholder fallback.
api_key = os.environ.get("IBMCLOUD_API_KEY", "xxx")
iam_url = "https://iam.cloud.ibm.com/identity/token"
iam_headers = {"Content-Type": "application/x-www-form-urlencoded"}
# A dict body lets requests form-encode each value instead of splicing the key
# into a hand-built query string.
data = {
    "apikey": api_key,
    "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
}
response = requests.post(iam_url, headers=iam_headers, data=data, timeout=30)
# Fail here with the HTTP error rather than with a KeyError on "access_token".
response.raise_for_status()
iam_token = response.json()["access_token"]

# --- watsonx.ai text generation settings --------------------------------------
model_name = "google/flan-t5-xxl"
model_parameters = {
    "decoding_method": "sample",
    "max_new_tokens": 300,
    "min_new_tokens": 1,
    "random_seed": 12345,
    "stop_sequences": [],
    "temperature": 0.0,
    "top_k": 50,
    "top_p": 1,
    "repetition_penalty": 1,
}
# The project id can be overridden per environment; the original value is the default.
project_id = os.environ.get(
    "WATSONX_PROJECT_ID", "0353fa90-88c0-44d2-b6e7-ab143db3f01d"
)

# `url`, `headers` and `payload` are read as module-level names by ask_question().
url = "https://us-south.ml.cloud.ibm.com/ml/v1-beta/generation/text?version=2023-05-29"
headers = {
    "Content-Type": "application/json",
    "Accept": "application/json",
    "Authorization": f"Bearer {iam_token}",
}
# Base request body; the prompt ("input") is supplied per question.
payload = {
    "model_id": model_name,
    "parameters": model_parameters,
    "project_id": project_id,
}


def ask_question(question, db, num_docs, timeout=120):
    """Answer ``question`` using passages retrieved from ``db`` as context.

    Args:
        question: The user's question; used both for retrieval and in the prompt.
        db: Object exposing ``similarity_search(question, k=...)`` whose results
            have a ``page_content`` attribute.
        num_docs: Number of passages to retrieve (passed as ``k``).
        timeout: Seconds to wait for the generation endpoint before giving up.

    Returns:
        A ``(context, generated_text)`` tuple: the space-joined retrieved
        passages and the ``generated_text`` of the first result in the response.

    Raises:
        requests.HTTPError: If the generation endpoint returns an error status.
    """
    matches = db.similarity_search(question, k=num_docs)
    context = " ".join(doc.page_content for doc in matches)

    prompt = f"""Use the following pieces of information to answer the user's question.
    If you don't know the answer, just say that you don't know, don't try to make up an answer.
    Context: {context}

    Question: {question}

    Only return the helpful answer below and nothing else.
    Helpful answer:
    """
    # Build a per-request body instead of writing "input" into the shared
    # module-level `payload`, so one call never leaks its prompt into the next.
    request_body = {**payload, "input": prompt}
    r = requests.post(url, headers=headers, json=request_body, timeout=timeout)
    # Surface auth/quota failures as HTTP errors rather than a KeyError on "results".
    r.raise_for_status()
    generated_text = r.json()["results"][0]["generated_text"]
    return context, generated_text

In [2]:
# Load the policy PDF, split it into 1000-character chunks with a 100-character
# overlap, embed the chunks and persist the FAISS index to disk.
loader = PyPDFLoader("data/conflictofinterestpolicy.pdf")
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(pages)
embedding_fn = MiniLML6V2EmbeddingFunctionLangchain()
db = FAISS.from_documents(docs, embedding_fn)
db.save_local("watsonx/db")

In [4]:
# Reload the saved index, using the same embedding class it was built with.
embedding_fn = MiniLML6V2EmbeddingFunctionLangchain()
db = FAISS.load_local("watsonx/db", embedding_fn)

In [5]:
# Ask one question against the index, retrieving the three closest chunks.
question = "when does a Conflict of Interest occurs?"
context, answer = ask_question(question=question, db=db, num_docs=3)
# To inspect the retrieved passages as well, print `context` before the answer.
print("Answer:", answer, sep="\n")

Answer:
Conflict of Interest – occurs when the private interest of a Director, Employee, Consultant and/or his Affiliate interferes or appears to interfere in any way with the interest of SMART and/or Group
