In [1]:
import os
from typing import Any, List, Mapping, Optional

import langchain
import requests
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.base import Embeddings
from langchain.llms.base import LLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from sentence_transformers import SentenceTransformer

In [2]:
class MiniLML6V2EmbeddingFunctionLangchain(Embeddings):
    """LangChain embeddings adapter backed by the ``all-MiniLM-L6-v2`` model.

    The base class is imported directly from ``langchain.embeddings.base``
    rather than reached through ``langchain.embeddings.openai``, where it is
    only present as an incidental import of the OpenAI embeddings module.
    """

    # Loaded once when the class body executes and shared by every instance.
    MODEL = SentenceTransformer("all-MiniLM-L6-v2")

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Encode ``texts`` with the shared model.

        Returns the encoder output converted with ``.tolist()``, i.e. one
        vector per input text.
        """
        return self.MODEL.encode(texts).tolist()

    def embed_query(self, query: str) -> List[float]:
        """Encode a single query string and return its vector.

        The query is wrapped in a one-element list so it goes through the same
        batch path as ``embed_documents``; the single row is then unwrapped.
        """
        return self.MODEL.encode([query]).tolist()[0]


class IBMWatsonX(LLM):
    """LangChain ``LLM`` wrapper around the IBM watsonx.ai text-generation REST API.

    Each call first exchanges ``api_key`` for an IAM bearer token and then
    posts the prompt to the us-south generation endpoint.

    Attributes:
        api_key: IBM Cloud API key exchanged for an IAM access token.
        model_name: Sent as ``model_id`` in the generation request.
        project_id: Sent as ``project_id`` in the generation request.
        model_parameters: Sent unchanged as ``parameters`` in the request.
        request_timeout: Seconds to wait on each HTTP request.
    """

    api_key: str
    model_name: str
    project_id: str
    model_parameters: Mapping[str, Any]
    # Without a timeout a stalled connection would hang the notebook forever.
    request_timeout: float = 60.0

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM implementation."""
        return "IBM watsonx.ai"

    def _get_token(self) -> str:
        """Exchange the API key for an IAM access token.

        Returns:
            The ``access_token`` field of the IAM response.

        Raises:
            requests.HTTPError: If the IAM endpoint answers with an error status.
        """
        url = "https://iam.cloud.ibm.com/identity/token"
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        # Passing a dict lets requests URL-encode the values, so an API key
        # containing reserved characters cannot corrupt the form body.
        data = {
            "apikey": self.api_key,
            "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
        }
        response = requests.post(
            url, headers=headers, data=data, timeout=self.request_timeout
        )
        # Fail with the HTTP status instead of a KeyError on the error body.
        response.raise_for_status()
        return response.json()["access_token"]

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[Any] = None,
        **kwargs: Any,
    ) -> str:
        """Generate text for ``prompt``.

        Args:
            prompt: Text sent as the ``input`` field of the request.
            stop: Optional stop strings; the generated text is cut at the
                earliest occurrence of any of them.
            run_manager: Accepted for compatibility with the LangChain base
                class signature; not used here.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            The ``generated_text`` of the first result, truncated at a stop
            string when ``stop`` is given.

        Raises:
            requests.HTTPError: If either HTTP request returns an error status.
        """
        iam_token = self._get_token()
        url = "https://us-south.ml.cloud.ibm.com/ml/v1-beta/generation/text?version=2023-05-29"
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "Authorization": f"Bearer {iam_token}",
        }
        payload = {
            "model_id": self.model_name,
            "input": prompt,
            "parameters": self.model_parameters,
            "project_id": self.project_id,
        }

        response = requests.post(
            url, headers=headers, json=payload, timeout=self.request_timeout
        )
        response.raise_for_status()
        text = response.json()["results"][0]["generated_text"]

        if stop:
            # Honour caller-supplied stop strings client-side: keep only the
            # text before the earliest one that appears (empty strings skipped).
            positions = [text.find(token) for token in stop if token]
            hits = [pos for pos in positions if pos != -1]
            if hits:
                text = text[: min(hits)]
        return text

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Return the parameters that identify this LLM configuration."""
        return {"model_name": self.model_name}

In [3]:
# Read the secret from the environment so it is never saved in the notebook
# or its outputs; fail early with a clear message when it is missing.
api_key = os.environ.get("WATSONX_API_KEY", "")
if not api_key:
    raise RuntimeError(
        "Set the WATSONX_API_KEY environment variable to an IBM Cloud API key."
    )

temperature = 0.0
model_name = "google/flan-ul2"
# The project can be overridden per environment; the default is the original value.
project_id = os.environ.get(
    "WATSONX_PROJECT_ID", "3dce5aff-e4a0-48fc-9210-445d52ef0c34"
)

# Sent unchanged as the "parameters" object of every generation request.
model_parameters = {
    "decoding_method": "sample",
    "max_new_tokens": 200,
    "min_new_tokens": 1,
    "random_seed": 12345,
    "stop_sequences": [],
    "temperature": temperature,
    "top_k": 50,
    "top_p": 1,
    "repetition_penalty": 1,
}

llm_ibm = IBMWatsonX(
    api_key=api_key,
    model_name=model_name,
    model_parameters=model_parameters,
    project_id=project_id,
)

In [4]:
# Load the PDF, split it into chunks (chunk_size=1000, chunk_overlap=10),
# index the chunks in FAISS with the MiniLM embeddings, and expose a retriever
# that is configured with k=8.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=10, chunk_size=1000)
loader = PyPDFLoader("../data/Happy-Hunt-T-Cs-Final-1.pdf")
texts = text_splitter.split_documents(loader.load())
db = FAISS.from_documents(
    texts,
    MiniLML6V2EmbeddingFunctionLangchain(),
)
retriever = db.as_retriever(search_kwargs=dict(k=8))

In [5]:
# Combine the watsonx LLM with the FAISS retriever into a retrieval QA chain.
# return_source_documents=True asks the chain to include the retrieved
# documents in its output alongside the answer.
qa_ibm = RetrievalQA.from_chain_type(
    return_source_documents=True,
    retriever=retriever,
    llm=llm_ibm,
)

In [6]:
# Ask the chain one question; the bare last expression displays the answer.
question = "What is the duration of the Campaign?"
result = qa_ibm(dict(query=question))
result["result"]