In [1]:
from ibm_watsonx_ai.foundation_models import ModelInference
from ibm_watsonx_ai.metanames import GenTextParamsMetaNames as GenParams
from ibm_watsonx_ai.metanames import EmbedTextParamsMetaNames
from ibm_watsonx_ai import Credentials
from langchain_ibm import WatsonxLLM, WatsonxEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import RetrievalQA
from dotenv import load_dotenv
import os
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Load the .env file that sits in the current working directory
# (override=True is passed straight through to load_dotenv).
dotenv_path = os.getcwd() + "/.env"
load_dotenv(dotenv_path, override=True)

True

In [3]:
# Silence warnings for the rest of the notebook session.
import warnings


def warn(*_args, **_kwargs):
    """No-op replacement for ``warnings.warn``: accepts any arguments, does nothing."""
    return None


# Direct calls to warnings.warn() now hit the no-op above; the 'ignore'
# filter is installed as well.
warnings.warn = warn
warnings.filterwarnings('ignore')

In [4]:
from langchain.prompts import PromptTemplate

# Instruction text for the QA chain. It has two placeholders, {context} and
# {question}, and tells the model to answer only from the supplied context,
# with a fixed fallback sentence when the answer is not there.
prompt_template = """
You are a knowledgeable assistant. Answer the question based solely on the provided context.
If the answer is not available in the context, respond with 'The information is not available in the provided context.'

Context: {context}

Question: {question}
Answer:
"""

# Prompt object passed to RetrievalQA through chain_type_kwargs in retriever_qa.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

In [None]:
def get_llm():
    """Create the WatsonxLLM text-generation client used by the QA chain.

    The API key and project id are read from the WATSONX_APIKEY and
    WATSONX_PROJECT environment variables; each falls back to an empty
    string when the variable is not set.

    Returns:
        WatsonxLLM: client for 'mistralai/mixtral-8x7b-instruct-v01',
        configured with MAX_NEW_TOKENS=256 and TEMPERATURE=1.0.
    """
    generation_params = {
        GenParams.MAX_NEW_TOKENS: 256,
        GenParams.TEMPERATURE: 1.0,
    }
    return WatsonxLLM(
        model_id='mistralai/mixtral-8x7b-instruct-v01',
        url="https://us-south.ml.cloud.ibm.com",
        apikey=os.getenv("WATSONX_APIKEY", ""),
        project_id=os.getenv("WATSONX_PROJECT", ""),
        params=generation_params,
    )

## Document loader
# def document_loader(file):
#     loader = PyPDFLoader(file)
#     loaded_document = loader.load()
#     return loaded_document

def document_loader(url):
    """
    Fetch a web page with WebBaseLoader and return its content.

    Args:
        url (str): Address of the web page to load.

    Returns:
        list: The Document objects produced by ``WebBaseLoader(url).load()``.
    """
    return WebBaseLoader(url).load()

## Text splitter
def text_splitter(data):
    """
    Split loaded documents into overlapping chunks.

    Args:
        data: Documents to split, as accepted by
            ``RecursiveCharacterTextSplitter.split_documents``.

    Returns:
        The chunks returned by ``split_documents``. The splitter is set up
        with chunk_size=1024 and chunk_overlap=256, measured with ``len``.
    """
    splitter = RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_size=1024,
        chunk_overlap=256,
    )
    return splitter.split_documents(data)

## Vector db
def vector_database(chunks):
    """
    Index the given chunks in a Chroma vector store.

    Args:
        chunks: Document chunks to embed and store.

    Returns:
        The store returned by ``Chroma.from_documents``, built with the
        embedding model from ``watsonx_embedding()``.
    """
    return Chroma.from_documents(chunks, watsonx_embedding())

## Embedding model
def watsonx_embedding(max_input_tokens=512):
    """
    Create the watsonx embedding client used to index and query chunks.

    The previous hard-coded TRUNCATE_INPUT_TOKENS value of 3 cut every input
    down to its first three tokens before embedding, so the ~1024-character
    chunks produced by ``text_splitter`` were represented by almost none of
    their content. The limit is now a parameter with a usable default.

    Args:
        max_input_tokens (int): Maximum number of input tokens kept per text
            before embedding (sent as TRUNCATE_INPUT_TOKENS). Defaults to 512.
            NOTE(review): 512 is taken to be the maximum input length of
            'ibm/slate-125m-english-rtrvr' — confirm against the current IBM
            model documentation.

    Returns:
        WatsonxEmbeddings: client for 'ibm/slate-125m-english-rtrvr'. The API
        key and project id come from the WATSONX_APIKEY and WATSONX_PROJECT
        environment variables (empty string when unset).
    """
    embed_params = {
        EmbedTextParamsMetaNames.TRUNCATE_INPUT_TOKENS: max_input_tokens,
        EmbedTextParamsMetaNames.RETURN_OPTIONS: {"input_text": True},
    }
    # Return directly so no local shadows the function's own name.
    return WatsonxEmbeddings(
        model_id="ibm/slate-125m-english-rtrvr",
        url="https://us-south.ml.cloud.ibm.com",
        apikey=os.getenv("WATSONX_APIKEY", ""),
        project_id=os.getenv("WATSONX_PROJECT", ""),
        params=embed_params,
    )

## Retriever
def retriever(file):
    """
    Build a retriever over the content found at ``file``.

    The source is loaded, split into chunks, indexed in a vector store, and
    the store is exposed through ``as_retriever()``.

    Args:
        file: Source location forwarded unchanged to ``document_loader``
            (with the current WebBaseLoader-based loader this is a URL).

    Returns:
        The retriever returned by the vector store's ``as_retriever()``.
    """
    documents = document_loader(file)
    chunks = text_splitter(documents)
    return vector_database(chunks).as_retriever()

## QA Chain
def retriever_qa(file, query):
    """
    Answer ``query`` using content retrieved from ``file``.

    A fresh LLM client and a fresh retriever are created on every call, then
    combined into a "stuff" RetrievalQA chain that uses the notebook's PROMPT.

    Args:
        file: Source location passed to ``retriever``.
        query (str): The question to ask.

    Returns:
        The chain's response for ``{"query": query}``. The chain is built with
        ``return_source_documents=True``; the calling cell reads the 'result'
        and 'source_documents' entries from it.
    """
    qa_chain = RetrievalQA.from_chain_type(
        llm=get_llm(),
        chain_type="stuff",
        retriever=retriever(file),
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )
    return qa_chain.invoke({"query": query})

In [6]:
# Ask a question about an IBM community blog post on in-database linear regression.
# Other things to try:
#   - query: 'How can I generate summary statistics of a Db2 table?'
#   - a local PDF ('LORA.pdf', 'What is LoRA?'), which requires the
#     PyPDFLoader-based document_loader instead of the web loader.
url = 'https://community.ibm.com/community/user/datamanagement/blogs/shaikh-quader/2024/05/07/building-an-in-db-linear-regression-model-with-ibm'
query = 'How to train a Linear Regression model in Db2?'
response = retriever_qa(url, query)

# Generated answer
answer = response['result']
print(f"Answer: {answer}")

# Retrieved chunks that were supplied to the model as context
source_documents = response['source_documents']
print("\nRetrieved Contexts:")
for i, doc in enumerate(source_documents, start=1):
    preview = doc.page_content[:500]  # only the first 500 characters are shown
    print(f"\nDocument {i}:")
    print(f"Content: {preview}...")
    print(f"Metadata: {doc.metadata}")

Answer: To train a Linear Regression model in Db2, follow these steps:
1. Divide the records from the GOSALES_FULL table into two partitions: a training partition and a test partition using the SPLIT_DATA stored procedure (SP).
2. Call the LINEAR_REGRESSION SP using the training examples from the GOSALES_TRAIN table, specifying the input features in the incolumn parameter and the output column in the target parameter. Set the intercept parameter to true to learn the value of intercept.
3. After the training completes, the SP will add the new model to Db2's model catalog.
In the provided context, the following command is used to train a Linear Regression model in Db2:
CALL IDAX.LINEAR_REGRESSION('model=GOSALES.GOSALES_LINREG, intable=GOSALES.GOSALES_TRAIN, id=ID, target=PURCHASE_AMOUNT,incolumn=AGE;GENDER;MARITAL_STATUS;PROFESSION, intercept=true')
Note that Db

Retrieved Contexts:

Document 1:
Content: Train / Test Split
First, I will divide the records from the GOSALES_FULL table into