In [None]:
import os
import json
import getpass
from dotenv import load_dotenv
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
# from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.callbacks import BaseCallbackHandler

from ragas import EvaluationDataset, evaluate
from ragas.run_config import RunConfig
from ragas.metrics import faithfulness, answer_relevancy, answer_correctness, context_precision, context_recall, Faithfulness
from ragas.llms import LangchainLLMWrapper
from datasets import Dataset
import time
import itertools
import pandas as pd

In [None]:
# Location of the bill PDF, relative to the notebook's working directory.
local_path = "../pdf/BILLS-119hr1eh.pdf"

# Fail fast when the PDF is unavailable. Later cells read `data`, so merely
# printing a hint and carrying on would only postpone the failure to a
# confusing NameError further down the notebook.
if not local_path or not os.path.isfile(local_path):
    raise FileNotFoundError(
        f"PDF not found at {local_path!r}. Upload a PDF file for processing."
    )

# Parse the PDF into LangChain documents.
loader = UnstructuredPDFLoader(file_path=local_path)
data = loader.load()

In [None]:
# Length of the first loaded document's `page_content` (displayed as the cell output).
len(data[0].page_content)

In [None]:
# Chunking parameters handed to the text splitter
chunk_size = 750
chunk_overlap = 200

# Break the loaded document(s) into overlapping chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)
chunks = text_splitter.split_documents(data)

# Index the chunks in a Chroma collection, embedding them with the
# "nomic-embed-text" model served by Ollama
vector_db = Chroma.from_documents(
    collection_name="local-rag",
    embedding=OllamaEmbeddings(model="nomic-embed-text"),
    documents=chunks,
)

In [None]:
# Local chat model served through Ollama; it rewrites queries for retrieval
# and is also the model the notebook uses to answer questions.
local_llm = "llama3.2"
llm = ChatOllama(model=local_llm)

# Prompt used by the multi-query retriever to rephrase the user's question.
# The retriever built by `MultiQueryRetriever.from_llm` turns each line of the
# model's reply into a separate search query, so the prompt must ask for one
# rewritten question per line and nothing else; otherwise introductory text
# or a single paragraph would be used as search queries.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI Language model assistant. Your task is to generate three different versions of the given user question 
        to retrieve relevant documents from a vector database. Please be as concise as possible and limit your response to 200 words or less. 
        Provide these alternative questions separated by newlines, with no introduction and no numbering. 
        Original question: {question} """
)

# Retriever that queries the vector store once per rewritten question.
retriever = MultiQueryRetriever.from_llm(vector_db.as_retriever(), llm, prompt=QUERY_PROMPT)

In [None]:
# Answer prompt. `{role}` carries the persona the caller wants the model to
# assume, `{context}` carries the passages returned by the retriever, and
# `{question}` carries the user's question.
chat_template = """{role}
Answer the question based only on the following context: 
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(chat_template)


def _question_from(chain_input):
    """Return the question text from a bare string or a {'question': ...} mapping."""
    if isinstance(chain_input, dict):
        return chain_input["question"]
    return chain_input


def _role_from(chain_input):
    """Return the role supplied under the 'context' key, or '' when none was given."""
    if isinstance(chain_input, dict):
        return chain_input.get("context", "")
    return ""


# Callers invoke the chain with {'context': <role>, 'question': <question>}.
# Previously that whole mapping was handed to the retriever and to the
# `question` slot, so the role only reached the model as part of a dict repr.
# Each prompt variable is now fed explicitly: only the question text drives
# retrieval, and the role is rendered on its own line. A bare string input is
# still accepted and treated as a question with no role.
chain = (
    {
        "context": _question_from | retriever,
        "question": _question_from,
        "role": _role_from,
    }
    | prompt
    | llm
    | StrOutputParser()
)

### Context-Driven responses

In [None]:
# Questions put to the model about the Act
q_list = [
    "Resulting from this Act, which groups would be harmed most?",
    "Resulting from this Act, which groups would benefit most?",
]

# Roles the model is asked to answer as
c_list = [
    "Role: you are a staunch Democrat",
    "Role: you are a staunch Republican",
    "Role: you are a US citizen with no political affiliation",
    "Role: you are a wealthy investor with interests in oil, gas, and mining",
    "Role: you are a person that enjoys recreating in public lands while living in Colorado",
]

In [None]:
def response_gen(q, context):
    """
    Run one question/context pair through the chain and echo the exchange.

    The question, the context, and the chain's reply are printed to stdout.

    Args:
        q (str): Question text sent to the chain.
        context (str): Context or role string sent alongside the question.

    Returns:
        The value returned by ``chain.invoke`` for this pair.
    """
    payload = {'context': context, 'question': q}
    answer = chain.invoke(input=payload)

    # Separator, question and context are written by a single call; the
    # newline separator yields the same text as one print() per item.
    print('*** \n', f"Question - {q}", f"Context - {context} \n", sep='\n')
    print("Response: \n", answer)

    return answer


def iterate_responses(q_list, c_list):
    """
    Ask every question under every context and tabulate the responses.

    Args:
        q_list (list[str]): Questions to ask.
        c_list (list[str]): Contexts/roles to pair with each question.

    Returns:
        pandas.DataFrame: One row per (question, context) pair, in
        ``itertools.product(q_list, c_list)`` order, with the columns
        ``question``, ``context`` and ``response`` and a fresh 0..n-1 index.
        When either list is empty the frame has those columns and no rows.
    """
    columns = ['question', 'context', 'response']

    # Gather plain records and build the frame once, instead of creating a
    # one-row DataFrame per pair and concatenating them. This also keeps the
    # empty-input case from raising inside pd.concat.
    records = [
        {
            'question': question,
            'context': context,
            'response': response_gen(q=question, context=context),
        }
        for question, context in itertools.product(q_list, c_list)
    ]

    return pd.DataFrame(records, columns=columns)

In [None]:
# One question, asked under several variations of the role
q_list1 = ["Resulting from this Act, which groups would be harmed most?"]

c_comp_list = [
    "Role: you are a US citizen with no political affiliation",
    "Role: you are a staunch Republican",
    "Role: you are a staunch Republican, at a bar with only 15 seconds to explain your stance on the document",
    "Role: you are a staunch Republican and Trump Supporter, at a bar with only 15 seconds to explain your stance on the document",
]

# One response per (question, role) combination, collected in a DataFrame
df_responses = iterate_responses(c_list=c_comp_list, q_list=q_list1)

In [None]:
# Lift the column-width limit so long responses are displayed in full
pd.options.display.max_colwidth = None
df_responses.head(n=5)

### Leverage RAGAS for Evaluating Prompts

In [None]:
# Inputs for the evaluation: the question, the answer under test, and the
# role string that was used as the context for that answer
eval_questions = ["Resulting from this Act, which groups would be harmed most?"]

eval_answers = [
    "The people who would be harmed most by this Act are the non-citizen individuals and foreign corporations. "
    "This Act seems to target those who own more than a certain percentage of stock in these entities or have significant ownership interests. "
    "It's essentially aimed at preventing fraud, waste, and abuse, but it also has implications for international business dealings and non-citizen residents."
]

contexts = [
    "Role: you are a staunch Republican, at a bar with only 15 seconds to explain your stance on the document"
]

# Reference answer, taken from the response produced with
# context = 'US citizen with no political affiliation'
ground_truth = [
    "Based on the provided text, it appears that the legislation may harm certain groups in the following ways: "
    "Foreign nationals: The 'unfair foreign tax' provision may affect foreign nationals who are subject to taxation by a foreign government. "
    "However, this is not necessarily a group that would be harmed most. "
    "Small businesses: Some provisions, such as the repeal of EPA rules and NHTSA standards, may benefit small businesses by reducing regulations and costs. "
    "However, this is not necessarily a group that would be harmed most. "
    "Low-income individuals: The legislation includes provisions related to health insurance, such as the 'exchange enrollment verification requirement' and the 'premium adjustment percentage.' "
    "These provisions may affect low-income individuals who rely on government-subsidized health insurance plans."
]

In [None]:
# Single evaluation record, laid out as one list per column so it can be
# turned into a `Dataset` below.
# NOTE(review): the `ground_truth` list defined in the previous cell is not
# used here; the reference text below is a separate, longer variant. Confirm
# which of the two is the intended reference.
data_samples = {
    'question': [eval_questions[0]],
    'answer': [eval_answers[0]],
    # each record carries a list of context strings
    'contexts': [[contexts[0]]],
    'ground_truth': [
        # Adjacent literals are joined by Python. Backslash continuations
        # inside a single literal would pull each continuation line's
        # indentation into the reference text as a run of spaces.
        'Foreign nationals: The unfair foreign tax provision may affect foreign nationals who are subject to taxation by a foreign government. '
        'Small businesses: Some provisions, such as the repeal of EPA rules and NHTSA standards, may benefit small businesses by reducing regulations and costs. However, this is not necessarily a group that would be harmed most. '
        'Low-income individuals: The legislation includes provisions related to health insurance, such as the exchange enrollment verification requirement and the premium adjustment percentage. These provisions may affect low-income individuals who rely on government-subsidized health insurance plans. '
        'Health care workers: The legislation includes provisions that may impact health care workers, particularly those involved in administering or managing government-subsidized health insurance programs. '
        'Clean energy companies: The legislation includes provisions that may impact clean energy companies, particularly those involved in renewable energy projects and environmental regulations.'
    ]
}

dataset = Dataset.from_dict(data_samples)

# Pretty-print the record for a visual check of what will be evaluated
json_formatted_string = json.dumps(data_samples, indent=4)
print(json_formatted_string)



In [None]:
# EvaluationDataset standardizes the collected evaluation examples into a structured format
# evaluation_dataset = EvaluationDataset.from_list([dataset_dict])
# evaluation_dataset

In [None]:
class TestCallback(BaseCallbackHandler):
    """Callback handler that echoes LLM prompts and responses to stdout."""

    def on_llm_start(self, serialized, prompts, **kwargs):
        """Print the first entry of ``prompts`` under a banner."""
        first_prompt = prompts[0]
        print("**********Prompts*********:\n {}\n\n".format(first_prompt))

    def on_llm_end(self, response, **kwargs):
        """Print the ``response`` object under a banner."""
        print("**********Response**********:\n {}\n\n".format(response))

In [None]:
# Wrap the local chat model and the local embedding model for RAGAS.
# The embeddings wrapper must receive an embeddings object; handing it the
# bare model name would leave it wrapping a plain string that has no
# embedding methods to call.
evaluator_llm = LangchainLLMWrapper(llm)
evaluator_embed = LangchainEmbeddingsWrapper(OllamaEmbeddings(model="nomic-embed-text"))

# Run a single metric first, as a debugging step before the full evaluation
result_faith = evaluate(
    dataset=dataset,
    metrics=[faithfulness],  # Just one metric
    llm=evaluator_llm,
    embeddings=evaluator_embed
)

print(result_faith)

In [None]:


# Evaluate using Ragas with the full metric set.
# `column_map` is left at its default: it is a mapping of column names, and
# the empty list passed before was the wrong type, accepted only because an
# empty list is falsy.
result = evaluate(
    dataset,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall
    ],
    llm=evaluator_llm,
    embeddings=evaluator_embed,
    # Generous timeout/retry settings for the local model
    run_config=RunConfig(timeout=300, max_retries=10, max_wait=300, log_tenacity=False),
    raise_exceptions=True,
    # Echo every prompt/response pair for debugging
    callbacks=[TestCallback()]
)

print(result)