In [94]:
import os
import json
import getpass
from dotenv import load_dotenv
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from ragas.embeddings import LangchainEmbeddingsWrapper
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever
# from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.callbacks import BaseCallbackHandler

from ragas import EvaluationDataset, evaluate
from ragas.run_config import RunConfig
from ragas.metrics import faithfulness, answer_relevancy, answer_correctness, context_precision, context_recall, Faithfulness
from ragas.llms import LangchainLLMWrapper
from datasets import Dataset
import time
import itertools
import pandas as pd

In [2]:
local_path = "../pdf/BILLS-119hr1eh.pdf"

if local_path:
    loader = UnstructuredPDFLoader(file_path=local_path)
    data = loader.load()
else:
    print("Upload a PDF file for processing.")

In [3]:
len(data[0].page_content)

1136279

In [4]:
#Split and chunk the data
chunk_size = 750
chunk_overlap = 200

text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
chunks = text_splitter.split_documents(data)

# Add the chunks to vector database, which uses nomic for model embeddings
vector_db = Chroma.from_documents(
                                    documents=chunks, 
                                    embedding=OllamaEmbeddings(model="nomic-embed-text"),
                                    collection_name="local-rag"
                                )

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [6]:
local_llm = "llama3.2"
llm = ChatOllama(model=local_llm)

#evaluator_llamma= FaithfulnessEvaluator(llm=llm)


# Set up a basic PromptTemplate as the backbones of the solution
# Ask the system to gather several responses and to limit response to 200 words
QUERY_PROMPT = PromptTemplate(
    input_variables = ["question"],
        template="""You are an AI Language model assistant. Your task is to generate three different versions of the given user question 
        to retrieve relevant documents from a vector database. Please be as concise as possible and limit your response to 200 words or less. 
        Original question: {question} """
)

retriever = MultiQueryRetriever.from_llm(vector_db.as_retriever(),llm, prompt=QUERY_PROMPT)

In [7]:
# use a ChatPromptTemplate to initiate a conversation, allowing the System to assume a Role
chat_template = """Answer the question based only on the following context: 
{context}
Question: {question}
"""

prompt = ChatPromptTemplate.from_template(chat_template)

chain = (
    {"context":retriever, "question":RunnablePassthrough()}
    | prompt 
    | llm 
    | StrOutputParser()
)

### Context-Driven responses

In [8]:
q_list = ['Resulting from this Act, which groups would be harmed most?',\
          'Resulting from this Act, which groups would benefit most?']

c_list = ['Role: you are a staunch Democrat',\
          'Role: you are a staunch Republican',\
          'Role: you are a US citizen with no political affiliation',\
          'Role: you are a wealthy investor with interests in oil, gas, and mining',\
          'Role: you are a person that enjoys recreating in public lands while living in Colorado']

In [9]:
def response_gen(q, context):
    """
    Invoke the language model chain with a given question and context, 
    then print the question, context, and the model's response.

    Args:
        q (str): The question to ask.
        context (str): The context or role to provide to the model.
    """
    response = chain.invoke(input={'context': context, 'question': q})
    
    print('*** \n')
    print(f"Question - {q}")
    print(f"Context - {context} \n")
    print("Response: \n", response)

    return response


def iterate_responses(q_list, c_list):

    response_list = []
    df_responses = []
    
    for combo in itertools.product(q_list, c_list):
        response = response_gen(q=combo[0], context=combo[1])

        data = {
        'question': [combo[0]],
        'context': [combo[1]],
        'response': [response]
        }
    
        df = pd.DataFrame(data)
    
        response_list.append(df)

    df_responses = pd.concat(response_list)
    df_responses.reset_index(inplace=True, drop=True)

    return df_responses

In [26]:
q_list1 = ['Resulting from this Act, which groups would be harmed most?']

c_comp_list = ['Role: you are a US citizen with no political affiliation',\
        'Role: you are a staunch Republican',\
        'Role: you are a staunch Republican, at a bar with only 15 seconds to explain your stance on the document',\
        'Role: you are a staunch Republican and Trump Supporter, at a bar with only 15 seconds to explain your stance on the document']

df_responses = iterate_responses(q_list=q_list1, c_list=c_comp_list)

*** 

Question - Resulting from this Act, which groups would be harmed most?
Context - Role: you are a US citizen with no political affiliation 

Response: 
 The question seems to be referring to the potential effects of the proposed legislation on various groups. Based on the provided text, it appears that the legislation may harm certain groups in the following ways:

1. Foreign nationals: The "unfair foreign tax" provision may affect foreign nationals who are subject to taxation by a foreign government. However, this is not necessarily a group that would be harmed most.
2. Small businesses: Some provisions, such as the repeal of EPA rules and NHTSA standards, may benefit small businesses by reducing regulations and costs. However, this is not necessarily a group that would be harmed most.
3. Low-income individuals: The legislation includes provisions related to health insurance, such as the "exchange enrollment verification requirement" and the "premium adjustment percentage." These

In [27]:
pd.set_option('display.max_colwidth', None)
df_responses.head()

Unnamed: 0,question,context,response
0,"Resulting from this Act, which groups would be harmed most?",Role: you are a US citizen with no political affiliation,"The question seems to be referring to the potential effects of the proposed legislation on various groups. Based on the provided text, it appears that the legislation may harm certain groups in the following ways:\n\n1. Foreign nationals: The ""unfair foreign tax"" provision may affect foreign nationals who are subject to taxation by a foreign government. However, this is not necessarily a group that would be harmed most.\n2. Small businesses: Some provisions, such as the repeal of EPA rules and NHTSA standards, may benefit small businesses by reducing regulations and costs. However, this is not necessarily a group that would be harmed most.\n3. Low-income individuals: The legislation includes provisions related to health insurance, such as the ""exchange enrollment verification requirement"" and the ""premium adjustment percentage."" These provisions may affect low-income individuals who rely on government-subsidized health insurance plans.\n\nConsidering these points, it's difficult to determine which group would be harmed most without more information. However, based on the context of the legislation, it appears that low-income individuals may be disproportionately affected by certain provisions.\n\nTo provide a more specific answer, I would need more information about the potential impacts of the legislation on different groups."
1,"Resulting from this Act, which groups would be harmed most?",Role: you are a staunch Republican,"The resulting groups that would be harmed most by this Act are low-income households and certain rural communities. The Act includes provisions aimed at reducing the cost of healthcare for some individuals and families, but these measures may disproportionately affect already vulnerable populations.\n\nSpecifically, Section 112103 of the Act limits Medicare coverage for certain individuals who are not lawfully present in the United States. This could harm low-income households that rely on government-subsidized health insurance.\n\nAdditionally, the Act includes provisions related to the Supplemental Nutrition Assistance Program (SNAP) and the Low-Income Home Energy Assistance Program (LIHEAP). These programs provide critical support for low-income families struggling with food insecurity and energy costs. Changes to these programs could exacerbate existing inequalities and harm already vulnerable populations.\n\nFurthermore, the Act includes provisions that may negatively impact rural communities. For example, Section 44105 of the Act modifies Medicaid provider screening requirements, which could lead to reduced access to healthcare services for rural residents who are already underserved.\n\nOverall, while this Act may have some benefits, its provisions could result in significant harm to low-income households and certain rural communities."
2,"Resulting from this Act, which groups would be harmed most?","Role: you are a staunch Republican, at a bar with only 15 seconds to explain your stance on the document","""This bill targets and harms foreign-influenced entities, private foundations in discriminatory foreign countries, and certain corporations owned by those nations. It's a direct attack on our sovereignty and values."" (15-second response)"
3,"Resulting from this Act, which groups would be harmed most?","Role: you are a staunch Republican and Trump Supporter, at a bar with only 15 seconds to explain your stance on the document","""Folks, let me tell you, this Act is a disaster. It's a total disaster. The groups that are gonna be harmed the most are the American workers, especially those in the manufacturing sector who are already struggling to stay competitive with foreign labor. And let's not forget our allies abroad, they're gonna feel the pinch too. And of course, our fellow Trump supporters, we're gonna have to take on the deep state and the swamp in Washington to fight for what's right."""


### Leverage RAGAS for Evaluating Prompts

In [32]:
# load_dotenv()

# if not os.environ.get("OPENAI_API_KEY"):
#     os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

# llm_eval = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [None]:
# Build query engine
# def build_query_engine(llm):
#     loader = TextLoader("./notebooks_nyc_wikipedia_nyc_text.txt")
#     data = loader.load()

#     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
#     all_splits = text_splitter.split_documents(data)

#     vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

#     return RetrievalQA.from_chain_type(
#         llm,
#         retriever=vectorstore.as_retriever(),
#         return_source_documents=True
#     )

In [63]:
# Evaluation questions, contexts, and answers
eval_questions = ["Resulting from this Act, which groups would be harmed most?"]

eval_answers = [
   "The people who would be harmed most by this Act are the non-citizen individuals and foreign corporations. This Act seems to target those who own more than a certain percentage of stock in these entities or have significant ownership interests. It's essentially aimed at preventing fraud, waste, and abuse, but it also has implications for international business dealings and non-citizen residents."
]

contexts = [
    "Role: you are a staunch Republican, at a bar with only 15 seconds to explain your stance on the document"
]

# pull ground truth from answer with context = 'US citizen with no political affiliation'
ground_truth = [
   "Based on the provided text, it appears that the legislation may harm certain groups in the following ways: Foreign nationals: The 'unfair foreign tax' provision may affect foreign nationals who are subject to taxation by a foreign government. However, this is not necessarily a group that would be harmed most. Small businesses: Some provisions, such as the repeal of EPA rules and NHTSA standards, may benefit small businesses by reducing regulations and costs. However, this is not necessarily a group that would be harmed most. Low-income individuals: The legislation includes provisions related to health insurance, such as the 'exchange enrollment verification requirement' and the 'premium adjustment percentage.' These provisions may affect low-income individuals who rely on government-subsidized health insurance plans." 
]

In [None]:
# Run the queries as a batch for efficiency
# predictions = chain.batch([ex["query"] for ex in examples])

In [None]:
# dataset_dict = {
#     "user_input": eval_questions[0],
#     "response": eval_answers[0],
#     "retrieved_contexts": contexts,
#     "reference": ground_truth[0]
# }

# dataset_dict


{'user_input': 'Resulting from this Act, which groups would be harmed most?',
 'response': "The people who would be harmed most by this Act are the non-citizen individuals and foreign corporations. This Act seems to target those who own more than a certain percentage of stock in these entities or have significant ownership interests. It's essentially aimed at preventing fraud, waste, and abuse, but it also has implications for international business dealings and non-citizen residents.",
 'retrieved_contexts': ['Role: you are a staunch Republican, at a bar with only 15 seconds to explain your stance on the document'],
 'reference': "Based on the provided text, it appears that the legislation may harm certain groups in the following ways: Foreign nationals: The 'unfair foreign tax' provision may affect foreign nationals who are subject to taxation by a foreign government. However, this is not necessarily a group that would be harmed most. Small businesses: Some provisions, such as the re

In [100]:
data_samples = {
    'question': [
         eval_questions[0]
    ],
    'answer': [
         eval_answers[0]
    ],
    'contexts': [
        [
            contexts[0]
        ]
    ],
    'ground_truth': [
       'Foreign nationals: The unfair foreign tax provision may affect foreign nationals who are subject to taxation by a foreign government. \
       Small businesses: Some provisions, such as the repeal of EPA rules and NHTSA standards, may benefit small businesses by reducing regulations and costs. However, this is not necessarily a group that would be harmed most. \
       Low-income individuals: The legislation includes provisions related to health insurance, such as the exchange enrollment verification requirement and the premium adjustment percentage. These provisions may affect low-income individuals who rely on government-subsidized health insurance plans. \
       Health care workers: The legislation includes provisions that may impact health care workers, particularly those involved in administering or managing government-subsidized health insurance programs. \
       Clean energy companies: The legislation includes provisions that may impact clean energy companies, particularly those involved in renewable energy projects and environmental regulations.'
    ]
}

dataset = Dataset.from_dict(data_samples)

json_formatted_string = json.dumps(data_samples, indent=4)
print(json_formatted_string)



{
    "question": [
        "Resulting from this Act, which groups would be harmed most?"
    ],
    "answer": [
        "The people who would be harmed most by this Act are the non-citizen individuals and foreign corporations. This Act seems to target those who own more than a certain percentage of stock in these entities or have significant ownership interests. It's essentially aimed at preventing fraud, waste, and abuse, but it also has implications for international business dealings and non-citizen residents."
    ],
    "contexts": [
        [
            "Role: you are a staunch Republican, at a bar with only 15 seconds to explain your stance on the document"
        ]
    ],
    "ground_truth": [
        "Foreign nationals: The unfair foreign tax provision may affect foreign nationals who are subject to taxation by a foreign government.        Small businesses: Some provisions, such as the repeal of EPA rules and NHTSA standards, may benefit small businesses by reducing regulat

In [89]:
# EvaluationDataset standardizes the collected evaluation examples into a structured format
# evaluation_dataset = EvaluationDataset.from_list([dataset_dict])
# evaluation_dataset

In [101]:
class TestCallback(BaseCallbackHandler):

    def on_llm_start(self, serialized, prompts, **kwargs):
        print(f"**********Prompts*********:\n {prompts[0]}\n\n")

    def on_llm_end(self, response, **kwargs):
        print(f"**********Response**********:\n {response}\n\n")

In [104]:
evaluator_llm = LangchainLLMWrapper(llm)
evaluator_embed = LangchainEmbeddingsWrapper("nomic-embed-text")

# test only one metric for debugging
result_faith = evaluate(
    dataset=dataset,
    metrics=[faithfulness],  # Just one metric
    llm=evaluator_llm,
    embeddings=evaluator_embed
)

print(result_faith)

Evaluating:   0%|          | 0/1 [00:00<?, ?it/s]

Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt statement_generator_prompt failed to parse output: The output parser failed to parse the output including retries.
Exception raised in Job[0]: RagasOutputParserException(The output parser failed to parse the output including retries.)


{'faithfulness': nan}


In [None]:


# Evaluate using Ragas
result = evaluate(
    dataset,
    metrics=[
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall
    ],
    llm=evaluator_llm,
    embeddings=evaluator_embed,
    run_config=RunConfig(timeout=300, max_retries=10, max_wait=300, log_tenacity=False),
    raise_exceptions=True, 
    callbacks=[TestCallback()]
)

print(result)

Evaluating:   0%|          | 0/4 [00:00<?, ?it/s]

**********Prompts*********:
 Human: Given a question and an answer, analyze the complexity of each sentence in the answer. Break down each sentence into one or more fully understandable statements. Ensure that no pronouns are used in any statement. Format the outputs in JSON.
Please return the output in a JSON format that complies with the following schema as specified in JSON Schema:
{"properties": {"statements": {"description": "The generated statements", "items": {"type": "string"}, "title": "Statements", "type": "array"}}, "required": ["statements"], "title": "StatementGeneratorOutput", "type": "object"}Do not use single quotes in your response but double quotes,properly escaped with a backslash.

--------EXAMPLES-----------
Example 1
Input: {
    "question": "Who was Albert Einstein and what is he best known for?",
    "answer": "He was a German-born theoretical physicist, widely acknowledged to be one of the greatest and most influential physicists of all time. He was best known 

Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt statement_generator_prompt failed to parse output: The output parser failed to parse the output including retries.


**********Response**********:
 generations=[[ChatGeneration(text='Here is the corrected output:\n\n```\n{\n    "statements": [\n        "Non-citizen individuals would be harmed most by this Act.",\n        "Foreign corporations would be harmed most by this Act.",\n        "This Act seems to target those who own more than a certain percentage of stock in these entities or have significant ownership interests.",\n        "It\'s essentially aimed at preventing fraud, waste, and abuse.",\n        "It also has implications for international business dealings and non-citizen residents."\n    ]\n}\n```\n\nNote that I removed the extra newline characters from the output.', generation_info={'model': 'llama3.2', 'created_at': '2025-07-08T21:57:50.434156Z', 'done': True, 'done_reason': 'stop', 'total_duration': 21656684083, 'load_duration': 72039083, 'prompt_eval_count': 4096, 'prompt_eval_duration': 15359814000, 'eval_count': 115, 'eval_duration': 6220640500, 'model_name': 'llama3.2'}, message=A

RagasOutputParserException: The output parser failed to parse the output including retries.