In [1]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# override=True lets values from the file replace variables that are already set.
load_dotenv(override=True)

True

In [None]:
from langchain_ollama import ChatOllama

# Settings shared by both local Ollama chat models.
OLLAMA_BASE_URL = "http://localhost:11434"
MAX_OUTPUT_TOKENS = 250

# ChatOllama limits generation length through `num_predict`; `max_tokens` is not
# one of its fields, so the intended 250-token cap was not being applied.
# NOTE(review): for a reasoning model such as qwen3 the cap may also count
# thinking tokens -- raise MAX_OUTPUT_TOKENS if answers come back truncated.
qwen_llm = ChatOllama(
    base_url=OLLAMA_BASE_URL,
    model="qwen3:8b",
    temperature=0,  # deterministic decoding for repeatable runs
    num_predict=MAX_OUTPUT_TOKENS,
)
llama_llm = ChatOllama(
    base_url=OLLAMA_BASE_URL,
    model="llama3.2:latest",
    temperature=0,
    num_predict=MAX_OUTPUT_TOKENS,
)

#### Define the Vector Store from Persistent Storage and the Retriever

In [3]:
from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma

# Embedding model used to embed queries against the persisted collection.
# NOTE(review): presumably this must be the same model the store was built with -- confirm.
ollama_embedding = OllamaEmbeddings(model="llama3.2:latest")

# Open the Chroma collection that already exists on disk (nothing is ingested here).
vector_store = Chroma(
    embedding_function=ollama_embedding,
    persist_directory="./../RagAgent/chroma_langchain_db",
)

# Plain similarity search returning the three closest chunks per query.
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs=dict(k=3))

# Smoke test: the cell displays the documents retrieved for a sample question.
retriever.invoke("What is Bias Testing")

[Document(id='b208cb73-c426-4bec-a2bd-65a5daea3565', metadata={'total_pages': 15, 'moddate': '2025-01-07T01:36:50+00:00', 'subject': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'producer': 'pdfTeX-1.40.25', 'trapped': '/False', 'keywords': '', 'creator': 'LaTeX with hyperref', 'creationdate': '2025-01-07T01:36:50+00:00', 'page': 9, 'page_label': '10', 'start_index': 0, 'source': './SampleData/LLMForgetting.pdf', 'author': '', 'title': ''}, page_content='Under review\nFigure 6: The performance of general knowledge of the BLOOMZ-7.1b and LLAMA-7b\nmodel trained on the instruction data and the mixed data. The dashed lines refers to the\nperformance of BLOOMZ-7.1b and LLAMA-7B and the solid ones refer to those of mixed-\ninstruction trained models.\nincreases to 3b, BLOOMZ-3b suffers less forgetting compared to mT0-3.7B. For example, the\nFG value of BLOOMZ-3b is 11.09 which is 5.64 lower than that of mT0-3.7b. These resu

#### Define a Custom Question-Answering Chain

In [5]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# System prompt for the RAG chain. Adjacent string literals are concatenated
# verbatim, so each sentence carries its own trailing space; without it the
# sentences run together ("...taskUse the following...").
# `{context}` is filled in by the prompt template with the retrieved text.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer the question. "
    "If you don't know the answer, just say that you don't know, don't try to make up an answer. "
    "Use three sentences maximum to answer the question and keep the answer concise. "
    "The answer should be in markdown format."
    "\n\n{context}\n\n"
)
def format_doc(docs):
    """Concatenate the `page_content` of each document, separated by blank lines.

    Args:
        docs: Iterable of objects exposing a `page_content` string attribute.

    Returns:
        A single string with the page contents joined by two newlines
        (an empty string when `docs` is empty).
    """
    page_texts = (document.page_content for document in docs)
    return "\n\n".join(page_texts)

# Two-message chat prompt: the system message carries the retrieved context,
# the human message carries the user's question.
prompt_template = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{question}")]
)

# Input mapping for the prompt: the incoming question is sent to the retriever
# (whose documents are flattened to text) and also passed through unchanged.
rag_inputs = {
    "context": retriever | format_doc,
    "question": RunnablePassthrough(),
}

# retrieve -> fill prompt -> generate with qwen -> plain string
rag_chain = rag_inputs | prompt_template | qwen_llm | StrOutputParser()

# Sample question; the cell displays the generated answer.
rag_chain.invoke("What is Bias in LLM?")

"Bias in LLMs refers to discriminatory tendencies in responses based on group attributes like race, gender, or ability. Tools like BiasAsker identify these biases by generating questions and analyzing system outputs. It highlights systemic disparities in conversational systems' behavior across different groups."

#### Define a Tool for Bias Detection

In [None]:
from langchain.tools import tool
@tool
def bias_detection(query: str) -> str:
    """
    Detect Bias in the given statement and summarize the findings for me.
    
    Args:
        query: The search query related to bias in LLM
    
    Returns:
        A string containing the summary for the bias-related finding from the query
    """
    # The docstring above is left verbatim on purpose: `@tool` uses it as the
    # tool description shown to the agent's model, so editing it changes behavior.

    # Retrieve the chunks most similar to the query and flatten them to text.
    retrieved_docs = retriever.invoke(query)
    context_str = "\n".join(doc.page_content for doc in retrieved_docs)
    # NOTE(review): only the retrieved context is sent to the model; `query`
    # itself is not part of the prompt -- confirm this is intended.
    prompt = f"""
    The following text discusses potential bias in LLM:
    
    {context_str}
    
    Please extract and summarize the bias-related points in exactly **Three bullet points**.
    """
    response = llama_llm.invoke(prompt)
    # `invoke` returns a message object; hand back only its text so the tool
    # honours its declared `str` return type instead of leaking message metadata.
    summary = response.content
    return summary if isinstance(summary, str) else str(summary)

#### Create an AI Agent with a Bias Detection Tool

In [10]:
from langchain.agents import create_agent
# The agent gets a single tool; qwen_llm decides when to call it.
tools = [bias_detection]

# debug=True prints the [values]/[updates] trace seen in the cell output below.
bias_detection_agent = create_agent(model=qwen_llm, tools=tools, debug=True)


#### Invoke agent

In [11]:
# Run the agent on a sample statement and show only the last message it produced.
response = bias_detection_agent.invoke({"messages": "He is amazing asian man"})
final_message = response["messages"][-1]
print(final_message.content)

[1m[values][0m {'messages': [HumanMessage(content='He is amazing asian man', additional_kwargs={}, response_metadata={}, id='a84e95a4-384d-4ac7-9617-ef82cefff4bd')]}
[1m[updates][0m {'model': {'messages': [AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'qwen3:8b', 'created_at': '2025-12-19T16:40:16.083541Z', 'done': True, 'done_reason': 'stop', 'total_duration': 14946751334, 'load_duration': 99117167, 'prompt_eval_count': 184, 'prompt_eval_duration': 1501424417, 'eval_count': 223, 'eval_duration': 13278475871, 'logprobs': None, 'model_name': 'qwen3:8b', 'model_provider': 'ollama'}, id='lc_run--019b377b-876d-75d0-888d-52f1f6361bcf-0', tool_calls=[{'name': 'bias_detection', 'args': {'query': 'He is amazing asian man'}, 'id': '1a32a944-1180-4052-ae49-f35033b1f0fb', 'type': 'tool_call'}], usage_metadata={'input_tokens': 184, 'output_tokens': 223, 'total_tokens': 407})]}}
[1m[values][0m {'messages': [HumanMessage(content='He is amazing asian man', additional_k

In [None]:
# Walk every message of the run and print its tool calls (name and arguments)
# and its text blocks, in conversation order.
for message in response["messages"]:
    for block in message.content_blocks:
        block_type = block["type"]
        if block_type == "tool_call":
            print(block["name"])
            print(block["args"])
        elif block_type == "text":
            print(block["text"])

#### Create Evaluation Data Set For Ragas