# Policy Navigator Agent

The Policy Navigator Agent is designed to search, extract, and summarize complex policy and regulation documents.

In [None]:
# Read the API key from the local `Secret` module and publish it through the
# TEAM_API_KEY environment variable. This runs before the aixplain imports
# below (presumably the SDK reads the variable at import time — confirm).
# Keep Secret.py out of version control.
from Secret import apikey
import os
os.environ["TEAM_API_KEY"] = apikey

from aixplain.factories import BenchmarkFactory, DatasetFactory, MetricFactory, ModelFactory,AgentFactory
from aixplain.modules.agent import OutputFormat
import pandas as pd
from tqdm import tqdm
from aixplain.enums import Function, Supplier
from aixplain.factories import IndexFactory, AgentFactory, ModelFactory
from aixplain.enums import EmbeddingModel
from aixplain.modules.model.record import Record
import requests
import gradio as gr
from notion_client import Client







In [None]:
# Default data sources used by the rest of the notebook.
# GDPR article text, downloaded from Kaggle:
# https://www.kaggle.com/datasets/jessemostipak/gdpr-violations?select=gdpr_text.csv
DEFAULT_CSV = "dataset/gdpr_text.csv"

# Page that is scraped later and indexed as a second knowledge source.
DEFAULT_URL = "https://www.ftc.gov/business-guidance/privacy-security"

# Load the GDPR text up front; every later cell depends on `df`.
try:
    df = pd.read_csv(DEFAULT_CSV)
except Exception as e:
    # Chain explicitly so the underlying pandas/OS error stays attached as
    # the cause of the RuntimeError.
    raise RuntimeError(f"Failed to load dataset: {e}") from e

# Preview the first rows as the cell output.
df.head()


Unnamed: 0,chapter,chapter_title,article,article_title,sub_article,gdpr_text,href
0,1,General provisions,1,Subject-matter and objectives,1,This Regulation lays down rules relating to th...,http://gdpr-info.eu/art-1-gdpr/
1,1,General provisions,1,Subject-matter and objectives,2,This Regulation protects fundamental rights an...,http://gdpr-info.eu/art-1-gdpr/
2,1,General provisions,1,Subject-matter and objectives,3,The free movement of personal data within the ...,http://gdpr-info.eu/art-1-gdpr/
3,1,General provisions,2,Material scope,1,This Regulation applies to the processing of p...,http://gdpr-info.eu/art-2-gdpr/
4,1,General provisions,2,Material scope,2,This Regulation does not apply to the processi...,http://gdpr-info.eu/art-2-gdpr/


In [None]:
# List existing indexes; as the cell's last expression the result is displayed.
IndexFactory.list()

# index

In [None]:
INDEX_MAIN_NAME = "General Data Protection Regulation (EU)"

# Drop earlier copies of this index so re-running the cell does not pile up
# duplicates. `query` is a search term, so the result list may contain indexes
# whose names merely match it; only exact-name hits are deleted.
index_list = IndexFactory.list(query=INDEX_MAIN_NAME)
for stale_index in index_list["results"]:
    if stale_index.name == INDEX_MAIN_NAME:
        stale_index.delete()

index_main = IndexFactory.create(
    name=INDEX_MAIN_NAME,
    description=" General Data Protection Regulation (EU) dataset privacy & security.",
    embedding_model=EmbeddingModel.OPENAI_ADA002
)

# One record per GDPR sub-article. Rows with a missing `gdpr_text` are skipped
# because a NaN is not indexable text.
records = [
    Record(
        value=text,
        attributes={"source": "gdpr_text.csv"}
    )
    for text in df["gdpr_text"].dropna()
]
index_main.upsert(records)




# Scrape

In [7]:
# Fetch the website-scraping tool by its model ID and print the parameters it
# accepts (the recorded output below shows a single required `text` field).
Scrape_Website_Tool = ModelFactory.get("66f423426eb563fa213a3531")
model_params = Scrape_Website_Tool.get_parameters()
print(model_params)

# model_params.text = DEFAULT_URL  # TODO: check whether this is correct
# print(model_params)


INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/66f423426eb563fa213a3531 - {'Authorization': 'Token <REDACTED>', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 66f423426eb563fa213a3531 instantiated.


Parameters:
  - text: Not set (Required)


In [8]:
# Run the scraping tool on the default URL (passed as the tool's `text`
# parameter) and keep the payload it returns.
# NOTE(review): the response status is not checked here — presumably
# `scraped_text` is empty or None when the scrape fails; confirm before indexing.
scrape_response = Scrape_Website_Tool.run({"text": DEFAULT_URL})
scraped_text = scrape_response.data




In [None]:
INDEX_SCRAPE_NAME = "Scrape Text Index"

# Drop earlier copies of this index before recreating it. `query` is a search
# term, so only results whose name matches exactly are deleted.
for stale_index in IndexFactory.list(query=INDEX_SCRAPE_NAME)["results"]:
    if stale_index.name == INDEX_SCRAPE_NAME:
        stale_index.delete()

index_scrape = IndexFactory.create(
    name=INDEX_SCRAPE_NAME,
    description="Text content scraped from FTC privacy/security guidance page."
)

# The whole scraped page is stored as a single record tagged with its URL.
index_scrape.upsert(
    [Record(value=scraped_text, attributes={"source": DEFAULT_URL})]
)


## Link with an external API: custom Python tool

In [None]:
def get_federal_register_docs(term: str, from_date: str = "2024-01-01"):
    """
    Return the newest Federal Register documents matching a search term.

    Sends one GET request to the Federal Register documents endpoint asking
    for the two newest documents that match ``term`` and were published on or
    after ``from_date``.

    Args:
        term: Search term, sent as ``conditions[term]``.
        from_date: Earliest publication date, sent as
            ``conditions[publication_date][gte]``. Defaults to "2024-01-01"
            (presumably the API expects YYYY-MM-DD — confirm).

    Returns:
        A list of strings. On success, one entry per document made of the
        publication date, the title, and the document link on a second line.
        If nothing matches, a single "No documents found ..." entry. If the
        request or the parsing fails, a single "Error: ..." entry; such
        failures are reported in the return value rather than raised.
    """
    # Imported locally so the function does not depend on module-level state
    # when its source is handed to the utility-model factory.
    import requests

    url = "https://www.federalregister.gov/api/v1/documents.json"
    params = {
        "per_page": 2,
        "order": "newest",
        "conditions[publication_date][gte]": from_date,
        "conditions[term]": term
    }

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, params=params, timeout=10)
        # Surface HTTP errors instead of reporting them as "no documents".
        response.raise_for_status()
        data = response.json()

        results = [
            f"{doc.get('publication_date')} - {doc.get('title')}\n🔗 {doc.get('html_url')}"
            for doc in data.get("results", [])
        ]

        return results if results else [f"No documents found for: {term} after {from_date}"]
    except Exception as e:
        # Tool boundary: the agent consumes strings, so report the failure as
        # text instead of letting it propagate.
        return [f"Error: {str(e)}"]



In [15]:
UTILITY_NAME = "Get federal policy docs"

# Remove earlier uploads of this utility before recreating it. `query` is a
# search term, so the result list may include other models that merely match
# it; only exact-name hits are deleted.
utility_list = ModelFactory.list(query=UTILITY_NAME)
for utility in utility_list["results"]:
    if utility.name == UTILITY_NAME:
        utility.delete()

# Register the Python function as a utility model the agent can call.
external = ModelFactory.create_utility_model(
    name=UTILITY_NAME,
    description="Retrieve latest documents from the U.S. Federal Register based on a topic.",
    code=get_federal_register_docs
)


INFO:root:Start service for POST Models Paginate - https://platform-api.aixplain.com/sdk/models/paginate - {'Authorization': 'Token <REDACTED>', 'Content-Type': 'application/json'} - {"q": "Get federal policy docs", "pageNumber": 0, "pageSize": 20}
INFO:root:Listing Models: Status of getting Models on Page 0: 201
INFO:root:Start service for POST Utility Model - https://platform-api.aixplain.com/sdk/utilities - {'x-api-key': '<REDACTED>', 'Content-Type': 'application/json'} - {'name': 'Get federal policy docs', 'description': 'Retrieve latest documents from the U.S. Federal Register based on a topic.', 'inputs': [{'name': 'term', 'description': 'The term input is a text', 'type': 'text'}, {'name': 'from_date', 'description': 'The from_date input is a text', 'type': 'text'}], 'code': 's3://aixplain-platform-backend-temp/13768/sdk/1748497451425-2cfd5c7c-8a75-4eca-bc1b-44eda9ac9447',

# sql tool

In [10]:
# SQL tool over the GDPR violations CSV. The description is what the agent
# reads to decide when to use the tool; it warns that the country is stored
# under a different column name in the dataset.
sql_tool = AgentFactory.create_sql_tool(
    name="SQL Tool for GDPR Violations",
    description="Execute SQL queries on list of GDPR violations, including details such as the country(they have another name in dataset (column name)), fine amount, date of violation, and type of violation.",
    source="dataset/gdpr_violations.csv",
    source_type="csv"
)



# SUMMARIZATION TOOL

In [None]:
# Model tool selected by function (text summarization) and supplier
# (Microsoft) rather than by a specific model ID.
SUMMARIZATION_tool = AgentFactory.create_model_tool(
    function=Function.TEXT_SUMMARIZATION,
    supplier=Supplier.MICROSOFT,
    description="Summarizes long text into concise bullet points.",
)

# Agentic RAG

In [32]:
# Assemble the agent: two retrieval indexes, the summarizer, the Federal
# Register utility and the SQL tool, all driven by one LLM.
# NOTE(review): the instructions string is sent verbatim as the agent prompt;
# it contains typos ("Summrize") and a stray "." line — edit deliberately,
# since any change alters what the agent is told.
Agentic_RAG = AgentFactory.create(
    name="Policy Navigator Agent",
    description="Agent for policy analysis, allowing users to search, extract, and interpret insights from public regulations and legal guidelines.",
    instructions=(
        """
         You are a Policy Navigator Agent. Your role is to help users understand complex regulations, 
        - such as GDPR or U.S. privacy law, by answering their questions based on indexed content and tool outputs. 
        - Use the tools provided to extract accurate, referenced, and actionable responses. 
        - get the federal policy documents from the utility model.
        - Summrize the policy documents .
.
      """

    ),
    tools=[
        AgentFactory.create_model_tool(model=index_main.id,description="GDPR"),                                  # index built from the GDPR text CSV
        AgentFactory.create_model_tool(model=index_scrape.id,description="scraped site content"),             # index built from the scraped page
        SUMMARIZATION_tool,  # summarization model tool
        AgentFactory.create_model_tool(model=external.id), # utility that fetches Federal Register documents (no description passed here)
        sql_tool, # SQL tool over the GDPR violations CSV



         

        

    ],
    llm_id="669a63646eb56306647e1091"  # GPT-4o Mini
)


INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/6837f407d208307eba00b39a - {'Authorization': 'Token <REDACTED>', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 6837f407d208307eba00b39a instantiated.
INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/6837f407d208307eba00b39a - {'Authorization': 'Token <REDACTED>', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 6837f407d208307eba00b39a instantiated.
INFO:root:Start service for GET Model  - https://platform-api.aixplain.com/sdk/models/6837f42060cd84dde2fb78c6 - {'Authorization': 'Token <REDACTED>', 'Content-Type': 'application/json'}
INFO:root:Model Creation: Model 6837f42060cd84dde2fb78c6 instantiated.
INFO:root:Start service for GET Model  - https://platform-api.ai

In [13]:
# Ask for Federal Register material and display the agent's intermediate
# steps to see which tool produced the answer.
Agentic_RAG_response = Agentic_RAG.run("Get Privacy Act of 1974")
Agentic_RAG_response.data.intermediate_steps

INFO:root:Polling for Model: Start polling for model_process


[{'agent': 'Policy Navigator Agent',
  'input': "{'input': 'Get Privacy Act of 1974', 'chat_history': [], 'outputFormat': 'text'}",
  'output': "{'query': {'documents': [{'date': '2025-05-28', 'title': 'Privacy Act of 1974; Matching Program', 'link': 'https://www.federalregister.gov/documents/2025/05/28/2025-09528/privacy-act-of-1974-matching-program'}, {'date': '2025-05-22', 'title': 'Privacy Act of 1974; System of Records', 'link': 'https://www.federalregister.gov/documents/2025/05/22/2025-09194/privacy-act-of-1974-system-of-records'}, {'date': '2025-05-21', 'title': 'Privacy Act of 1974; Systems of Records', 'link': 'https://www.federalregister.gov/documents/2025/05/21/2025-09116/privacy-act-of-1974-systems-of-records'}, {'date': '2025-05-16', 'title': 'Privacy Act of 1974; System of Records', 'link': 'https://www.federalregister.gov/documents/2025/05/16/2025-08695/privacy-act-of-1974-system-of-records'}, {'date': '2025-05-14', 'title': 'Privacy Act of 1974; Exempting a System of Re

In [None]:
# Retrieval plus summarization in one request; display only the final answer.
Agentic_RAG_response = Agentic_RAG.run(
    query="Under the GDPR, when does the regulation apply to the processing of personal data ? then summrize it.",
)
Agentic_RAG_response.data["output"]


INFO:root:Polling for Model: Start polling for model_process


'The GDPR applies to the processing of personal data in the context of activities in the EU, regardless of where the processing takes place. It applies to data subjects in the EU, even if the controller or processor is not based in the EU. It lays down rules for protecting personal data and allows for the free movement of data. International agreements related to data transfer to third countries remain in force. The Regulation applies to automated and non-automated processing of personal data. It also applies to controllers not established in the EU if Member State law applies. Some obligations do not apply to small enterprises unless their processing poses a risk to data subjects. Specific measures must be in place to safeguard data subject rights. Data subjects can contact the data protection officer for assistance with their rights under this Regulation.'

In [15]:
# Display the intermediate steps of the previous run.
Agentic_RAG_response.data["intermediate_steps"]

[{'agent': 'Policy Navigator Agent',
  'input': "{'input': 'Under the GDPR, when does the regulation apply to the processing of personal data? then summrize it.', 'chat_history': [], 'outputFormat': 'text'}",
  'output': 'The GDPR applies to the processing of personal data in the context of activities in the EU, regardless of where the processing takes place. It applies to data subjects in the EU, even if the controller or processor is not based in the EU. It lays down rules for protecting personal data and allows for the free movement of data. International agreements related to data transfer to third countries remain in force. The Regulation applies to automated and non-automated processing of personal data. It also applies to controllers not established in the EU if Member State law applies. Some obligations do not apply to small enterprises unless their processing poses a risk to data subjects. Specific measures must be in place to safeguard data subject rights. Data subjects can c

In [16]:
# Reuse the session ID of the previous response so this question is asked
# with the earlier exchange as chat history (visible in the recorded steps).
session_id = Agentic_RAG_response.data.session_id
Agentic_RAG_response = Agentic_RAG.run("Which of the following violations led to some of the highest GDPR fines, according to the gdpr_violations dataset?", session_id=session_id)
Agentic_RAG_response.data.output

INFO:root:Polling for Model: Start polling for model_process


"{'query': {'highest_gdpr_fines': [{'name': 'France', 'fine_amount': 50000000, 'date': '01/21/2019', 'authority': 'French Data Protection Authority (CNIL)'}, {'name': 'France', 'fine_amount': 50000000, 'date': '01/21/2019', 'authority': 'French Data Protection Authority (CNIL)'}, {'name': 'Italy', 'fine_amount': 27802946, 'date': '02/01/2020', 'authority': 'Italian Data Protection Authority (Garante)'}, {'name': 'Italy', 'fine_amount': 27802946, 'date': '02/01/2020', 'authority': 'Italian Data Protection Authority (Garante)'}, {'name': 'Austria', 'fine_amount': 18000000, 'date': '10/23/2019', 'authority': 'Austrian Data Protection Authority (DSB)'}]}}"

In [17]:
# Display the intermediate steps of the previous run.
Agentic_RAG_response.data["intermediate_steps"]

[{'agent': 'Policy Navigator Agent',
  'input': "{'input': 'Which of the following violations led to some of the highest GDPR fines, according to the gdpr_violations dataset?', 'chat_history': [HumanMessage(content='Under the GDPR, when does the regulation apply to the processing of personal data? then summrize it.', additional_kwargs={}, response_metadata={}), AIMessage(content='The GDPR applies to the processing of personal data in the context of activities in the EU, regardless of where the processing takes place. It applies to data subjects in the EU, even if the controller or processor is not based in the EU. It lays down rules for protecting personal data and allows for the free movement of data. International agreements related to data transfer to third countries remain in force. The Regulation applies to automated and non-automated processing of personal data. It also applies to controllers not established in the EU if Member State law applies. Some obligations do not apply to 

In [55]:
# Continue the same session with an aggregate question and display the steps.
session_id = Agentic_RAG_response.data.session_id
Agentic_RAG_response = Agentic_RAG.run("Which country has issued the highest total GDPR fines according to the dataset??", session_id=session_id)
Agentic_RAG_response.data["intermediate_steps"]

INFO:root:Polling for Model: Start polling for model_process


[{'agent': 'Policy Navigator Agent',
  'input': '{\'input\': \'Which country has issued the highest total GDPR fines according to the dataset??\', \'chat_history\': [HumanMessage(content=\'Under the GDPR, when does the regulation apply to the processing of personal data? then summrize it.\', additional_kwargs={}, response_metadata={}), AIMessage(content="The GDPR applies to the processing of personal data in the context of activities in the EU, whether by a controller or processor. It includes rules for protecting data subjects\' rights and the free movement of data. It also applies to data processing that affects multiple EU Member States, whether automated or not. Certain obligations may not apply to small businesses. Data subjects can contact the data protection officer for help with their rights under this Regulation.", additional_kwargs={}, response_metadata={}), HumanMessage(content=\'Which of the following violations led to some of the highest GDPR fines, according to the gdpr_v

In [64]:
# Cross-check of the agent's answer, computed directly with pandas. The agent
# has to cope with the country living in the "name" column (country == name).

test_df = pd.read_csv("./dataset/gdpr_violations.csv")

# Total fine amount per country, largest first.
fines_per_country = test_df.groupby("name")["price"].sum()
country_fines = fines_per_country.sort_values(ascending=False)

# The head of the descending series is the country with the largest total.
country, fines = country_fines.index[0], country_fines.iloc[0]

summary = f"The country with the highest total fines is {country} with €{fines:,}"
print(summary)



The country with the highest total fines is France with €102,200,000


In [20]:
# Fresh run (no session ID passed): a counting question that the recorded
# output shows being answered through the SQL tool.
Agentic_RAG_response = Agentic_RAG.run("Which supervisory authority issued the most fines?")
Agentic_RAG_response.data["intermediate_steps"]

INFO:root:Polling for Model: Start polling for model_process


[{'agent': 'Policy Navigator Agent',
  'input': "{'input': 'Which supervisory authority issued the most fines?', 'chat_history': [], 'outputFormat': 'text'}",
  'output': 'The supervisory authority that issued the most fines is the Spanish Data Protection Authority (AEPD), with a total of 111 fines.',
  'tool_steps': [{'tool': 'SQL Tool for GDPR Violations_query',
    'input': "{'command': 'SELECT authority, COUNT(*) as fine_count FROM gdpr_violations GROUP BY authority ORDER BY fine_count DESC LIMIT 1;', 'database_url': 's3://aixplain-platform-backend-temp/13768/sdk/1748466343350-gdpr_violations.db'}",
    'output': '| authority | fine_count |\n| --- | --- |\n| Spanish Data Protection Authority (AEPD) | 111 |\n'}],
  'thought': None,
  'runTime': 3.688,
  'usedCredits': 0.00047115,
  'apiCalls': 2,
  'task': None}]

# Gradio UI

In [None]:
def _format_step(step):
    """Render one intermediate agent step as a list of Markdown fragments."""
    output = step.get("output")
    # Coerce to text: a None or structured output would break the final join.
    fragments = ["###  Output:", "N/A" if output is None else str(output)]

    tool_steps = step.get("tool_steps") or []
    if tool_steps:
        fragments.append("###  Tool Steps:")
        fragments.extend(
            f"**Tool**: `{tool.get('tool', 'N/A')}`" for tool in tool_steps
        )

    fragments.append(
        f"##  Runtime: {step.get('runTime', 'N/A')} sec |  API Calls: {step.get('apiCalls', 'N/A')}"
    )
    fragments.append("---")
    return fragments


def app(user_query, csv_file, custom_url):
    """
    Gradio callback: run the agent on a question and format its steps.

    Args:
        user_query: Question typed by the user.
        csv_file: Uploaded CSV from the file component, or None. Only its path
            is used, and only to label the answer.
        custom_url: Optional URL typed by the user; falls back to DEFAULT_URL.

    Returns:
        A Markdown string with each intermediate step's output, the tools it
        used, its runtime and API-call count, followed by the CSV and URL
        labels. Returns a warning string for an empty question or when the
        agent reports no steps, and " Error: ..." text if anything fails.

    Note:
        The CSV and URL are echoed in the output only; this function does not
        rebuild any index or tool from them.
    """
    # Skip the agent call when there is nothing to ask.
    if not user_query or not user_query.strip():
        return "⚠️ Please enter a question."

    try:
        if csv_file is None:
            csv_path = DEFAULT_CSV
        else:
            # The file component may pass a path string or an object exposing
            # `.name`, depending on the Gradio version/configuration.
            csv_path = getattr(csv_file, "name", csv_file)
        final_url = custom_url if custom_url else DEFAULT_URL

        response = Agentic_RAG.run(user_query)

        steps = response.data.intermediate_steps
        if not steps:
            return "⚠️ No intermediate steps found."

        sections = []
        for step in steps:
            sections.extend(_format_step(step))

        sections.append(f"**CSV Source:** `{csv_path}`")
        sections.append(f"**URL Source:** `{final_url}`")

        return "\n\n".join(sections)

    except Exception as e:
        # UI boundary: show the failure in the output panel instead of raising.
        return f" Error: {str(e)}"



In [None]:
# Build the UI: a question box, an optional CSV upload, an optional URL box,
# a Markdown panel for the answer, and a button that calls `app`.
with gr.Blocks() as demo:
    gr.Markdown("## Policy Navigator Agent")

    query_input = gr.Textbox(label="Ask about a policy or regulation", placeholder="example: Under the GDPR, when does the regulation apply to the processing of personal data? then summrize it.")
    csv_input = gr.File(file_types=[".csv"], label="Upload CSV file", height=120)
    url_input = gr.Textbox(label="Enter custom URL (optional) or use default", placeholder="Leave blank to use default FTC link")

    response_output = gr.Markdown(label="Formatted Output")

    submit_btn = gr.Button("Submit")
    # The inputs are passed to `app` in this order: query, CSV file, URL.
    submit_btn.click(
        fn=app,
        inputs=[query_input, csv_input, url_input],
        outputs=response_output
    )

# NOTE(review): share=True requests a public share link — confirm that exposing
# the agent (and the API credits behind it) outside this machine is intended.
demo.launch(share=True)
