In [1]:
import os
import pandas as pd
from dotenv import load_dotenv

from langchain_openai import ChatOpenAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser

# Load key=value pairs from a .env file into the process environment.
# Presumably this supplies the OpenAI API key that ChatOpenAI reads — confirm.
load_dotenv()

  from .autonotebook import tqdm as notebook_tqdm

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "c:\Users\kevin\anaconda3\envs\torch\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\kevin\anaconda3\envs\torch\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\kevin\anaconda3\envs\torch\Lib\site-packages\ipykernel\kernelapp.py", line 75

True

In [2]:
# Chat model used by the router chain. temperature=0 asks for the least random
# sampling; the "json_object" response_format is passed through model_kwargs so
# that replies come back as a JSON object.
llm = ChatOpenAI(
    temperature=0,
    model="gpt-4o-mini",
    model_kwargs=dict(response_format=dict(type="json_object")),
)

In [3]:
# Routing prompt: describes the two available tools (with keyword hints) and
# asks the model to answer with a JSON object whose single key is 'tool'.
# {question} is the only template variable.
router_prompt = PromptTemplate(
    template="""You are a Fraud Analysis Assistant with access to two specialized tools:
    
    1. 'sql_analysis_tool': Use this for questions about transaction counts, specific fraud cases, trends over time, or quantitative data stored in the 'TRXFraud' database.
       - Keywords: "how many", "count", "transactions", "highest fraud", "fluctuation", "total value", "statistics".
       
    2. 'policy_rag_tool': Use this for questions about fraud detection techniques, policies, definitions, qualitative descriptions, or interpreting charts/graphs from the PDF.
       - Keywords: "methods", "indicators", "techniques", "policy", "what is", "explain", "chart", "graph".
       
    Analyze the user's question and choose the correct tool. Do not guess.
    
    User Question: {question}
    
    Return JSON with a single key 'tool' with value 'sql_analysis_tool' or 'policy_rag_tool'."""
    ,
    input_variables=["question"]
)

# Pipeline: fill the prompt with the question, send it to the chat model, then
# parse the model's reply as JSON.
router_chain = router_prompt | llm | JsonOutputParser()

In [4]:
def test_router(question):
    try:
        result = router_chain.invoke({"question": question})
        return result.get("tool", "error")
    except Exception as e:
        return f"error: {str(e)}"

In [None]:
# Labelled evaluation questions, grouped by the tool each one is expected to be
# routed to. Dict insertion order is preserved, so the SQL questions come
# first, followed by the RAG questions.
_questions_by_expected_tool = {
    "sql_analysis_tool": [
        "How many transactions are in the database?",
        "What is the total value of fraud in 2020?",
        "Which merchant category has the highest fraud?",
        "Show me the monthly fluctuation of transactions.",
        "How many fraud cases happened in Canada?",
    ],
    "policy_rag_tool": [
        "What are the techniques for counterfeit cards?",
        "Explain the impact of fraud on cardholders.",
        "What are the core components of a detection system?",
        "Does the document mention neural networks?",
        "What is the difference between online and offline fraud?",
    ],
}

# Flatten into (question, expected_tool) pairs.
test_cases = [
    (question, expected_tool)
    for expected_tool, questions in _questions_by_expected_tool.items()
    for question in questions
]

In [None]:
# Route every labelled question and keep one record per question.
results = []
for q, expected in test_cases:
    predicted = test_router(q)
    is_correct = predicted == expected
    results.append(
        {
            "Question": q,
            "Expected": expected,
            "Predicted": predicted,
            "Correct": "YES" if is_correct else "NO",
        }
    )

# Count the records whose predicted tool matched the expected one.
correct_count = sum(1 for row in results if row["Correct"] == "YES")





In [7]:
# Tabulate the per-question records and report the share routed correctly.
df_router = pd.DataFrame(results)

# Guard the division: with an empty test_cases list the plain ratio would raise
# ZeroDivisionError, so report 0% in that case instead.
accuracy = correct_count / len(test_cases) if test_cases else 0.0

print(f"Router Accuracy: {accuracy:.0%}")
# to_string() renders the whole frame as plain text.
print(df_router.to_string())

Router Accuracy: 100%
                                                   Question           Expected          Predicted Correct
0                How many transactions are in the database?  sql_analysis_tool  sql_analysis_tool     YES
1                 What is the total value of fraud in 2020?  sql_analysis_tool  sql_analysis_tool     YES
2            Which merchant category has the highest fraud?  sql_analysis_tool  sql_analysis_tool     YES
3          Show me the monthly fluctuation of transactions.  sql_analysis_tool  sql_analysis_tool     YES
4                  How many fraud cases happened in Canada?  sql_analysis_tool  sql_analysis_tool     YES
5            What are the techniques for counterfeit cards?    policy_rag_tool    policy_rag_tool     YES
6               Explain the impact of fraud on cardholders.    policy_rag_tool    policy_rag_tool     YES
7       What are the core components of a detection system?    policy_rag_tool    policy_rag_tool     YES
8                Does th