In [None]:
from autogen_ext.models.openai import OpenAIChatCompletionClient
import json
import os


from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import MaxMessageTermination
from autogen_agentchat.messages import TextMessage
from autogen_ext.code_executors.docker import DockerCommandLineCodeExecutor
from autogen_agentchat.ui import Console
import os
from dotenv import load_dotenv


In [None]:
# Load variables from a .env file into the process environment so that the
# os.getenv() lookups in the configuration cell can pick them up.
load_dotenv()

In [None]:
# Model configuration.
# Model names fall back to a pinned default when the environment does not
# override them; API keys have no fallback and are None when unset.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")

# NOTE(review): the key is read from OPENAI_API_KEY2, not OPENAI_API_KEY —
# confirm this is the intended variable name.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY2")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4.1")

# Directory configuration
TEMP_DIR = "temp"

In [None]:
# System prompt for the JSON-based categorizer agent. It instructs the model to
# echo the input JSON unchanged apart from a new "category" key on every
# transaction, restricted to the six categories defined inside the prompt.
CATEGORIZER_SYSTEM_MESSAGE = """
You are an AI financial analyst. Your purpose is to categorize financial transactions into a few broad categories.

You will receive a JSON object that contains:
1. 'transactions_by_cardholder': a dictionary where each key is a cardholder name and    the value is a list of transaction objects.
2. 'summary': a dictionary with account summary data.

Your job:
- Return the exact same JSON object structure.
- Do NOT remove or rename any keys.
- Do NOT modify the 'summary' section.
- Inside 'transactions_by_cardholder', for each transaction object, add a new key-value   pair: "category": "Category Name".

CRITICAL RULES:
Use ONLY the 6 categories defined below.
For payments, refunds, and fees, use the Financial Transactions category.
If a description is too vague, use Uncategorized.

CATEGORY DEFINITIONS:
Food & Dining: All food-related spending. This includes both groceries from supermarkets and purchases from restaurants, cafes, bars, and food delivery services.
Merchandise & Services: A broad category for general shopping and personal care. This includes retail stores, online marketplaces (like Amazon), electronics, clothing, hobbies, entertainment, streaming services (Netflix), gym memberships, and drugstores (CVS).
Bills & Subscriptions: Recurring charges for essential services. This primarily includes utilities (phone, internet) and insurance payments.
Travel & Transportation: Costs for getting around. This includes daily transport (gas stations, Uber, public transit) and long-distance travel (airlines, hotels, rental cars).
Financial Transactions: All non-spending activities that affect your balance. This includes payments made to your account, refunds from merchants, statement credits, and any fees or interest charges.
Uncategorized: For any transaction that does not clearly fit into the categories above.

Output ONLY the JSON with categories added. Do not include any explanations or markdown formatting. Once you output the categorized JSON, you are done - do not continue the conversation.
"""

In [None]:
def get_openai_client():
    """Build and return an ``OpenAIChatCompletionClient``.

    The model name and API key are taken from the module-level
    ``OPENAI_MODEL`` and ``OPENAI_API_KEY`` settings.
    """
    client_kwargs = {"model": OPENAI_MODEL, "api_key": OPENAI_API_KEY}
    return OpenAIChatCompletionClient(**client_kwargs)

model_client = get_openai_client()

# Code executor
code_executor = DockerCommandLineCodeExecutor(
    work_dir=TEMP_DIR,
    image="amancevice/pandas",  
    )
await code_executor.start()

NameError: name 'get_openai_client' is not defined

In [None]:
# STAGE 2: Categorizer processes the parsed JSON
# --- Categorizer Agent ---
categorizer_agent = AssistantAgent(
    name="categorizer",
    model_client=model_client,
    system_message=CATEGORIZER_SYSTEM_MESSAGE,
    reflect_on_tool_use=False
        )

categorizer_task = TextMessage(
    content=f"Here is the parsed JSON to categorize:\n```json\n{json.dumps(parsed_json, indent=2)}\n```",
    source="user"
)

max_message_termination = MaxMessageTermination(max_messages=5)

categorizer_team = RoundRobinGroupChat(
    participants=[categorizer_agent],
    termination_condition=max_message_termination
)

categorization_result = await Console(categorizer_team.run_stream(task=categorizer_task))

# Stop executor safely
await code_executor.stop()

# Search categorization result for final JSON
final_parsed_json = None

# Check categorizer messages first
for msg in categorization_result.messages:
    src = getattr(msg, "source", "")
    content = getattr(msg, "content", None)
    try:
        content_str = content if isinstance(content, str) else str(content)
    except Exception:
        content_str = None

    if not content_str:
        continue

    if src == "categorizer":
        final_parsed_json = extract_json_from_text(content_str)
        if final_parsed_json is not None:
            break

In [None]:
import csv
import io
import json
import os
import re

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.conditions import MaxMessageTermination
from autogen_agentchat.messages import TextMessage
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.ui import Console
from autogen_ext.code_executors.docker import DockerCommandLineCodeExecutor
from autogen_ext.models.openai import OpenAIChatCompletionClient
from dotenv import load_dotenv

# Load variables from a .env file into the process environment before the
# os.getenv() configuration lookups below are evaluated.
load_dotenv()

# Model configuration (left as-is).
# Model names fall back to a pinned default; API keys are None when unset.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")

# NOTE(review): the key is read from OPENAI_API_KEY2, not OPENAI_API_KEY —
# confirm this is the intended variable name.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY2")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4.1")

# Directory configuration: the CSV to categorize lives inside TEMP_DIR.
TEMP_DIR = "temp"
CSV_PATH = os.path.join(TEMP_DIR, "data.csv")

# System prompt for the CSV-based categorizer agent. It fixes the expected
# input columns and instructs the model to return the same CSV with one extra
# trailing "Category" column, using only the six categories named in the prompt.
CATEGORIZER_SYSTEM_MESSAGE = """
You are an AI financial analyst. Your purpose is to categorize financial transactions in a CSV file into a few broad categories.

INPUT FORMAT:
- You will receive a comma-separated CSV (plain text) with the following columns exactly, in this order:
  bank_name, cardholder, transaction_date, description, amount

YOUR TASK:
- Return the same CSV content and structure with ALL existing rows and columns preserved and unchanged.
- Append a new final column named "Category".
- For each transaction row, write one category value in the new "Category" column.
- Keep the original delimiter (comma), quoting, header order, and row order.
- Do NOT remove or rename columns.
- Do NOT modify any existing cell values (including amount, dates, names, or descriptions).
- Do NOT add extra commentary or markdown. Output ONLY the CSV text.

CATEGORIZATION RULES:
Use ONLY the 6 categories defined below (do not invent additional categories).
- Food & Dining: Food-related spending (groceries, restaurants, cafes, bars, delivery).
- Merchandise & Services: General shopping and personal services (retail, online marketplaces like Amazon, electronics, clothing, hobbies, entertainment, streaming, gyms, drugstores).
- Bills & Subscriptions: Recurring essential services (utilities like phone/internet, insurance).
- Travel & Transportation: Getting around (gas, rideshare/taxi, public transit, airlines, hotels, rental cars).
- Financial Transactions: Non-spending balance changes (payments to account, refunds, statement credits, fees, interest).
- Uncategorized: If it doesn’t clearly fit the above or the description is too vague.

GUIDANCE:
- Use the description text and any clear intent (e.g., “REFUND”, “PAYMENT”, “FEE”, “CREDIT”) to detect Financial Transactions.
- If the description is ambiguous and not clearly Food, Merchandise/Services, Bills, or Travel/Transportation, use Uncategorized.
- Do not change the amount sign or format; the amount field is informational only for categorization.

Output ONLY the CSV with the additional "Category" column appended. Do not include any explanations or markdown formatting. Once you output the categorized CSV, you are done—do not continue the conversation.
"""

def get_openai_client():
    """Return an ``OpenAIChatCompletionClient`` built from module settings.

    Uses ``OPENAI_MODEL`` as the model name and ``OPENAI_API_KEY`` as the key.
    """
    settings = dict(model=OPENAI_MODEL, api_key=OPENAI_API_KEY)
    return OpenAIChatCompletionClient(**settings)

# Client instance passed to the categorizer agent below.
model_client = get_openai_client()

# Helper: robustly extract CSV content if the model wraps output in fences
def extract_csv_from_text(text: str):
    """Pull the CSV payload out of a model reply.

    If the reply contains a Markdown code fence, the content of the first
    fenced block is returned. A language tag on the opening fence line is
    dropped whatever it is (``csv``, ``text``, ``plaintext``, ...), so it can
    never end up as a bogus first CSV row. A token directly after the opening
    backticks is treated as a tag only when it contains no comma or
    whitespace and is followed by a line break; anything else (for example a
    header row written on the fence line) is kept as content.

    Without a fence, the whole reply is treated as raw CSV, provided it
    contains at least one newline and one comma.

    Args:
        text: Raw reply text; may be empty or None.

    Returns:
        The stripped CSV text, or None when the input is empty or does not
        look like CSV.
    """
    if not text:
        return None
    # Prefer the first fenced block if present. The optional group consumes
    # the fence's language tag: either a bare token that ends its line, or a
    # literal "csv" glued to the content (kept for backward compatibility).
    fence_match = re.search(
        r"```(?:[^\s,`]*(?=[ \t]*\r?\n)|csv)?\s*(.*?)\s*```",
        text,
        flags=re.DOTALL | re.IGNORECASE,
    )
    if fence_match:
        return fence_match.group(1).strip()
    # Otherwise assume raw CSV
    # Sanity check: must contain at least one newline and commas
    if ("\n" in text) and ("," in text):
        return text.strip()
    return None

# Ensure the CSV exists
if not os.path.exists(CSV_PATH):
    raise FileNotFoundError(f"CSV not found at {CSV_PATH}. Please place 'data.csv' in the 'temp' directory.")

# Read CSV to text
with open(CSV_PATH, "r", encoding="utf-8") as f:
    input_csv_text = f.read()

# Categorizer Agent
categorizer_agent = AssistantAgent(
    name="categorizer",
    model_client=model_client,
    system_message=CATEGORIZER_SYSTEM_MESSAGE,
    reflect_on_tool_use=False,
)

# User task: provide the CSV and request categorization
categorizer_task = TextMessage(
    content=(
        "The input CSV columns are exactly: bank_name, cardholder, transaction_date, description, amount.\n"
        "Append a new final column named 'Category', categorize each row using ONLY the allowed categories, "
        "and return ONLY the CSV text with the new column included.\n\n"
        f"{input_csv_text}"
    ),
    source="user",
)

# Termination condition to avoid loops
max_message_termination = MaxMessageTermination(max_messages=5)

# Team using RoundRobinGroupChat (single assistant agent)
categorizer_team = RoundRobinGroupChat(
    participants=[categorizer_agent],
    termination_condition=max_message_termination,
)

# Run and stream to console (works in Jupyter via top-level await)
categorization_result = await Console(categorizer_team.run_stream(task=categorizer_task))

# Extract the final CSV output from the assistant messages
final_csv_text = None
for msg in categorization_result.messages:
    src = getattr(msg, "source", "")
    content = getattr(msg, "content", None)
    if not content:
        continue
    content_str = content if isinstance(content, str) else str(content)
    if src == "categorizer":
        maybe_csv = extract_csv_from_text(content_str)
        if maybe_csv:
            final_csv_text = maybe_csv
            break

if not final_csv_text:
    raise RuntimeError("Failed to extract categorized CSV from assistant response.")

# Overwrite the original CSV with the categorized data
with open(CSV_PATH, "w", encoding="utf-8", newline="") as f:
    f.write(final_csv_text if final_csv_text.endswith("\n") else final_csv_text + "\n")

print(f"Categorized CSV written to: {CSV_PATH}")