In [None]:
from autogen_ext.models.openai import OpenAIChatCompletionClient
import json
import os
import re

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_agentchat.conditions import MaxMessageTermination, TextMentionTermination
from autogen_agentchat.messages import TextMessage
from autogen_ext.code_executors.docker import DockerCommandLineCodeExecutor
from autogen_agentchat.ui import Console
from dotenv import load_dotenv

# Load environment variables via python-dotenv before any of them are read below.
load_dotenv()

# Model configuration (left as-is)
# NOTE(review): the OpenAI key is read from OPENAI_API_KEY2, not OPENAI_API_KEY —
# presumably intentional; confirm against the deployment's .env file.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_MODEL", "claude-sonnet-4-20250514")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY2")
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4.1-mini")

# Working directory and the CSV that is both read and later overwritten in place.
TEMP_DIR = "temp"
CSV_PATH = os.path.join(TEMP_DIR, "data.csv")

# System prompt for the categorizer agent.
# It fixes the input schema (five known columns), the single output column to append
# ("category"), the closed set of eight categories, and the termination sentinel.
# The sentinel instruction is explicit about placement (own line, after the last row)
# so that it can be told apart from CSV data when the reply is post-processed.
CATEGORIZER_SYSTEM_MESSAGE = """
You are an AI financial analyst. You will receive a csv file named 'data.csv'.
Your purpose is to categorize financial transactions in a CSV file into a few broad categories.

INPUT FORMAT:
- You will receive a comma-separated CSV (plain text) named 'data.csv' with the following columns exactly, in this order:
  bank_name, cardholder, transaction_date, description, amount

YOUR TASK:
- Return the same CSV content and structure with ALL existing rows and columns preserved and unchanged.
- Append a new final column named "category".
- For each transaction row, write one category value in the new "category" column.
- Keep the original delimiter (comma), quoting, header order, and row order.
- Do NOT remove or rename columns.
- Do NOT modify any existing cell values (including amount, dates, names, or descriptions).
- Do NOT add extra commentary or markdown. Output ONLY the CSV text.

CATEGORIZATION RULES:
Use ONLY the 8 categories defined below (do not invent additional categories).

- Food & Dining: Food-related spending (groceries, restaurants, cafes, bars, delivery).
- Merchandise & Services: General shopping and personal/professional services
  (retail stores, online marketplaces like Amazon, electronics, clothing, household goods, gifts, salons/spas, non-auto repair/services).
  Excludes health/medical, entertainment/gyms/streaming, utilities/insurance, and transportation.
- Bills & Subscriptions: Recurring essential services
  (utilities like electricity/water/gas, phone, internet, insurance premiums).
- Travel & Transportation: Getting around
  (gas/EV charging, rideshare/taxi, public transit, parking, tolls, airlines, hotels, rental cars).
- Health & Wellness: Healthcare spending
  (doctors, dentists, hospitals/clinics, labs, pharmacies/drugstores, vision, mental health, medical equipment/supplies).
- Entertainment & Leisure: Non-essential fun/recreation
  (streaming services, gyms/fitness, movies, concerts, sports/events, amusement parks, gaming, books/music/media, hobbies).
- Financial Transactions: Non-spending balance changes
  (payments to account, refunds, statement credits, chargebacks, cash advances, balance transfers,
   fees, interest).
- Uncategorized: If it does not clearly fit the above or the description is too vague.

GUIDANCE:
- Use the description text and any clear intent (e.g., “REFUND”, “PAYMENT”, “FEE”, “CREDIT”) to detect Financial Transactions.
- If the description is ambiguous and not clearly identified, use Uncategorized.
- Do not change the amount sign or format; the amount field is informational only for categorization.

Output ONLY the CSV with the additional "category" column appended. Do not include any explanations or markdown formatting.
Once you have output the categorized CSV, you are done; do not continue the conversation.
After the last CSV row, write the single word STOP on its own line. STOP is a termination signal, not a CSV row.
"""

def get_openai_client():
    """Build an OpenAI chat-completion client from the module-level settings.

    Returns:
        An ``OpenAIChatCompletionClient`` configured with ``OPENAI_MODEL`` and
        ``OPENAI_API_KEY``.
    """
    client_settings = {"model": OPENAI_MODEL, "api_key": OPENAI_API_KEY}
    return OpenAIChatCompletionClient(**client_settings)


# Single client instance handed to the categorizer agent further down.
model_client = get_openai_client()

# Helper: robustly extract CSV content if the model wraps output in fences
def extract_csv_from_text(text: str) -> "str | None":
    """Return the CSV payload contained in a model reply, or ``None``.

    The first fenced code block (optionally tagged ``csv``) wins if one is
    present; otherwise the whole reply is treated as raw CSV. In both cases a
    trailing ``STOP`` that sits on a line of its own is removed: the system
    prompt in this file asks the model to end its reply with that sentinel,
    and without this step it would be persisted as a bogus last CSV row.

    Args:
        text: Raw assistant reply. May be empty or ``None``.

    Returns:
        The stripped CSV text. ``None`` when ``text`` is empty, or when an
        unfenced reply (after sentinel removal) does not contain both a
        newline and a comma and therefore cannot be a header plus data rows.
    """
    if not text:
        return None

    # Prefer the first fenced block if present
    fence_match = re.search(r"```(?:csv)?\s*(.*?)\s*```", text, flags=re.DOTALL | re.IGNORECASE)
    is_fenced = fence_match is not None
    candidate = (fence_match.group(1) if is_fenced else text).strip()

    # Drop the termination sentinel. It must occupy its own final line, so a
    # legitimate cell value that merely ends in "STOP" (e.g. "BUS STOP") on a
    # data row is left untouched.
    candidate = re.sub(r"\r?\n[ \t]*STOP[.!]?[ \t]*\Z", "", candidate).rstrip()

    if is_fenced:
        return candidate

    # Otherwise assume raw CSV
    # Sanity check (after sentinel removal, so "prose, text\nSTOP" is rejected):
    # must contain at least one newline and commas
    if ("\n" in candidate) and ("," in candidate):
        return candidate
    return None

# Fail fast with an actionable message when the input file is missing.
csv_is_present = os.path.exists(CSV_PATH)
if not csv_is_present:
    raise FileNotFoundError(f"CSV not found at {CSV_PATH}. Please place 'data.csv' in the 'temp' directory.")

# Load the whole CSV as text; it is embedded verbatim in the task message.
with open(CSV_PATH, mode="r", encoding="utf-8") as f:
    input_csv_text = f.read()

# Categorizer agent: a single assistant whose behavior is defined by the system prompt.
categorizer_agent = AssistantAgent(
    name="categorizer",
    model_client=model_client,
    system_message=CATEGORIZER_SYSTEM_MESSAGE,
    reflect_on_tool_use=False,
)

# User task: restate the input schema and embed the raw CSV text.
# The new column is requested as 'category' (lowercase) so that it agrees with the
# header name mandated by the system prompt; asking for 'Category' here gave the
# model two conflicting header spellings.
categorizer_task = TextMessage(
    content=(
        "The input CSV columns are exactly: bank_name, cardholder, transaction_date, description, amount.\n"
        "Append a new final column named 'category', categorize each row using ONLY the allowed categories, "
        "and return ONLY the CSV text with the new column included.\n\n"
        f"{input_csv_text}"
    ),
    source="user",
)

# Hard cap on conversation length to avoid loops.
# NOTE(review): presumably the task message itself counts toward max_messages,
# leaving room for exactly one assistant reply — confirm against the AutoGen docs.
max_message_termination = MaxMessageTermination(max_messages=2)

# Team using RoundRobinGroupChat (single assistant agent).
# The run ends on whichever fires first: a "STOP" mention or the message cap.
categorizer_team = RoundRobinGroupChat(
    participants=[categorizer_agent],
    termination_condition=TextMentionTermination("STOP") | max_message_termination,
)

# Run and stream to console (works in Jupyter via top-level await)
categorization_result = await Console(categorizer_team.run_stream(task=categorizer_task))

# Extract the final CSV output from the assistant messages
final_csv_text = None
for msg in categorization_result.messages:
    src = getattr(msg, "source", "")
    content = getattr(msg, "content", None)
    if not content:
        continue
    content_str = content if isinstance(content, str) else str(content)
    if src == "categorizer":
        maybe_csv = extract_csv_from_text(content_str)
        if maybe_csv:
            final_csv_text = maybe_csv
            break

if not final_csv_text:
    raise RuntimeError("Failed to extract categorized CSV from assistant response.")

# Replace the input file with the categorized CSV, guaranteeing a final newline.
csv_payload = final_csv_text
if not csv_payload.endswith("\n"):
    csv_payload = csv_payload + "\n"

# newline="" writes the text as-is, with no platform newline translation.
with open(CSV_PATH, mode="w", encoding="utf-8", newline="") as f:
    f.write(csv_payload)

print(f"Categorized CSV written to: {CSV_PATH}")