In [None]:
import json

import duckdb
from jinja2 import Template

from src.duckdb_prompt_udf import prompt

# Open an in-memory DuckDB database in read/write mode for this session.
con = duckdb.connect(":memory:", read_only=False)

# Expose the imported `prompt` callable to SQL as a scalar function named
# "prompt": three string arguments plus one float, returning a string.
prompt_param_types = [str, str, str, float]
con.create_function("prompt", prompt, prompt_param_types, str)

"Done!"

In [None]:
# System prompt passed to the `prompt` UDF for every row.
#
# NOTE: the query template places this text inside a single-quoted SQL literal,
# so an unescaped apostrophe here would end that literal early. Presumably that
# is why "Citys" is written without one -- keep the text apostrophe-free unless
# the value is escaped at render time.
system_prompt = (
    "You are a government auditor reviewing service request resolutions from New York Citys 311 system.\n"
    "Your task is to evaluate if the resolution description demonstrates appropriate action was taken to address the reported issue.\n\n"
    "Guidelines for evaluation:\n"
    "- Check if the resolution matches the likely complaint type\n"
    "- Verify if specific actions were documented\n"
    "- Determine if the response was timely and appropriate\n\n"
    "Given a resolution description, respond ONLY with JSON in this exact format:\n"
    "```json\n"
    "{\n"
    '  "reasoning": "Detailed explanation of why the resolution was or was not appropriate",\n'
    '  "action_taken": true/false\n'
    "}\n"
    # Closing fence is a bare ``` (it previously repeated the "json" tag, which
    # made the example block malformed).
    "```\n"
    "Note: action_taken should be true only if concrete steps were documented and appropriately addressed the issue."
)

# Structured-output specification, serialized to a JSON string so it can be
# passed to the `prompt` UDF as a VARCHAR argument.
#
# - "strict" is a real boolean (it was previously the string "true").
# - "required" lives inside the object schema next to "properties"; JSON Schema
#   ignores it on the outer wrapper, so neither field was actually mandatory.
# - "additionalProperties": False keeps the reply limited to the two declared
#   keys, which is what strict structured-output modes expect.
# NOTE(review): how `prompt` forwards this document to the model is not visible
# here -- confirm the wrapper layout (name/strict/schema) matches what it expects.
json_schema = json.dumps(
    {
        "name": "action_taken_response",
        "type": "object",
        "strict": True,
        "schema": {
            "type": "object",
            "properties": {
                "reasoning": {
                    "type": "string",
                },
                "action_taken": {
                    "type": "boolean",
                },
            },
            "required": ["reasoning", "action_taken"],
            "additionalProperties": False,
        },
    }
)

# Jinja2 template for the SQL statement that runs the audit.
#
# Placeholders: system_prompt, json_schema, temperature, data_file, limit.
#
# What the rendered statement does:
#   1. The innermost subquery reads "{{ data_file }}", replaces a NULL
#      resolution_description with 'N/A', counts rows per description, and keeps
#      the {{ limit }} most frequent descriptions.
#   2. For each of those rows it calls the `prompt` SQL function with the
#      description (prefixed by 'RESOLUTION_DESCRIPTION: '), the system prompt,
#      the JSON schema and the temperature.
#   3. regexp_replace(..., 'g') removes every "```json" / "```" fence from the
#      reply before `reasoning` and `action_taken` are extracted from it with
#      json_extract_string (both come back as VARCHAR).
#   4. COPY writes the four output columns to ./output/llm_reasoning_output.csv.
#
# Caveats:
#   - `llm_response` is referenced by the two json_extract_string calls in the
#     same SELECT list that defines it, so the query depends on the engine
#     allowing that alias reuse.
#   - system_prompt and json_schema are wrapped in single quotes and data_file
#     in double quotes; the template itself performs no escaping of the values.
query_template = """
COPY (
    WITH llm_reasoning AS (
        SELECT
            regexp_replace(
                prompt(
                    'RESOLUTION_DESCRIPTION: ' || resolution_description,
                    '{{ system_prompt }}'::VARCHAR,
                    '{{ json_schema }}'::VARCHAR,
                    {{ temperature }}::FLOAT
                ),
                '```json|```',
                '',
                'g'
            )::VARCHAR AS llm_response,
            json_extract_string(llm_response, '$.reasoning')::VARCHAR AS reasoning,
            json_extract_string(llm_response, '$.action_taken')::VARCHAR AS action_taken,
            resolution_description,
            description_count
        FROM (
            SELECT 
                IFNULL(resolution_description, 'N/A') AS resolution_description, 
                COUNT(*) AS description_count
            FROM "{{ data_file }}"
            GROUP BY resolution_description
            ORDER BY description_count DESC
            LIMIT {{ limit }}
        )
    )

    SELECT
        resolution_description,
        description_count,
        reasoning,
        action_taken
    FROM llm_reasoning
) TO './output/llm_reasoning_output.csv';
"""

# Render the template with variables.
#
# The prompt and schema placeholders sit inside single-quoted SQL string
# literals, so every embedded single quote is doubled (standard SQL escaping)
# before rendering. Without this, an apostrophe in either text would close the
# literal early and break -- or silently change -- the statement.
template = Template(query_template)
query = template.render(
    system_prompt=system_prompt.replace("'", "''"),
    json_schema=json_schema.replace("'", "''"),
    temperature=0.4,
    data_file="./data/cityofnewyork/service_requests.parquet",
    limit=10,
)

# Execute the rendered statement. Every row selected by the subquery is passed
# through the `prompt` SQL function, so this cell may take a while.
print("Executing query...")

executed = con.execute(query)
result = executed.fetchall()  # rows reported back by the COPY statement
print(result)