# Sandbox for CheckbookNYC API 

In [1]:
import requests
import xml.etree.ElementTree as ET
import time
from pathlib import Path
import xml.dom.minidom

In [33]:
API_URL = "https://www.checkbooknyc.com/api"
TEMP_XML_FILE = Path("tmp_latest_sandbox_response.xml")  # same temp file each time
TIMEOUT = (10, 300) # connect_timeout, read_timeout for requests

In [34]:
# session set up
session = requests.Session()
session.headers.update({
    'content-type': 'application/xml',
    'user-agent': 'mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36'
})

# **** visit main site initially to bypass incapsula ****
response = session.get('https://www.checkbooknyc.com/', timeout=TIMEOUT)
print(f"main site status: {response.status_code}")
time.sleep(2)

session.headers.update({
    'content-type': 'application/xml',
    'referer': 'https://www.checkbooknyc.com/',
    'origin': 'https://www.checkbooknyc.com'
})

main site status: 200


## Helper: `fetch_xml`

In [35]:
def fetch_xml(payload: str) -> ET.Element:
    """
    fetch XML response given `payload`: xml body
    then overwrite the temp file.
    """
    response = session.post(API_URL, data=payload, timeout=TIMEOUT)
    response.raise_for_status()

    # decode content
    xml_str = response.content.decode('utf-8')

    #  save to temp file
   
    with open(TEMP_XML_FILE, "w", encoding="utf-8") as f:
        f.write(xml_str)
        print(f"XML response saved to {TEMP_XML_FILE}")

    if not xml_str.strip().startswith("<"):
        raise ValueError("Response is not XML:\n" + xml_str[:300])

    # debug print
    xml_str = response.content.decode("utf-8")
    print(xml_str[:500])  # print first 500 chars
    
    # parse XML
    root = ET.fromstring(xml_str)

    return root # ET.Element: parsed XML root

## XML Requests

### 1. Example from Docs
https://www.checkbooknyc.com/spending-api#sample-request-spending

In [36]:
xml_example="""
<request>
  <type_of_data>Spending</type_of_data>
  <records_from>1</records_from>
  <max_records>1000</max_records>
  <search_criteria>
    <criteria>
      <name>agency_code</name>
      <type>value</type>
      <value>039</value>
    </criteria>
  </search_criteria>
  <response_columns>
    <column>agency</column>
    <column>budget_code</column>
  </response_columns>
</request>
"""

In [37]:
fetch_xml(xml_example)

XML response saved to tmp_latest_sandbox_response.xml
<?xml version="1.0"?>
<response>
  <status>
    <result>success</result>
  </status>
  <request_criteria>
    <request>
      <type_of_data>Spending</type_of_data>
      <records_from>1</records_from>
      <max_records>1000</max_records>
      <search_criteria>
        <criteria>
          <name>agency_code</name>
          <type>value</type>
          <value>039</value>
        </criteria>
      </search_criteria>
      <response_columns>
        <column>agency</column>
        <column>budget_


<Element 'response' at 0x10945e750>

### 2. Without `search_criteria` filters

In [19]:
# simple spending data (no specific criteria)
xml_unfiltered = """<request>
  <type_of_data>Spending</type_of_data>
  <records_from>1</records_from>
  <max_records>3</max_records>
  <response_columns>
    <column>agency</column>
    <column>payee_name</column>
    <column>check_amount</column>
    <column>fiscal_year</column>
  </response_columns>
</request>"""

In [20]:
fetch_xml(xml_unfiltered)

XML response saved to tmp_latest_response.xml
<?xml version="1.0"?>
<response>
  <status>
    <result>success</result>
  </status>
  <request_criteria>
    <request>
      <type_of_data>Spending</type_of_data>
      <records_from>1</records_from>
      <max_records>3</max_records>
      <response_columns>
        <column>agency</column>
        <column>payee_name</column>
        <column>check_amount</column>
        <column>fiscal_year</column>
      </response_columns>
    </request>
  </request_criteria>
  <result_records>
    <record_co


<Element 'response' at 0x1094b1170>

### 3. Overshooting total records

In [21]:
# stop point / base case / returning a nonexistent record
xml_overshoot=f"""
<request>
  <type_of_data>Spending</type_of_data>
  <records_from>44000000</records_from>
  <max_records>2</max_records>
  <search_criteria>
    <name>fiscal_year</name>
    <type>value</type>
    <value>2025</value>
  </search_criteria>
  
  <response_columns>
    <column>agency</column>
    <column>fiscal_year</column>
    <column>budget_code</column>
  </response_columns>
</request>
"""

In [22]:
fetch_xml(xml_overshoot)

XML response saved to tmp_latest_response.xml
<?xml version="1.0"?>
<response>
  <status>
    <result>success</result>
  </status>
  <request_criteria>
    <request>
      <type_of_data>Spending</type_of_data>
      <records_from>44000000</records_from>
      <max_records>2</max_records>
      <search_criteria>
        <name>fiscal_year</name>
        <type>value</type>
        <value>2025</value>
      </search_criteria>
      <response_columns>
        <column>agency</column>
        <column>fiscal_year</column>
        <column>budget_cod


<Element 'response' at 0x1094b3d30>

## Verify XML is well-formed

In [24]:
cur_xml_str = xml_overshoot

In [25]:
try:
    xml.dom.minidom.parseString(cur_xml_str)  # will raise if invalid
    print("XML is well-formed")
except Exception as e:
    print("Invalid XML:", e)


XML is well-formed


## Download XML Response

In [27]:
response = fetch_xml(xml_unfiltered)

XML response saved to tmp_latest_response.xml
<?xml version="1.0"?>
<response>
  <status>
    <result>success</result>
  </status>
  <request_criteria>
    <request>
      <type_of_data>Spending</type_of_data>
      <records_from>1</records_from>
      <max_records>3</max_records>
      <response_columns>
        <column>agency</column>
        <column>payee_name</column>
        <column>check_amount</column>
        <column>fiscal_year</column>
      </response_columns>
    </request>
  </request_criteria>
  <result_records>
    <record_co


# Reference

https://www.checkbooknyc.com/spending-api#sample-request-spending