Notebook for testing the Checkbook NYC Spending API. References:
- requests: https://requests.readthedocs.io/en/latest/user/advanced/
- xml: https://automatetheboringstuff.com/3e/chapter18.html
- api: https://www.checkbooknyc.com/spending-api#sample-request-spending


In [1]:
import requests
import xml.etree.ElementTree as ET
import time

In [None]:
# session set up: a single Session is reused for every request in this notebook
# (presumably so cookies from the landing page carry over to the API calls — confirm)
session = requests.Session()
session.headers.update({
    'user-agent': 'mozilla/5.0 (windows nt 10.0; win64; x64) applewebkit/537.36'
})

# **** visit main site initially to bypass incapsula ****
# timeout: without one, requests waits indefinitely on a stalled connection
response = session.get('https://www.checkbooknyc.com/', timeout=60)
print(f"main site status: {response.status_code}")
time.sleep(2)  # short pause between the landing-page visit and the first API call

# headers sent with every later request made through this session
session.headers.update({
    'content-type': 'application/xml',
    'referer': 'https://www.checkbooknyc.com/',
    'origin': 'https://www.checkbooknyc.com'
})

main site status: 200


In [None]:
# Request body for the Spending API, posted as-is by the "api call" cell.
# Asks for up to 1000 records starting at record 1, with seven <criteria>
# filters (agency_code, department_code, expense_category, document_id,
# spending_category, mwbe_category, industry) and 19 response columns.
# NOTE(review): the criteria are presumably combined with AND — confirm
# against the API documentation.
xml_body="""
<request>
  <type_of_data>Spending</type_of_data>
  <records_from>1</records_from>
  <max_records>1000</max_records>
  <search_criteria>
    <criteria>
      <name>agency_code</name>
      <type>value</type>
      <value>039</value>
    </criteria>
    <criteria>
      <name>department_code</name>
      <type>value</type>
      <value>D22</value>
    </criteria>
    <criteria>
      <name>expense_category</name>
      <type>value</type>
      <value>2200</value>
    </criteria>
    <criteria>
      <name>document_id</name>
      <type>value</type>
      <value>20180011164</value>
    </criteria>
    <criteria>
      <name>spending_category</name>
      <type>value</type>
      <value>cc</value>
    </criteria>
   <criteria>
      <name>mwbe_category</name>
      <type>value</type>
      <value>7</value>
    </criteria>
   <criteria>
      <name>industry</name>
      <type>value</type>
      <value>2</value>
    </criteria>
  </search_criteria>
  <response_columns>
    <column>agency</column>
    <column>fiscal_year</column>
    <column>document_id</column>
    <column>payee_name</column>
    <column>department</column>
    <column>check_amount</column>
    <column>expense_category</column>
    <column>contract_id</column>
    <column>capital_project</column>
    <column>industry</column>
    <column>issue_date</column>
    <column>spending_category</column>
    <column>mwbe_category</column>
    <column>sub_vendor</column>
    <column>associated_prime_vendor</column>
    <column>sub_contract_reference_id</column>
    <column>woman_owned_business</column>
    <column>emerging_business</column>
    <column>budget_code</column>
  </response_columns>
</request>
"""

In [None]:
# api call: POST the request in `xml_body` and print every returned transaction
# timeout: without one, requests waits indefinitely on a stalled connection
response = session.post('https://www.checkbooknyc.com/api', data=xml_body, timeout=60)
# surface HTTP errors (4xx/5xx) here instead of as an XML parse error below
response.raise_for_status()
root = ET.fromstring(response.content)
status = root.find('.//status/result')

# find() returns None when the element is absent, so check before reading .text
if status is not None and status.text == 'success':
    # check record count
    record_count = root.find('.//record_count')
    if record_count is not None:
        print(f"record count: {record_count.text}")

    # get transactions (not records)
    transactions = root.findall('.//transaction')
    print(f"found {len(transactions)} transactions")

    for transaction in transactions:
        print("\ntransaction:")
        for field in transaction:
            print(f"  {field.tag}: {field.text}")
else:
    # a reply without an explicit 'success' would otherwise print nothing;
    # show the <status> element if there is one, else the start of the raw body
    status_block = root.find('.//status')
    if status_block is not None:
        details = ET.tostring(status_block, encoding='unicode')
    else:
        details = response.text[:500]
    print(f"request did not succeed:\n{details}")

record count: 1
found 1 transactions

transaction:
  agency: Queens Borough Public Library
  associated_prime_vendor: N/A
  budget_code: LQTS (QUEENS LIBRARY SYSTEM: REPLACEMENT OF TE)
  capital_project: 039LQD122TEL001
  contract_id: CT103920171428053
  check_amount: 35775.00
  department: 400-039-D22
  document_id: 20180011164-1-DSB-AD
  expense_category: CAPITAL PURCHASED EQUIPMENT
  fiscal_year: 2018
  industry: Goods
  issue_date: 2017-07-17
  mwbe_category: Non-M/WBE
  woman_owned_business: No 
  emerging_business: No 
  payee_name: NEW CASTLE COMMUNICATIONS INC
  spending_category: Capital Contracts
  sub_contract_reference_id: None
  sub_vendor: No


In [5]:
# simple spending data (no specific criteria)
# requests the first 3 Spending records with four columns and prints one line each
xml_simple_spending = """<request>
  <type_of_data>Spending</type_of_data>
  <records_from>1</records_from>
  <max_records>3</max_records>
  <response_columns>
    <column>agency</column>
    <column>payee_name</column>
    <column>check_amount</column>
    <column>fiscal_year</column>
  </response_columns>
</request>"""

response = session.post('https://www.checkbooknyc.com/api', data=xml_simple_spending, timeout=60)
# surface HTTP errors (4xx/5xx) here instead of as an XML parse error below
response.raise_for_status()
root = ET.fromstring(response.content)

transactions = root.findall('.//transaction')
print(f"simple spending: {len(transactions)} transactions")

for transaction in transactions:
    # findtext() gives None for a missing child and '' for an empty one;
    # `or` maps both to the fallback so the literal "None" is never printed
    agency = transaction.findtext('agency') or 'unknown'
    payee = transaction.findtext('payee_name') or 'unknown'
    amount = transaction.findtext('check_amount') or '0'
    year = transaction.findtext('fiscal_year') or 'unknown'
    print(f"  {year}: {agency} -> {payee}: ${amount}")

simple spending: 3 transactions
  2024: Police Department -> OPERATIONS: $414069787.65
  2019: Department of Education -> GE INSTR & SCH LEADERSHIP - PS: $232670699.67
  2020: Department of Education -> GE INSTR & SCH LEADERSHIP - PS: $219225483.61
