# Set up database tools and load data

In [16]:
# Notebook setup: import the data libraries, load the jupysql extension,
# configure it, and open a DuckDB connection for SQL cells.

# Load duckdb, which lets us efficiently load large files
import duckdb

# Load pandas, which lets us manipulate dataframes
import pandas as pd

# Load polars, which enables loading of parquet files
import polars as pl

# Load the jupysql Jupyter extension, which provides the %sql / %config SqlMagic magics used below
%load_ext sql

# Set configurations on jupysql to directly output query results to pandas
# and to simplify the output that is printed to the notebook.
%config SqlMagic.autopandas = True

%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Connect jupysql to DuckDB using a SQLAlchemy-style connection string.
# This one targets an in-memory DuckDB; a file-backed db is the alternative.
%sql duckdb:///:memory:

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [34]:
# Read the remote parquet file into a Polars DataFrame.
# The URL is kept in a named constant so the data source is easy to spot and change.
NYPD_CONTRACTS_URL = "https://public.datathinking.org/checkbooknyc.com%2Fnypd_contracts.parquet"
nypd_contracts = pl.read_parquet(NYPD_CONTRACTS_URL)

In [35]:
# Preview the loaded frame; the notebook renders it as a table of column names, dtypes and rows
nypd_contracts

Document Code,Prime Contract ID,Contract Includes Sub Vendors,Vendor Record Type,Prime Vendor,Prime Vendor M/WBE Category,Prime Contract Purpose,Prime Contract Current Amount,Prime Contract Original Amount,Prime Vendor Spend to Date,Prime Contract Start Date,Prime Contract End Date,Prime Contract Registration Date,Prime Contracting Agency,Prime Contract Version,Parent Contract ID,Prime Contract Type,Prime Contract Award Method,Prime Contract Expense Category,Prime Contract Industry,Prime Contract PIN,Prime Contract APT PIN,Prime Woman Owned Business,Prime Emerging Business,Sub Vendor,Sub Vendor M/WBE Category,Sub Contract Purpose,Sub Vendor Status in PIP,Sub Contract Industry,Sub Contract Current Amount,Sub Contract Original Amount,Sub Vendor Paid To Date,Sub Contract Start Date,Sub Contract End Date,Sub Contract Reference ID,Sub Woman Owned Business,Sub Emerging Business
str,str,str,str,str,str,str,f64,f64,f64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,f64,f64,f64,str,str,str,str,str
"""RCT1""","""RCT10562023880…","""N/A""","""Prime Vendor""","""PROPERTYROOM. …","""Non-M/WBE""","""Revenue contra…",1.875e6,1.875e6,0.0,"""2023-03-08""","""2028-03-07""","""2023-03-20""","""Police Departm…","""1""","""-""","""WORK/LABOR""","""REQUEST FOR P…",,"""Professional S…","""05622P0002001""",,"""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562023880…","""N/A""","""Prime Vendor""","""PROPERTYROOM. …","""Non-M/WBE""","""Internet Based…",1.2e6,1.2e6,0.0,"""2020-03-08""","""2023-03-07""","""2022-11-15""","""Police Departm…","""1""","""-""","""WORK/LABOR""","""NEG ACQUISITSI…",,"""Not Classified…","""05622N0002001""",,"""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562018820…","""N/A""","""Prime Vendor""","""Insurance Auto…","""Non-M/WBE""","""Invoiced Vehic…",0.0,0.0,0.0,"""2017-12-01""","""2022-11-30""","""2018-02-13""","""Police Departm…","""1""","""-""","""OTHER EXPENSE …","""REQUEST FOR P…",,"""Standardized S…","""05617P0003001""",,"""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562018820…","""N/A""","""Prime Vendor""","""PROPERTYROOM. …","""Non-M/WBE""","""09665-Internet…",0.0,0.0,0.0,"""2017-09-08""","""2020-03-07""","""2017-10-19""","""Police Departm…","""1""","""-""","""OTHER EXPENSE …","""RENEWAL OF CON…",,"""Not Classified…","""05610P0003CNVR…","""056090000665""","""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562017820…","""N/A""","""Prime Vendor""","""Regent Caterin…","""Non-M/WBE""","""Develop, Opera…",218000.0,218000.0,0.0,"""2014-12-08""","""2017-12-07""","""2016-10-11""","""Police Departm…","""1""","""-""","""CONCESSIONS""","""ASSIGNMENT""",,"""Standardized S…","""056140000913""",,"""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562015820…","""N/A""","""Prime Vendor""","""PROPERTYROOM. …","""Non-M/WBE""","""09665-Internet…",0.0,0.0,0.0,"""2015-03-08""","""2017-09-07""","""2015-04-20""","""Police Departm…","""1""","""-""","""OTHER EXPENSE …","""RENEWAL OF CON…",,"""Not Classified…","""05610P0003CNVR…",,"""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562015820…","""N/A""","""Prime Vendor""","""PROPERTYROOM. …","""Non-M/WBE""","""09629 Invoicin…",0.0,0.0,0.0,"""2014-11-30""","""2017-11-29""","""2015-06-15""","""Police Departm…","""2""","""-""","""MISCREVENUE-NO…","""RENEWAL OF CON…",,"""Not Classified…","""05609P0006CNVA…","""056090000629""","""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562015820…","""N/A""","""Prime Vendor""","""regent caterin…","""Non-M/WBE""","""Develop, Opera…",525000.0,525000.0,0.0,"""2014-12-08""","""2017-12-07""","""2014-09-16""","""Police Departm…","""2""","""-""","""CONCESSIONS""","""REQUEST FOR P…",,"""Standardized S…","""056140000913""",,"""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562014820…","""N/A""","""Prime Vendor""","""PROPERTYROOM. …","""Non-M/WBE""","""09629 Invoicin…",0.0,0.0,0.0,"""2012-11-30""","""2014-11-29""","""2013-09-05""","""Police Departm…","""1""","""-""","""MISCREVENUE-NO…","""RENEWAL OF CON…",,"""Not Classified…","""05609P0006CNVR…","""056090000629""","""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """
"""RCT1""","""RCT10562011820…","""N/A""","""Prime Vendor""","""ANSWER VENDING…","""Non-M/WBE""","""Vending Machin…",1.094957e6,1.094957e6,0.0,"""2011-05-02""","""2021-05-01""","""2011-04-04""","""Police Departm…","""1""","""-""","""CONCESSIONS""","""COMPETITIVE SE…",,"""Standardized S…","""056090000675""",,"""No ""","""No ""","""-""","""-""","""-""","""-""","""-""",0.0,0.0,0.0,"""-""","""-""","""-""","""No ""","""No """


In [36]:
# Print the column-name -> dtype mapping of the loaded frame
column_types = nypd_contracts.schema
print(column_types)

{'Document Code': Utf8, 'Prime Contract ID': Utf8, 'Contract Includes Sub Vendors': Utf8, 'Vendor Record Type': Utf8, 'Prime Vendor': Utf8, 'Prime Vendor M/WBE Category': Utf8, 'Prime Contract Purpose': Utf8, 'Prime Contract Current Amount': Float64, 'Prime Contract Original Amount': Float64, 'Prime Vendor Spend to Date': Float64, 'Prime Contract Start Date': Utf8, 'Prime Contract End Date': Utf8, 'Prime Contract Registration Date': Utf8, 'Prime Contracting Agency': Utf8, 'Prime Contract Version': Utf8, 'Parent Contract ID': Utf8, 'Prime Contract Type': Utf8, 'Prime Contract Award Method': Utf8, 'Prime Contract Expense Category': Utf8, 'Prime Contract Industry': Utf8, 'Prime Contract PIN': Utf8, 'Prime Contract APT PIN': Utf8, 'Prime Woman Owned Business': Utf8, 'Prime Emerging Business': Utf8, 'Sub Vendor': Utf8, 'Sub Vendor M/WBE Category': Utf8, 'Sub Contract Purpose': Utf8, 'Sub Vendor Status in PIP': Utf8, 'Sub Contract Industry': Utf8, 'Sub Contract Current Amount': Float64, 'Sub

# Visualize data

In [37]:


# Plotting setup: import the charting libraries and configure Altair + VegaFusion.
import vegafusion as vf
import polars as pl
import altair as alt

# Altair-side defaults: lift the max-rows check and select the HTML renderer.
# NOTE(review): vf.enable() below may supersede both of these settings — confirm.
alt.data_transformers.disable_max_rows()
alt.renderers.enable('html')

# Configure DuckDB connection
vf.runtime.set_connection("duckdb")

# Enable VegaFusion for Altair charts with a very high row limit
vf.enable(row_limit=100000000)



vegafusion.enable(mimetype='html', row_limit=100000000, embed_options=None)

In [38]:
# Bar chart: number of records per prime contract type.
# Uses `nypd_contracts` (the frame loaded from parquet); the name `contracts`
# is never defined in this notebook, so it fails on a fresh kernel.
# NOTE(review): passing a Polars frame directly assumes an Altair version with
# DataFrame-interchange support — confirm against the installed version.
alt.Chart(nypd_contracts).mark_bar().encode(
    x='Prime Contract Type:N',
    y='count()',
).properties(
    title='NYPD contract records by prime contract type'
)



In [39]:
# Bar chart: number of records per prime contracting agency.
# Uses `nypd_contracts` (the frame loaded from parquet); the name `contracts`
# is never defined in this notebook, so it fails on a fresh kernel.
# NOTE(review): passing a Polars frame directly assumes an Altair version with
# DataFrame-interchange support — confirm against the installed version.
alt.Chart(nypd_contracts).mark_bar().encode(
    x='Prime Contracting Agency:N',
    y='count()',
).properties(
    title='NYPD contract records by prime contracting agency'
)

In [40]:
# Histogram of prime contract original amounts, restricted to large contracts.
# Uses `nypd_contracts` (the frame loaded from parquet); the name `contracts`
# is never defined in this notebook, so it fails on a fresh kernel.
# NOTE(review): passing a Polars frame directly assumes an Altair version with
# DataFrame-interchange support — confirm against the installed version.

# Only records whose original amount is strictly greater than this are charted.
LARGE_CONTRACT_THRESHOLD = 10_000_000

alt.Chart(nypd_contracts).mark_bar().encode(
    alt.X('Prime Contract Original Amount:Q').bin(maxbins=20),
    y='count()',
).transform_filter(
    alt.FieldGTPredicate(
        field='Prime Contract Original Amount',
        gt=LARGE_CONTRACT_THRESHOLD,
    )
).properties(
    title='Prime contract original amounts above 10,000,000'
)