This notebook shows examples of the following tasks:
- Summarizing available data in OpenPoliceData (OPD)
- Exporting data summaries
- Generating your own data summaries

In [2]:
# Import OpenPoliceData, preferring an installed copy of the package.
try:
    import openpolicedata as opd
except ImportError:
    # Only a failed import should trigger the fallback. A bare `except:` would
    # also swallow KeyboardInterrupt/SystemExit and any unrelated error raised
    # while the package initializes, hiding the real cause.
    # Fallback: add the relative path below to the module search path and
    # retry the import.
    import sys
    sys.path.append('../openpolicedata')
    import openpolicedata as opd

In [3]:
# Count the unique datasets (unique combination of state, source, agency, and table type)
num_unique_datasets = opd.datasets.num_unique()
print(f"The OpenPoliceData package has {num_unique_datasets} unique datasets")

The OpenPoliceData package has 208 unique datasets


In [6]:
# Split the total returned by num_sources into entries that cover all agencies
# in a state and entries that cover a single agency.
num_state_wide = opd.datasets.num_sources(full_states_only=True)
num_single_agency = opd.datasets.num_sources() - num_state_wide
print(f"OPD has {num_state_wide} datasets for all agencies in a state")
print(f"OPD has {num_single_agency} datasets for individual agencies")

OPD has 9 datasets for all agencies in a state
OPD has 120 datasets for individual agencies


In [10]:
# Summarize the number of datasets per state and show the first 10 rows
state_summary = opd.datasets.summary_by_state()
state_summary.head(10)

Unnamed: 0_level_0,Total
State,Unnamed: 1_level_1
California,
All State Agencies,2.0
Individual Agency,23.0
Texas,20.0
North Carolina,
All State Agencies,1.0
Individual Agency,13.0
New York,12.0
Ohio,10.0
Arizona,9.0


In [16]:
# Same per-state summary, broken down by year; show the first 7 rows
state_summary_by_year = opd.datasets.summary_by_state(by="year")
state_summary_by_year.head(7)

Unnamed: 0_level_0,Total,N/A,MULTI-YEAR,2022,2021,2020,2019,2018,2017,2016,...,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
California,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,2.0,0.0,0.0,0.0,0.0,2.0,2.0,2.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Individual Agency,23.0,0.0,18.0,0.0,4.0,5.0,4.0,4.0,4.0,4.0,...,2.0,2.0,2.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0
Texas,20.0,0.0,13.0,0.0,0.0,2.0,6.0,6.0,6.0,5.0,...,5.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0
North Carolina,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Individual Agency,13.0,2.0,10.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [17]:
# Same per-state summary, broken down by table type; show the first 7 rows
state_summary_by_table = opd.datasets.summary_by_state(by="table")
state_summary_by_table.head(7)

Unnamed: 0_level_0,Total,ARRESTS,CALLS FOR SERVICE,CITATIONS,COMPLAINTS,EMPLOYEE,FIELD CONTACTS,OFFICER-INVOLVED SHOOTINGS,OFFICER-INVOLVED SHOOTINGS - CIVILIANS,OFFICER-INVOLVED SHOOTINGS - INCIDENTS,...,TRAFFIC ARRESTS,TRAFFIC CITATIONS,TRAFFIC STOPS,TRAFFIC WARNINGS,USE OF FORCE,USE OF FORCE - CIVILIANS,USE OF FORCE - CIVILIANS/OFFICERS,USE OF FORCE - INCIDENTS,USE OF FORCE - OFFICERS,VEHICLE PURSUITS
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
California,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
Individual Agency,23.0,0.0,5.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,0.0,7.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
Texas,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,...,1.0,1.0,5.0,1.0,2.0,1.0,0.0,0.0,0.0,0.0
North Carolina,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Individual Agency,13.0,0.0,1.0,1.0,0.0,2.0,1.0,0.0,0.0,0.0,...,0.0,0.0,6.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [18]:
# Summarize the number of datasets per table type and show the first rows
table_type_summary = opd.datasets.summary_by_table_type()
table_type_summary.head()

Unnamed: 0_level_0,Total,Definition
TableType,Unnamed: 1_level_1,Unnamed: 2_level_1
STOPS,,
TRAFFIC STOPS (Only),65.0,Traffic stops are stops by police of motor veh...
STOPS (All),31.0,Contains data on both pedestrian and traffic s...
PEDESTRIAN STOPS (Only),2.0,Stops of pedestrians based on 'reasonable susp...
CALLS FOR SERVICE,22.0,Includes dispatched calls (911 or non-emergenc...


In [19]:
# Per-table-type summary, broken down by year; show the first rows
table_type_summary_by_year = opd.datasets.summary_by_table_type(by_year=True)
table_type_summary_by_year.head()

Unnamed: 0_level_0,Total,N/A,MULTI-YEAR,2022,2021,2020,2019,2018,2017,2016,...,2013,2012,2011,2010,2009,2008,2007,2006,2005,Definition
TableType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
STOPS,,,,,,,,,,,...,,,,,,,,,,
TRAFFIC STOPS (Only),65.0,0.0,63.0,0.0,0.0,2.0,2.0,2.0,2.0,2.0,...,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,Traffic stops are stops by police of motor veh...
STOPS (All),31.0,0.0,30.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Contains data on both pedestrian and traffic s...
PEDESTRIAN STOPS (Only),2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Stops of pedestrians based on 'reasonable susp...
CALLS FOR SERVICE,22.0,0.0,14.0,2.0,8.0,8.0,8.0,7.0,7.0,6.0,...,5.0,5.0,4.0,2.0,1.0,1.0,1.0,1.0,1.0,Includes dispatched calls (911 or non-emergenc...


In [20]:
# Every summary table is returned as a pandas DataFrame, so it can be written
# straight to a CSV file. Here the per-table-type summary broken down by year
# is built first and then saved.
summary_to_export = opd.datasets.summary_by_table_type(by_year=True)
summary_to_export.to_csv("table_summary_by_year.csv")