# Summarizing OpenPoliceData Data

This notebook shows examples of the following tasks:
- Summarizing available data in OpenPoliceData (OPD)
- Exporting data summaries
- Generating your own data summaries

In [2]:
import openpolicedata as opd

In [3]:
# Report how many unique datasets OPD provides. A dataset is counted as unique
# by its combination of state, source, agency, and table type.
unique_datasets_msg = f"The OpenPoliceData package has {opd.datasets.num_unique()} unique datasets"
print(unique_datasets_msg)

The OpenPoliceData package has 425 unique datasets


In [11]:
# Report source coverage at two levels:
#   1. states for which OPD has data covering all reporting agencies, and
#   2. individual agencies, computed as the total number of sources minus
#      the number of full-state sources.
statewide_msg = f"OPD has at least 1 datasets for all reporting agencies in {opd.datasets.num_sources(full_states_only=True)} states"
print(statewide_msg)

agency_msg = f"OPD has at least 1 dataset for {opd.datasets.num_sources()-opd.datasets.num_sources(full_states_only=True)} individual agencies"
print(agency_msg)

OPD has at least 1 datasets for all reporting agencies in 10 states
OPD has at least 1 dataset for 158 individual agencies


In [5]:
# Count the datasets available for each state and preview the first 10 rows.
# As the output below shows, states with both kinds of source are split into
# "All State Agencies" and "Individual Agency" rows.
state_totals = opd.datasets.summary_by_state()
state_totals.head(10)

Unnamed: 0_level_0,Total
State,Unnamed: 1_level_1
California,
All State Agencies,2.0
Individual Agency,58.0
North Carolina,
All State Agencies,1.0
Individual Agency,31.0
New York,
All State Agencies,1.0
Individual Agency,29.0
Arizona,23.0


In [6]:
# Same per-state dataset counts, with one extra column per year
# (requested with by="year"). Only the first 7 rows are previewed.
state_totals_by_year = opd.datasets.summary_by_state(by="year")
state_totals_by_year.head(7)

Unnamed: 0_level_0,Total,N/A,MULTI-YEAR,2024,2023,2022,2021,2020,2019,2018,...,2012,2011,2010,2009,2008,2007,2006,2005,2004,2003
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
California,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,2.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Individual Agency,58.0,3.0,42.0,4.0,7.0,9.0,11.0,11.0,14.0,10.0,...,3.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0
North Carolina,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Individual Agency,31.0,3.0,26.0,0.0,1.0,2.0,2.0,2.0,2.0,2.0,...,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,0.0,0.0
New York,,,,,,,,,,,...,,,,,,,,,,


In [7]:
# Same per-state dataset counts, with one extra column per table type
# (requested with by="table"). Only the first 7 rows are previewed.
state_totals_by_table = opd.datasets.summary_by_state(by="table")
state_totals_by_table.head(7)

Unnamed: 0_level_0,Total,ARRESTS,CALLS FOR SERVICE,CITATIONS,COMPLAINTS,COMPLAINTS - ALLEGATIONS,COMPLAINTS - BACKGROUND,COMPLAINTS - BODY WORN CAMERA,COMPLAINTS - OFFICERS,COMPLAINTS - PENALTIES,...,TRAFFIC STOPS,TRAFFIC STOPS - INCIDENTS,TRAFFIC STOPS - SUBJECTS,TRAFFIC WARNINGS,USE OF FORCE,USE OF FORCE - INCIDENTS,USE OF FORCE - OFFICERS,USE OF FORCE - SUBJECTS,USE OF FORCE - SUBJECTS/OFFICERS,VEHICLE PURSUITS
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
California,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
Individual Agency,58.0,0.0,10.0,3.0,2.0,1.0,1.0,1.0,0.0,0.0,...,7.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,1.0,2.0
North Carolina,,,,,,,,,,,...,,,,,,,,,,
All State Agencies,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Individual Agency,31.0,3.0,3.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,...,7.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0
New York,,,,,,,,,,,...,,,,,,,,,,


In [9]:
# Count the datasets available for each table type. The result also carries
# a "Definition" column describing each table type (see the output below).
table_type_totals = opd.datasets.summary_by_table_type()
table_type_totals

Unnamed: 0_level_0,Total,Definition
TableType,Unnamed: 1_level_1,Unnamed: 2_level_1
STOPS-RELATED,,
Single Table,,
STOPS,37,Contains data on both pedestrian and traffic s...
Multi-Table,,
TRAFFIC STOPS,71,Traffic stops are stops by police of motor veh...
...,...,...
POINTING WEAPON,2,Instances of officers pointing a weapon (firea...
LAWSUITS,2,Lawsuits against a police department
INCIDENTS - SUBJECTS,1,Incidents data may be split into several table...
INCIDENTS - INCIDENTS,1,Incidents data may be split into several table...


In [10]:
# Per-table-type dataset counts with one extra column per year
# (requested with by_year=True). Preview the first rows only.
table_type_totals_by_year = opd.datasets.summary_by_table_type(by_year=True)
table_type_totals_by_year.head()

Unnamed: 0_level_0,Total,N/A,MULTI-YEAR,2024,2023,2022,2021,2020,2019,2018,...,2011,2010,2009,2008,2007,2006,2005,2004,2003,Definition
TableType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
STOPS-RELATED,,,,,,,,,,,...,,,,,,,,,,
Single Table,,,,,,,,,,,...,,,,,,,,,,
STOPS,37.0,0.0,34.0,0.0,3.0,3.0,3.0,2.0,2.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Contains data on both pedestrian and traffic s...
Multi-Table,,,,,,,,,,,...,,,,,,,,,,
TRAFFIC STOPS,71.0,0.0,67.0,0.0,3.0,5.0,5.0,6.0,6.0,6.0,...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,Traffic stops are stops by police of motor veh...


In [None]:
# Every summary table returned by OPD is a pandas DataFrame, so it can be
# exported with pandas' to_csv. Here the per-table-type summary with a year
# breakdown is written to "table_summary_by_year.csv" (a relative path).
summary_to_export = opd.datasets.summary_by_table_type(by_year=True)
summary_to_export.to_csv("table_summary_by_year.csv")