In [1]:
import sys
import os

# Resolve the parent of the current working directory to an absolute path and add it to
# the import search path once (presumably so the `openpolicedata` import that follows can
# be found in the local checkout - confirm against the repository layout).
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import openpolicedata as opd

In [2]:
# Query for the entire table of available data (no filters are passed)
datasets = opd.datasets_query()
# Display only the first rows rather than the whole table
datasets.head()

Unnamed: 0,State,SourceName,Jurisdiction,TableType,Year,Description,DataType,URL,date_field,dataset_id,jurisdiction_field
0,Arizona,Gilbert,Gilbert,STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,
1,Arizona,Mesa,Mesa,STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,
2,Arizona,State Patrol,State Patrol,STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,
3,Arkansas,Little Rock,Little Rock,TRAFFIC STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,
4,California,Anaheim,Anaheim,TRAFFIC STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,


In [3]:
# Query for all available datasets from Maryland (filtering on the state only)
df = opd.datasets_query(state="Maryland")
# Display only the first rows of the filtered result
df.head()

Unnamed: 0,State,SourceName,Jurisdiction,TableType,Year,Description,DataType,URL,date_field,dataset_id,jurisdiction_field
52,Maryland,Baltimore,Baltimore,STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,
53,Maryland,Maryland,MULTI,TRAFFIC STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,department_name
54,Maryland,Montgomery County,Montgomery County,COMPLAINTS,MULTI,This dataset contains allegations brought to t...,Socrata,data.montgomerycountymd.gov,created_dt,usip-62e2,
55,Maryland,Montgomery County,Montgomery County,TRAFFIC STOPS,MULTI,This dataset contains traffic violation inform...,Socrata,data.montgomerycountymd.gov,date_of_stop,4mse-ku6q,


In [4]:
# Query for all stops data containing both traffic and investigatory stops in Virginia.
# Two filters are combined here: the table type and the state.
# NOTE(review): this rebinds `df`, replacing the Maryland query result from the earlier cell.
df = opd.datasets_query(table_type=opd.TableTypes.STOPS, state="Virginia")
df.head()

Unnamed: 0,State,SourceName,Jurisdiction,TableType,Year,Description,DataType,URL,date_field,dataset_id,jurisdiction_field
172,Virginia,Virginia,MULTI,STOPS,MULTI,A data collection consisting of all traffic an...,Socrata,data.virginia.gov,incident_date,2c96-texw,agency_name


In [5]:
# To access the data, create a source using a source name (usually a police department name).
# The optional state input resolves ambiguity when the same source name exists in more than one state.
# Create the data source for Virginia and list the datasets it provides.

src = opd.Source(source_name="Virginia", state="Virginia")
src.datasets.head()

Unnamed: 0,State,SourceName,Jurisdiction,TableType,Year,Description,DataType,URL,date_field,dataset_id,jurisdiction_field
172,Virginia,Virginia,MULTI,STOPS,MULTI,A data collection consisting of all traffic an...,Socrata,data.virginia.gov,incident_date,2c96-texw,agency_name


In [6]:
# Find out what types of data (table types) are available from this source

types = src.get_tables_types()

# `types` is indexed in a later cell to pick the table type to query
print(types)

['STOPS']


In [7]:
# Find out what years are available from the stops table.
# The table type is taken from the first entry of `types` returned by get_tables_types().

years = src.get_years(table_type=types[0])
print(years)

[2020, 2021, 2022]


In [10]:
# Find out what jurisdictions are contained in the stops table
jurisdictions = src.get_jurisdictions()

# Preview at most N jurisdictions. The slice uses N (not a repeated literal) so the message
# and the printed list stay in sync when N is changed, and the reported count reflects the
# number actually printed when fewer than N jurisdictions are available.
N = 20
preview = jurisdictions[:N]
print(f"Printing out {len(preview)} of {len(jurisdictions)} jurisdictions")
print(preview)

Printing out 20 of 332 jurisdictions
['Lebanon Police Department', 'Richlands Police Department', 'Charlottesville Police Department', 'Leesburg Police Department', "Southampton County Sheriff's Office", 'Aquia Harbour Police Dept.', 'Blackstone Police Department', 'Boykins Police Department', 'Falls Church Police Department', 'Haymarket Police Department', 'Branchville Police Department', 'Honaker Police Department', 'Northampton County Sheriffs Office', 'Longwood University Police Department', "Norfolk City Sheriff's Office", 'Coeburn Police Department', 'Chincoteague Police Department', 'McKenney Police Department', 'Rocky Mount Police Department', 'CSX Railroad Police']


In [11]:
# Find out what jurisdictions are contained in the stops table that contain the word "Arlington"
# NOTE: this rebinds `jurisdictions`, replacing the full list from the previous cell.
jurisdictions = src.get_jurisdictions(partial_name="Arlington")
print(jurisdictions)

['Arlington County Police Department', "Arlington County Sheriff's Office"]


In [12]:
# Fetch the 2021 stops table from the source, restricted to a single agency:
# the Arlington County Police Department
agency = "Arlington County Police Department"
t = src.load_from_url(
    year=2021,
    table_type=opd.TableTypes.STOPS,
    agency_filter=agency,
)

In [13]:
# Show the first 5 rows of the table held by `t` (the result of load_from_url)
t.table.head(n=5)

Unnamed: 0,incident_date,agency_name,jurisdiction,reason_for_stop,race,ethnicity,age,gender,action_taken,violation_type,specific_violation,person_searched,vehicle_searched,additional_arrest
0,2021-01-01,Arlington County Police Department,ARLINGTON CO,OTHER,WHITE,HISPANIC,44,FEMALE,WARNING,LOCAL,14.2-16,NO,NO,NO
1,2021-01-01,Arlington County Police Department,ARLINGTON CO,EQUIPMENT VIOLATION,WHITE,NON-HISPANIC,23,MALE,WARNING,COMMONWEALTH,46.2-1030,NO,NO,NO
2,2021-01-01,Arlington County Police Department,ARLINGTON CO,TRAFFIC VIOLATION,BLACK OR AFRICAN AMERICAN,NON-HISPANIC,27,MALE,WARNING,COMMONWEALTH,46.2-1030,NO,NO,NO
3,2021-01-01,Arlington County Police Department,ARLINGTON CO,EQUIPMENT VIOLATION,WHITE,NON-HISPANIC,53,MALE,CITATION/SUMMONS,COMMONWEALTH,46.2-301,NO,NO,NO
4,2021-01-01,Arlington County Police Department,ARLINGTON CO,EQUIPMENT VIOLATION,WHITE,NON-HISPANIC,53,MALE,WARNING,COMMONWEALTH,46.2-612,NO,NO,NO


In [27]:
from pathlib import Path
# Save to CSV file with default filename. pathlib joins the path components with the
# separator appropriate for the current OS, so no manual escaping of slashes is needed.
output_dir = Path('.') / 'Data'

# Create the output directory (and any missing parents) if nothing exists at that path.
# exist_ok=True keeps this from failing if the directory appears between the check and the
# call. If something that is not a directory already exists there, it is left alone and
# reported by the else branch below.
if not output_dir.exists():
    output_dir.mkdir(parents=True, exist_ok=True)

if output_dir.is_dir():
    t.to_csv(output_dir=output_dir)
    # For custom filename uncomment below
    # t.to_csv(output_dir=output_dir, filename="custom_filename.csv")
    print(f"Created csv files in {output_dir.resolve()}")
else:
    print(f"Unable to create the directory {output_dir.resolve()}")

Created csv files in /home/user/cjc/openpolicedata/notebooks/Data


In [28]:
# Load the CSV file back in

# A new source is not strictly needed here; the `src` object created earlier should work as well.
# The state is passed, exactly as when `src` was created, so the source name cannot be
# ambiguous and both sources are constructed the same way.
new_src = opd.Source(source_name="Virginia", state="Virginia")
# Same year and agency filter as the original load, so the file written by to_csv is the one read back
new_t = new_src.load_from_csv(output_dir=output_dir, year=2021, agency_filter=agency)

In [29]:
# Preview the table that was read back from the CSV file
new_t.table.head()

Unnamed: 0,incident_date,agency_name,jurisdiction,reason_for_stop,race,ethnicity,age,gender,action_taken,violation_type,specific_violation,person_searched,vehicle_searched,additional_arrest
0,2021-01-01,Arlington County Police Department,ARLINGTON CO,OTHER,WHITE,HISPANIC,44,FEMALE,WARNING,LOCAL,14.2-16,NO,NO,NO
1,2021-01-01,Arlington County Police Department,ARLINGTON CO,EQUIPMENT VIOLATION,WHITE,NON-HISPANIC,23,MALE,WARNING,COMMONWEALTH,46.2-1030,NO,NO,NO
2,2021-01-01,Arlington County Police Department,ARLINGTON CO,TRAFFIC VIOLATION,BLACK OR AFRICAN AMERICAN,NON-HISPANIC,27,MALE,WARNING,COMMONWEALTH,46.2-1030,NO,NO,NO
3,2021-01-01,Arlington County Police Department,ARLINGTON CO,EQUIPMENT VIOLATION,WHITE,NON-HISPANIC,53,MALE,CITATION/SUMMONS,COMMONWEALTH,46.2-301,NO,NO,NO
4,2021-01-01,Arlington County Police Department,ARLINGTON CO,EQUIPMENT VIOLATION,WHITE,NON-HISPANIC,53,MALE,WARNING,COMMONWEALTH,46.2-612,NO,NO,NO
