This notebook shows how to find the datasets that are available for a particular state.


In [1]:
# Import OpenPoliceData, preferring an installed copy of the package.
try:
    # Keep this import last in the try block: the except block below only
    # retries this one import, so nothing else should be able to trigger it.
    import openpolicedata as opd
except ImportError:
    # Only a failed import should trigger the fallback. Any other error (e.g. a
    # bug raised while the installed package initializes, or a keyboard interrupt)
    # is left to propagate so that it is not silently masked.
    import sys
    # Presumably a local checkout of the package located next to this
    # notebook's folder -- adjust the path if your layout differs.
    sys.path.append('../openpolicedata')
    import openpolicedata as opd

In [2]:
# Query the complete table of available datasets. The result is a pandas DataFrame
# (see https://pandas.pydata.org/docs/user_guide/10min.html#min).
# Each row describes one dataset that can be accessed, and the table can be
# filtered (as shown in the cells below) to find a dataset of interest.
datasets = opd.datasets.query()
# Preview only the first few rows instead of displaying the whole table
datasets.head()

Unnamed: 0,State,SourceName,Agency,TableType,Year,Description,DataType,URL,date_field,dataset_id,agency_field,min_version,readme
0,Virginia,Virginia,MULTI,STOPS,MULTI,A data collection consisting of all traffic an...,Socrata,data.virginia.gov,incident_date,2c96-texw,agency_name,,https://data.virginia.gov/api/views/2c96-texw/...
1,Virginia,Fairfax County,Fairfax County,TRAFFIC WARNINGS,2019,Traffic Warnings issued by Fairfax County Police,ArcGIS,https://services9.arcgis.com/kYvfX7YK8OobHItA/...,actdate,,,,
2,Virginia,Fairfax County,Fairfax County,TRAFFIC WARNINGS,2020,Traffic Warnings issued by Fairfax County Police,ArcGIS,https://services9.arcgis.com/kYvfX7YK8OobHItA/...,actdate,,,,
3,Virginia,Fairfax County,Fairfax County,TRAFFIC CITATIONS,2019,Traffic Citations issued by Fairfax County Police,ArcGIS,https://services9.arcgis.com/kYvfX7YK8OobHItA/...,tc_date,,,,
4,Virginia,Fairfax County,Fairfax County,TRAFFIC CITATIONS,2020,Traffic Citations issued by Fairfax County Police,ArcGIS,https://services9.arcgis.com/kYvfX7YK8OobHItA/...,tc_date,,,,


In [3]:
# Collect the distinct values of the State column to see which states have data
states_with_data = datasets['State'].unique()
print(f"These states have datasets: {states_with_data}")

These states have datasets: ['Virginia' 'Maryland' 'Colorado' 'North Carolina' 'California' 'Arkansas'
 'Arizona' 'Connecticut' 'Florida' 'Georgia' 'Iowa' 'Idaho' 'Illinois'
 'Indiana' 'Kansas' 'Kentucky' 'Massachusetts' 'Michigan' 'Minnesota'
 'Missouri' 'Mississippi' 'Montana' 'North Dakota' 'Nebraska'
 'New Hampshire' 'Nevada' 'New York' 'Ohio' 'Oklahoma' 'Oregon'
 'Pennsylvania' 'Rhode Island' 'South Carolina' 'South Dakota' 'Tennessee'
 'Texas' 'Vermont' 'Washington' 'Wisconsin' 'Wyoming' 'New Jersey'
 'Louisiana']


In [4]:
# To see all available datasets for a single state, pass its name to the `state` filter.
df = opd.datasets.query(state="Maryland")
# Preview the first few matching rows; the complete result remains in df
df.head()

Unnamed: 0,State,SourceName,Agency,TableType,Year,Description,DataType,URL,date_field,dataset_id,agency_field,min_version,readme
5,Maryland,Montgomery County,Montgomery County,TRAFFIC STOPS,MULTI,This dataset contains traffic violation inform...,Socrata,data.montgomerycountymd.gov,date_of_stop,4mse-ku6q,,,https://data.montgomerycountymd.gov/Public-Saf...
6,Maryland,Montgomery County,Montgomery County,COMPLAINTS,MULTI,This dataset contains allegations brought to t...,Socrata,data.montgomerycountymd.gov,created_dt,usip-62e2,,,https://data.montgomerycountymd.gov/Public-Saf...
55,Maryland,Baltimore,Baltimore,STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,,,https://github.com/stanford-policylab/opp/blob...
56,Maryland,Maryland,MULTI,TRAFFIC STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,department_name,,https://github.com/stanford-policylab/opp/blob...
367,Maryland,Baltimore,Baltimore,CALLS FOR SERVICE,2017,Police Emergency and Non-Emergency calls to 911,ArcGIS,https://opendata.baltimorecity.gov/egis/rest/s...,,,,,


In [5]:
# Now narrow the search further by looking for a particular kind of data within a state.
# First, look at which table types are available for the state.
# The state name is defined once and reused for both the query and the message, so
# changing it here is enough to explore another state.
state = "Maryland"
df = opd.datasets.query(state=state)
# Use the requested name rather than df.iloc[0]['State'], which raises an IndexError
# when the query returns no rows (i.e. a state with no datasets).
print(f"{state} has the following tables available: {df['TableType'].unique()}")


Maryland has the following tables available: ['TRAFFIC STOPS' 'COMPLAINTS' 'STOPS' 'CALLS FOR SERVICE' 'ARRESTS']


In [6]:
# Filters can be combined. For example, to find all traffic stops datasets for
# Maryland, also pass table_type using one of the names printed by the previous cell.
# To learn how to load the data, open the notebook: loading_datasets.ipynb
df = opd.datasets.query(state="Maryland", table_type='TRAFFIC STOPS')
df.head()

Unnamed: 0,State,SourceName,Agency,TableType,Year,Description,DataType,URL,date_field,dataset_id,agency_field,min_version,readme
5,Maryland,Montgomery County,Montgomery County,TRAFFIC STOPS,MULTI,This dataset contains traffic violation inform...,Socrata,data.montgomerycountymd.gov,date_of_stop,4mse-ku6q,,,https://data.montgomerycountymd.gov/Public-Saf...
56,Maryland,Maryland,MULTI,TRAFFIC STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,department_name,,https://github.com/stanford-policylab/opp/blob...
