This notebook shows an example of how to load a dataset. 
It assumes you found the dataset using the techniques shown in `finding_datasets.ipynb`.
The basic steps it demonstrates for loading data are:
1. Find available datasets with `opd.datasets.query`
2. Create a data source using `opd.Source` and information from the previous step.
3. Find the available data types and the years available for a data type using `get_tables_types` and `get_years`
4. Load the data type for a given year using `load_from_url`

In [1]:
# Try to import openpolicedata normally; if the import fails, add
# '../openpolicedata' to the module search path and try again.
try:
    import pandas as pd
    import openpolicedata as opd   # This import should be last in the try block because the except block will only try to load it
except ImportError:
    # Only a failed import triggers the fallback; any other error
    # (e.g. a keyboard interrupt) is allowed to propagate instead of being hidden.
    import sys
    sys.path.append('../openpolicedata')
    import openpolicedata as opd

In [2]:
# We will load Montgomery County, Maryland traffic stop data.
# First, query the available datasets for traffic stops in Maryland to show our options.
df = opd.datasets.query(table_type='TRAFFIC STOPS', state="Maryland")
df.head()


Unnamed: 0,State,SourceName,Agency,TableType,Year,Description,DataType,URL,date_field,dataset_id,agency_field,min_version,readme
5,Maryland,Montgomery County,Montgomery County,TRAFFIC STOPS,MULTI,This dataset contains traffic violation inform...,Socrata,data.montgomerycountymd.gov,date_of_stop,4mse-ku6q,,,https://data.montgomerycountymd.gov/Public-Saf...
56,Maryland,Maryland,MULTI,TRAFFIC STOPS,MULTI,Standardized stop data from the Stanford Open ...,CSV,https://stacks.stanford.edu/file/druid:yg821jf...,date,,department_name,,https://github.com/stanford-policylab/opp/blob...


In [3]:
# To access the data, create a source using a source name (usually a police department name).
# There is an optional state input to clarify ambiguities.
# Using the Maryland results from the dataset query, we choose the agency "Montgomery County" as the source_name.

src = opd.Source(source_name="Montgomery County", state="Maryland")
# Show the first rows of this source's datasets table
src.datasets.head()

Unnamed: 0,State,SourceName,Agency,TableType,Year,Description,DataType,URL,date_field,dataset_id,agency_field,min_version,readme
5,Maryland,Montgomery County,Montgomery County,TRAFFIC STOPS,MULTI,This dataset contains traffic violation inform...,Socrata,data.montgomerycountymd.gov,date_of_stop,4mse-ku6q,,,https://data.montgomerycountymd.gov/Public-Saf...
6,Maryland,Montgomery County,Montgomery County,COMPLAINTS,MULTI,This dataset contains allegations brought to t...,Socrata,data.montgomerycountymd.gov,created_dt,usip-62e2,,,https://data.montgomerycountymd.gov/Public-Saf...


In [4]:
# Find out what types of data (table types) are available from this source
types = src.get_tables_types()

# Print the available table types
print(types)

['TRAFFIC STOPS', 'COMPLAINTS']


In [5]:
# Find out what years are available for the traffic stops table.
# The table type is named explicitly instead of being taken by position from the
# list of table types, so this cell does not depend on the order of that list.
# If you do not have a key set up, you may see the message: "WARNING:root:Requests made without an app_token will be subject to strict throttling limits." This is normal.
years = src.get_years(table_type='TRAFFIC STOPS')
print(years)

[2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]


In [6]:
# Load traffic stop data for 2021 from this source.
# The loaded data is accessed through the `table` attribute of the result.
t = src.load_from_url(year=2021, table_type='TRAFFIC STOPS')

In [7]:
# Show the first 5 rows of the loaded table
t.table.head(n=5)
# Now you are ready to analyze the data in t.table.

Unnamed: 0,geometry,seq_id,date_of_stop,time_of_stop,agency,subagency,description,location,latitude,longitude,...,driver_state,dl_state,arrest_type,search_conducted,search_outcome,search_reason_for_stop,search_disposition,search_reason,search_type,search_arrest_reason
0,POINT (-77.27504 39.14653),123add05-d3d2-428d-9932-66bc30831388,2021-01-01,23:03:00,MCP,"5th District, Germantown",DISPLAYING EXPIRED REGISTRATION PLATE ISSUED B...,GREAT SENECA @ WSSC ENTRANCE,39.1465333333333,-77.2750433333333,...,MD,MD,Q - Marked Laser,,,,,,,
1,POINT (-77.27504 39.14653),123add05-d3d2-428d-9932-66bc30831388,2021-01-01,23:03:00,MCP,"5th District, Germantown",EXCEEDING POSTED MAXIMUM SPEED LIMIT: 64 MPH I...,GREAT SENECA @ WSSC ENTRANCE,39.1465333333333,-77.2750433333333,...,MD,MD,Q - Marked Laser,,,,,,,
2,POINT (-77.27504 39.14653),123add05-d3d2-428d-9932-66bc30831388,2021-01-01,23:03:00,MCP,"5th District, Germantown",KNOWINGLY DRIVING UNINSURED VEHICLE,GREAT SENECA @ WSSC ENTRANCE,39.1465333333333,-77.2750433333333,...,MD,MD,Q - Marked Laser,,,,,,,
3,POINT (-77.27285 39.14366),1b7c9229-d80f-4ed2-9692-d24a6fbda5c7,2021-01-01,22:43:00,MCP,"5th District, Germantown",DRIVING VEHICLE IN EXCESS OF REASONABLE AND PR...,GREAT SENECA @ HORN POINT,39.1436583333333,-77.2728533333333,...,MD,MD,A - Marked Patrol,No,Warning,21-801(a),,,,
4,POINT (-77.27405 39.17419),0c6f50ae-d462-4356-8319-e1f035dc00fc,2021-01-01,22:20:00,MCP,"5th District, Germantown",DRIVER CHANGING LANES WHEN UNSAFE,118 @ WALTERJOHNSON,39.174195,-77.274045,...,MD,MD,A - Marked Patrol,No,Warning,21-309(b),,,,
