In [1]:
# import requests
import pandas as pd
# import os
# from dotenv import load_dotenv

## Getting ORI codes

In [2]:
# location of the agency / ORI table; held in a variable so the long file
# name does not bury the read call
AGENCIES_CSV_PATH = '../cleansed data/via api.data.gov U.S. law enforcement agencies and ORI (Originating Agency Identifier) numbers [collected 2020-06-12] - Sheet1.csv'
agencies = pd.read_csv(AGENCIES_CSV_PATH)

In [3]:
agencies.shape

(18575, 13)

In [4]:
agencies.head()

Unnamed: 0,ori,agency_name,agency_type_name,state_name,state_abbr,division_name,region_name,region_desc,county_name,nibrs,latitude,longitude,nibrs_start_date
0,AK0010100,Anchorage Police Department,Municipality,Alaska,AK,Pacific,West,Region IV,ANCHORAGE,False,61.17425,-149.284329,
1,AK0010200,Fairbanks Police Department,Borough,Alaska,AK,Pacific,West,Region IV,FAIRBANKS NORTH STAR,False,64.83945,-147.71942,
2,AK0010300,Juneau Police Department,City and Borough,Alaska,AK,Pacific,West,Region IV,JUNEAU,False,58.356556,-134.50731,
3,AK0010400,Ketchikan Police Department,Borough,Alaska,AK,Pacific,West,Region IV,KETCHIKAN GATEWAY,False,55.449938,-131.106685,
4,AK0010500,Kodiak Police Department,Borough,Alaska,AK,Pacific,West,Region IV,KODIAK ISLAND,False,57.8049,-152.37332,


In [5]:
# take a deep copy of agencies so that edits made to agencies_copy
# do not modify the frame that was loaded from disk
agencies_copy = agencies.copy(deep=True)

In [6]:
agencies_copy['agency_type_name'].unique()

array(['Municipality', 'Borough', 'City and Borough', 'Census Area',
       'County', 'City', 'University or College', 'Other State Agency',
       'Other', 'Tribal', 'State Police', 'Parish'], dtype=object)

In [7]:
# agencies_copy = agencies_copy[agencies_copy['agency_type_name']
#                               .isin(['City', 'Municipality', 'Parish', 'City and Borough'])]

In [8]:
# strip every occurrence of ' Police Department' from the agency_name values
agencies_copy['agency_name'] = (
    agencies_copy['agency_name'].str.replace(' Police Department', '')
)

In [9]:
# strip every occurrence of ' Metropolitan' from the agency_name values
agencies_copy['agency_name'] = (
    agencies_copy['agency_name'].str.replace(' Metropolitan', '')
)

In [10]:
# strip every occurrence of ' Bureau of Police' from the agency_name values
agencies_copy['agency_name'] = (
    agencies_copy['agency_name'].str.replace(' Bureau of Police', '')
)

In [11]:
# strip every occurrence of ' Metro' from the agency_name values
agencies_copy['agency_name'] = (
    agencies_copy['agency_name'].str.replace(' Metro', '')
)

In [12]:
# location of the State / City list
CITIES_CSV_PATH = '../raw data/cities.csv'
cities = pd.read_csv(CITIES_CSV_PATH)

In [13]:
cities

Unnamed: 0,State,City
0,Alabama,Huntsville
1,Alaska,Anchorage
2,American Samoa,Tafuna
3,Arizona,Phoenix
4,Arkansas,Little Rock
...,...,...
270,Virginia,Arlington
271,Washington,Bellevue
272,West Virginia,Wheeling
273,Wisconsin,Racine


In [14]:
cities['State'].unique()

array(['Alabama', 'Alaska', 'American Samoa', 'Arizona', 'Arkansas',
       'California', 'Colorado', 'Connecticut', 'Delaware',
       'District of Columbia', 'Florida', 'Georgia', 'Guam', 'Hawaii',
       'Idaho', 'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky',
       'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan',
       'Minnesota', 'Mississippi', 'Missouri', 'Montana', 'Nebraska',
       'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
       'North Carolina', 'North Dakota', 'Northern Mariana Islands',
       'Ohio', 'Oklahoma', 'Oregon', 'Pennsylvania', 'Puerto Rico',
       'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee',
       'Texas', 'Utah', 'Vermont', 'Virgin Islands (U.S.)', 'Virginia',
       'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype=object)

In [15]:
cities.shape

(275, 2)

In [16]:
# rename the City value 'Washington, D.C.' to 'Washington'
# (Series.replace matches whole cell values, not substrings)
cities['City'] = cities['City'].replace({'Washington, D.C.': 'Washington'})

In [17]:
# drop the rows of cities whose State is one of these four territories
excluded_territories = [
    'American Samoa',
    'Virgin Islands (U.S.)',
    'Northern Mariana Islands',
    'Guam',
]
in_excluded_territory = cities['State'].isin(excluded_territories)
cities = cities[~in_excluded_territory]

In [18]:
cities.shape

(256, 2)

In [19]:
def filter_agencies(row, agencies=None):
    '''Return the agencies whose name and state match one row of cities.

    Parameters
    ----------
    row : mapping-like (e.g. one row Series of the cities dataframe)
        Must provide 'City' and 'State' entries.
    agencies : pandas.DataFrame, optional
        Frame with 'agency_name' and 'state_name' columns to search.
        When omitted, the notebook-level agencies_copy is used; it is
        looked up at call time, so later edits to it are picked up.

    Returns
    -------
    pandas.DataFrame
        The rows of `agencies` where agency_name == row['City'] and
        state_name == row['State']. Empty when nothing matches.
    '''
    if agencies is None:
        # fall back to the notebook global so that
        # cities.apply(filter_agencies, axis=1) keeps working unchanged
        agencies = agencies_copy
    name_matches = agencies['agency_name'] == row['City']
    state_matches = agencies['state_name'] == row['State']
    return agencies[name_matches & state_matches]

# The function is applied to every row of cities with
# cities.apply(filter_agencies, axis=1) in the following cell, where the
# per-row results are concatenated. It is intentionally not applied here as
# well: that result was overwritten without ever being read.

# axis: {0 or ‘index’, 1 or ‘columns’}, default 0
# Axis along which the function is applied:
# 0 or ‘index’: apply function to each column.
# 1 or ‘columns’: apply function to each row.

In [20]:
# run filter_agencies on every row of cities, then stack the per-row results
# into one dataframe: the agencies_copy rows whose (state_name, agency_name)
# equals the (State, City) of some row in cities
per_city_matches = cities.apply(filter_agencies, axis=1)
filtered_agencies = pd.concat(per_city_matches.tolist())

In [21]:
filtered_agencies

Unnamed: 0,ori,agency_name,agency_type_name,state_name,state_abbr,division_name,region_name,region_desc,county_name,nibrs,latitude,longitude,nibrs_start_date
330,AL0470100,Huntsville,City,Alabama,AL,East South Central,South,Region III,LIMESTONE; MADISON,False,34.764238,-86.551080,
0,AK0010100,Anchorage,Municipality,Alaska,AK,Pacific,West,Region IV,ANCHORAGE,False,61.174250,-149.284329,
817,AZ0072300,Phoenix,City,Arizona,AZ,Mountain,West,Region IV,MARICOPA,False,33.448250,-112.081700,
696,AR0600200,Little Rock,City,Arkansas,AR,West South Central,South,Region III,PULASKI,True,34.749084,-92.277565,08/01/2002
1117,CA0194200,Los Angeles,City,California,CA,Pacific,West,Region IV,LOS ANGELES,False,34.196398,-118.261862,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17265,VT0020100,Bennington,City,Vermont,VT,New England,Northeast,Region I,BENNINGTON,True,43.035325,-73.111460,01/01/1998
17416,WA0170200,Bellevue,City,Washington,WA,Pacific,West,Region IV,KING,True,47.493554,-121.832375,01/01/2015
18325,WV0350100,Wheeling,City,West Virginia,WV,South Atlantic,South,Region III,MARSHALL; OHIO,True,40.064915,-80.720924,02/01/1999
17918,WI0520200,Racine,City,Wisconsin,WI,East North Central,Midwest,Region II,RACINE,True,42.725280,-87.789406,01/01/2017


In [25]:
# keep only the ORI code, agency name, state name and agency type columns
filtered_agencies = filtered_agencies.loc[
    :, ['ori', 'agency_name', 'state_name', 'agency_type_name']
]

In [27]:
# write filtered_agencies out as CSV; index=False leaves the row index out of the file
filtered_agencies.to_csv(path_or_buf='../cleansed data/agencies.csv', index=False)

In [22]:
# find the rows of cities that have no matching row in filtered_agencies.
# The comparison is on the (State, City) pair, not on the city name alone:
# with a name-only test, a city without an agency is wrongly treated as found
# whenever a same-named city in a different state was matched.
matched_pairs = set(zip(filtered_agencies['state_name'],
                        filtered_agencies['agency_name']))
has_agency = pd.Series(
    [pair in matched_pairs for pair in zip(cities['State'], cities['City'])],
    index=cities.index,  # align with cities so the mask can index it directly
    dtype=bool,          # stay boolean even if cities is empty
)
missing_cities = cities[~has_agency]

In [23]:
# order missing_cities by State
missing_cities.sort_values(by='State')

Unnamed: 0,State,City
176,Georgia,Macon
121,Georgia,Augusta
231,Georgia,Savannah
68,Hawaii,East Honolulu
123,Hawaii,Pearl City
233,Hawaii,Kailua
178,Hawaii,Hilo
132,Maryland,Germantown
242,Maryland,Waldorf
187,Maryland,Silver Spring


In [24]:
missing_cities.shape

(23, 2)