In [1]:
from sqlalchemy import create_engine
import pandas as pd
from fuzzywuzzy import process, fuzz
import geopy
from geopy.geocoders import Nominatim
from os import environ
from googlemaps import Client
import math

The database URL contains credentials that should be protected, so we use an environment variable.

In [2]:
# Pull the database URL from the ICEURL environment variable so the
# credentials it embeds never appear in the notebook itself.
connection_url = environ["ICEURL"]

In [3]:
# Load the whole "locations" table from the "public" schema into a DataFrame.
dataframe = pd.read_sql_table(
    table_name="locations",
    con=connection_url,
    schema="public",
)

We scraped the TRAC (http://trac.syr.edu) detention center location data in a different Jupyter notebook and saved it as a CSV file.

In [4]:
# TRAC facility list (scraped in a separate notebook). The facility names are
# the candidate strings that enrich_row fuzzy-matches location names against.
trac_locations = pd.read_csv('trac_locations_corrected.csv')
choices = trac_locations['Facility Name']

# We're going to use Nominatim (geolocator) and google maps (client).
# geopy geocoders read options.default_timeout when they are constructed, so
# the default has to be raised *before* Nominatim is instantiated; setting it
# afterwards leaves the geolocator on the library's short default timeout.
geopy.geocoders.options.default_timeout = 7
geolocator = Nominatim(user_agent="iceicedata")

# The Google Maps API key is read from the environment, never hard-coded.
client = Client(environ["APIKEY"])


In [5]:
# A TRAC facility name must score strictly above this (fuzz.token_set_ratio,
# 0-100) to be accepted as the same facility.
TRAC_MATCH_MIN_SCORE = 92

# Nominatim address keys that can hold the place name, in order of preference.
_CITY_ADDRESS_KEYS = ('city', 'hamlet', 'town', 'village', 'locality')


def _is_missing(value):
    """Return True when value is None, a float NaN, or an empty string."""
    if value is None:
        return True
    if isinstance(value, float):
        return math.isnan(value)
    return isinstance(value, str) and value == ''


def enrich_row(old_row):
    """Return a copy of a location row with city/state/lon/lat filled in.

    Only rows that have no latitude *and* no state are looked up:

    1. The row's name is fuzzy-matched against the TRAC facility names
       (``choices``). On a match scoring above ``TRAC_MATCH_MIN_SCORE`` the
       TRAC city/state are taken and geocoded with Nominatim.
    2. If no state was found that way, the name is searched with the Google
       Places client; the first result's coordinates are used and then
       reverse-geocoded with Nominatim to obtain a city. The state is left
       empty on this path.

    The row is only modified when a latitude was actually obtained; otherwise
    the unchanged copy is returned.

    Args:
        old_row: a mapping-like row (e.g. a pandas Series) with at least the
            keys 'name', 'city', 'state', 'lat' and 'lon'. It is not mutated.

    Returns:
        A copy of ``old_row``, enriched when a lookup succeeded.
    """
    row = old_row.copy()

    # Treat None / NaN / '' uniformly as "missing": math.isnan() alone raises
    # TypeError on None and does not recognise an empty string.
    if not _is_missing(row['lat']):
        return row
    if not _is_missing(row['state']):
        # NOTE(review): rows that already have a state but no coordinates are
        # returned as-is (no geocoding is attempted) - confirm this is intended.
        return row

    # Bind the name up front: both lookups below need it. Previously it was
    # only assigned on one branch, so a row with state == '' crashed with
    # UnboundLocalError when the Google fallback ran.
    name = row['name']
    if _is_missing(name):
        return row  # nothing to search for

    city = ''
    state = ''
    lat = math.nan
    lng = math.nan

    # First attempt: TRAC facility list + Nominatim forward geocoding.
    match = process.extractOne(name, choices, scorer=fuzz.token_set_ratio)
    if match is not None and match[1] > TRAC_MATCH_MIN_SCORE:
        # match[2] is the key of the matched choice, used here as the
        # trac_locations index label.
        trac_city = trac_locations.at[match[2], 'City']
        trac_state = trac_locations.at[match[2], 'State']
        city = '' if _is_missing(trac_city) else trac_city
        state = '' if _is_missing(trac_state) else trac_state
        if state != '':
            query = ', '.join(part for part in (city, state) if part != '')
            location = geolocator.geocode(query)
            if location is not None:
                lat = location.latitude
                lng = location.longitude

    # Second attempt: Google Places, only when TRAC gave us no state.
    if state == '':
        place = client.places(name)
        if place['status'] == 'OK':
            coords = place['results'][0]['geometry']['location']
            lat = coords['lat']
            lng = coords['lng']
            location = geolocator.reverse(str(lat) + ', ' + str(lng))
            if location is not None:
                address = location.raw.get('address', {})
                for key in _CITY_ADDRESS_KEYS:
                    if key in address:
                        city = address[key]
                        break
                else:
                    city = ''

    # Only write back when coordinates were actually found.
    if not math.isnan(lat):
        row['city'] = city
        row['state'] = state
        row['lon'] = lng
        row['lat'] = lat
    return row


In [7]:
# Enrich every row in turn; axis="columns" hands enrich_row one row at a time.
dataframe = dataframe.apply(enrich_row, axis="columns")


### Notes



In [9]:
# Persist the enriched locations to ice_locations.csv.
# NOTE(review): with to_csv's defaults the index is written as an unnamed
# first column (it comes back as "Unnamed: 0" when re-read); pass index=False
# if consumers should not see it - confirm before changing the file format.
dataframe.to_csv('ice_locations.csv')

In [10]:
# Preview the first rows only instead of rendering the entire frame.
dataframe.head(10)

Unnamed: 0,id,type,agency,program_type,name,address1,address2,address3,city,state,...,contact_last_name,date_first_record,date_recent_record,notes,phone,fax,url,email,created_at,updated_at
0,4,ICE ERO Field Office,ICE,,Atlanta Field Office,180 Ted Turner Dr. SW Suite 522,,,Atlanta,GA,...,,2016-07-26,2016-07-29,,(404) 893-1210,,https://www.ice.gov/contact/ero#wcm-survey-tar...,Atlanta.Outreach@ice.dhs.gov,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:46.973700+00:00
1,5,ICE ERO Field Office,ICE,,SND Field Office,,,,,,...,,2015-12-22,2017-01-22,,,,https://www.ice.gov/contact/ero#wcm-survey-tar...,,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:47.152500+00:00
2,6,ICE ERO Field Office,ICE,,San Diego Field Office,880 Front Street #2232,,,San Diego,CA,...,,2015-08-08,2016-09-26,,(619) 436-0410,,https://www.ice.gov/contact/ero#wcm-survey-tar...,SanDiego.Outreach@ice.dhs.gov,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:47.333600+00:00
3,7,ICE ERO Field Office,ICE,,Newark Field Office,970 Broad St. 11th Floor,,,Newark,NJ,...,,2016-02-17,2016-05-06,,(973) 645-3666,,https://www.ice.gov/contact/ero#wcm-survey-tar...,Newark.Outreach@ice.dhs.gov,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:47.514800+00:00
4,8,ICE ERO Field Office,ICE,,San Antonio Field Office,1777 NE Loop 410 Floor 15,,,San Antonio,TX,...,,2015-07-26,2016-04-26,,(210) 283-4750,,https://www.ice.gov/contact/ero#wcm-survey-tar...,SanAntonio.Outreach@ice.dhs.gov,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:47.689000+00:00
5,9,ICE ERO Field Office,ICE,,Phoenix Field Office,2035 N. Central Avenue,,,Phoenix,AZ,...,,2015-09-05,2016-01-06,,(602) 766-7030,,https://www.ice.gov/contact/ero#wcm-survey-tar...,Phoenix.Outreach@ice.dhs.gov,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:47.861700+00:00
6,10,ICE ERO Field Office,ICE,,Los Angeles Field Office,300 North Los Angeles St. Room 7631,,,Los Angeles,CA,...,,2015-12-26,2016-09-26,,(213) 830-7911,,https://www.ice.gov/contact/ero#wcm-survey-tar...,LosAngeles.Outreach@ice.dhs.gov,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:48.029500+00:00
7,12,ICE or Shared Facility,,JUVENILE,ABRAXAS ACADEMY DETENTION CENTER,,,,New Morgan,,...,,2017-02-24,2018-04-20,,,,,,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:48.199900+00:00
8,13,ICE or Shared Facility,,DIGSA,ADELANTO ICE PROCESSING CENTER,,,,Adelanto,,...,,2018-04-16,2018-04-19,,,,,,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:48.379300+00:00
9,14,ICE or Shared Facility,ICE,FAMILY,ARTESIA FAMILY RESIDENTIAL CENTER,,,,Artesia,New Mexico,...,,2014-07-19,2014-12-18,,,,,,2018-07-10 19:44:58.934400+00:00,2018-07-11 06:46:48.547000+00:00
