In [34]:
# Libraries for data munging and reverse geocoding
import numpy as np
import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import ArcGIS, Nominatim

In [95]:
# Load the drive-thru testing locations CSV from GitHub, using its first column as the row index
data_url = "https://raw.githubusercontent.com/ncov19-us/ds/master/drive_thru_testing_locations/us-drive-thru-testing-locations.csv"
df = pd.read_csv(filepath_or_buffer=data_url, index_col=0)
# Report (rows, columns) before previewing the first records
print(df.shape)
df.head(n=5)

(447, 4)


Unnamed: 0,Name,URL,Latitude,Longitude
0,Collaborative Effort of Health Care Providers,https://www.adn.com/alaska-news/anchorage/2020...,61.18287,-149.837269
1,Alaska Healthcare,https://www.tomsguide.com/news/drive-through-c...,61.18262,-149.83806
2,Fairbanks\' Foundation Health Partners,http://www.newsminer.com/alerts/fairbanks-laun...,64.83998,-147.71432
3,Fairbanks Memorial Hospital,https://www.adn.com/alaska-news/2020/03/25/som...,64.832649,-147.741562
4,Church of the Highlands- Grant\'s Mill,https://www.google.com/amp/s/www.wvtm13.com/am...,33.516999,-86.655847


In [35]:
# Create the Nominatim geocoder and a rate-limited wrapper around its forward `geocode` lookup.
# Nominatim's public usage policy allows at most one request per second, so the
# minimum delay between throttled calls is one second (0.25 s would allow four per second).
# NOTE: only calls made through `geocode` are throttled; calling methods on
# `geolocator` directly bypasses this limiter.
geolocator = Nominatim(user_agent="drive-thru-testing-reverse-geocoding")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

In [75]:
# Build the Coordinates column: each row's (latitude, longitude) tuple rendered as text
df["Coordinates"] = [
    str(lat_lon) for lat_lon in zip(df["Latitude"], df["Longitude"])
]
df.head(n=5)

Unnamed: 0,Name,URL,Latitude,Longitude,Coordinates
0,Collaborative Effort of Health Care Providers,https://www.adn.com/alaska-news/anchorage/2020...,61.18287,-149.837269,"(61.1828699, -149.837269)"
1,Alaska Healthcare,https://www.tomsguide.com/news/drive-through-c...,61.18262,-149.83806,"(61.18261999999999, -149.83806)"
2,Fairbanks\' Foundation Health Partners,http://www.newsminer.com/alerts/fairbanks-laun...,64.83998,-147.71432,"(64.83998000000001, -147.71432)"
3,Fairbanks Memorial Hospital,https://www.adn.com/alaska-news/2020/03/25/som...,64.832649,-147.741562,"(64.832649, -147.74156200000004)"
4,Church of the Highlands- Grant\'s Mill,https://www.google.com/amp/s/www.wvtm13.com/am...,33.516999,-86.655847,"(33.51699910000001, -86.6558468)"


In [83]:
# Define a convenience function to clean and reverse geocode coordinates cell contents, returns address
# Reverse lookups go through a rate limiter: Nominatim's usage policy allows at
# most one request per second, and this function is applied to every row.
_rate_limited_reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1)


def reverse_geocode(cell_contents):
    """Return the address for a stringified ``(latitude, longitude)`` pair.

    Parameters
    ----------
    cell_contents : str
        Coordinates as text, e.g. ``"(61.18287, -149.837269)"``. Any
        parentheses are removed before the lookup.

    Returns
    -------
    str or None
        The ``address`` of the location returned by the geocoder, or ``None``
        when the lookup produced no location, so misses surface as missing
        values instead of raising ``AttributeError``.
    """
    coordinates = cell_contents.replace("(", "").replace(")", "")
    location = _rate_limited_reverse(coordinates)
    if location is None:
        # No match (or the limiter gave up after its retries): report a missing value.
        return None
    return location.address

In [85]:
# Smoke-test the reverse geocoder on the first ten coordinate strings
df["Coordinates"].head(10).apply(reverse_geocode)

0    2343, East 42nd Avenue, Rogers Park, Green Acr...
1    2301, East 42nd Avenue, Rogers Park, Green Acr...
2    Westmark Fairbanks Hotel, Noble Street, South ...
3    Fairbanks Memorial Hospital, 1650, Cowles Stre...
4    Church of the Highlands, 5901, Overton Road, M...
5    University of Arkansas Medical Sciences Medica...
6    898, South 40th Street, Apple Spur, Rogers, Be...
7    Conway Regional Medical Center, 2302, College ...
8    East Mayo Boulevard, Oakhurst, Phoenix, Marico...
9    Mayo Clinic Hospital Scottsdale Campus, 13400,...
Name: Coordinates, dtype: object

In [86]:
# Reverse geocode every row and store the resulting addresses in a new column
df["Addresses"] = df["Coordinates"].map(reverse_geocode)

In [87]:
# Preview the first rows, now including the Addresses column
df.head(n=5)

Unnamed: 0,Name,URL,Latitude,Longitude,Coordinates,Addresses
0,Collaborative Effort of Health Care Providers,https://www.adn.com/alaska-news/anchorage/2020...,61.18287,-149.837269,"(61.1828699, -149.837269)","2343, East 42nd Avenue, Rogers Park, Green Acr..."
1,Alaska Healthcare,https://www.tomsguide.com/news/drive-through-c...,61.18262,-149.83806,"(61.18261999999999, -149.83806)","2301, East 42nd Avenue, Rogers Park, Green Acr..."
2,Fairbanks\' Foundation Health Partners,http://www.newsminer.com/alerts/fairbanks-laun...,64.83998,-147.71432,"(64.83998000000001, -147.71432)","Westmark Fairbanks Hotel, Noble Street, South ..."
3,Fairbanks Memorial Hospital,https://www.adn.com/alaska-news/2020/03/25/som...,64.832649,-147.741562,"(64.832649, -147.74156200000004)","Fairbanks Memorial Hospital, 1650, Cowles Stre..."
4,Church of the Highlands- Grant\'s Mill,https://www.google.com/amp/s/www.wvtm13.com/am...,33.516999,-86.655847,"(33.51699910000001, -86.6558468)","Church of the Highlands, 5901, Overton Road, M..."


In [88]:
# Count rows whose address is missing
df["Addresses"].isnull().sum()

0

In [94]:
# Write the enriched table to CSV, leaving the row index out of the file
df.to_csv(
    "../drive_thru_testing_locations/locations-with-addresses.csv",
    index=False,
)