In [1]:
import uszipcode
from uszipcode import SearchEngine
import geopy
from geopy.geocoders import Nominatim
import pandas as pd
import os
import csv
from pathlib import Path
import datetime as dt



In [2]:
# load zip code dictionary created in previous file
zipcode_file = Path('./resources/zipcodes.csv')
zipcodes_df = pd.read_csv(zipcode_file)
zipcodes_df

Unnamed: 0,RegionID,City,State,Zip Code
0,6181,New York,New York,10001
1,17426,Chicago,Illinois,60601
2,18959,Las Vegas,Nevada,89101
3,38128,Dallas,Texas,75201
4,10920,Columbus,Ohio,43085
...,...,...,...,...
1030,49592,New Hope,Pennsylvania,18938
1031,48710,Arnold,California,95223
1032,12306,Langhorne,Pennsylvania,19047
1033,33433,Pingree Grove,Illinois,60081


## CREATE GEOCODING API SEARCH TO ADD COORDINATE DATA

### We'll use the Nominatim API through GeoPy

In [None]:
# Create empty dictionaries to store our results
zipcodes_coords_dict = {"RegionID": [], "City": [], "State": [], "Zip Code": [], "Latitude": [], "Longitude": []}
bad_coords = {"RegionID": [], "City": [], "State": [], "Zip Code": []}
geolocator = Nominatim(user_agent="coords_locator")
for i in zipcodes_df.index:
    try:
        country= "United States"
        place = zipcodes_df["Zip Code"].iloc[i]
        city=zipcodes_df['City'].iloc[i]
        state=zipcodes_df['State'].iloc[i]
        regionId=zipcodes_df['RegionID'].iloc[i]
        location = geolocator.geocode({"postalcode": place, "country": country})
        for loc in location:
            if regionId not in zipcodes_coords_dict["RegionID"]:
                zipcodes_coords_dict['RegionID'].append(regionId)
                zipcodes_coords_dict['City'].append(city)
                zipcodes_coords_dict['State'].append(state)
                zipcodes_coords_dict['Zip Code'].append(place)
                zipcodes_coords_dict['Latitude'].append(location.latitude)
                zipcodes_coords_dict['Longitude'].append(location.longitude)
                print((int(i)+1, city, state, location.latitude, location.longitude))
    except:
        # Add locations with errors to separate dictionary
        bad_coords['RegionID'].append(regionId)
        bad_coords['City'].append(city)
        bad_coords['State'].append(state)
        bad_coords['Zip Code'].append(place)
        print(f"--------No coords found for {city}, {state}--------")
        pass

(1, 'New York', 'New York', 40.74841846379893, -73.99414662538305)
(2, 'Chicago', 'Illinois', 41.88590973888887, -87.62384881079517)
(3, 'Las Vegas', 'Nevada', 36.16754012512261, -115.1397390708225)
(4, 'Dallas', 'Texas', 32.78591838122747, -96.79898740986125)
(5, 'Columbus', 'Ohio', 40.09937865821035, -83.01535635988175)
(6, 'Louisville', 'Kentucky', 38.251258306496524, -85.74953130976665)
(7, 'Orlando', 'Florida', 28.54369969285714, -81.37867734893716)
(8, 'Denver', 'Colorado', 39.75047244283387, -104.99667202434155)
(9, 'Washington', 'District of Columbia', 38.91577904545211, -77.01780075676825)
(10, 'Portland', 'Oregon', 45.50849526040868, -122.6927202533267)
(11, 'Nashville', 'Tennessee', 36.1649490471154, -86.7749866054015)
(12, 'Milwaukee', 'Wisconsin', 43.04878110657971, -87.89918243948371)
(13, 'Omaha', 'Nebraska', 41.26096203127026, -95.93490843192167)
(14, 'Oklahoma City', 'Oklahoma', 35.45077612039714, -97.51630135887922)
(15, 'Raleigh', 'North Carolina', 35.77569627187468,

(119, 'Chapel Hill', 'North Carolina', 35.944320610329264, -79.03800256738151)
(120, 'Rialto', 'California', 34.1151291974359, -117.39435318791276)
(121, 'Columbia', 'Maryland', 39.21138239615816, -76.87965350577723)
(122, 'Meridian', 'Idaho', 43.59069012245927, -116.3883152851739)
(123, 'Lawrence', 'Kansas', 38.96682736099623, -95.23878052268516)
(124, 'Greeley', 'Colorado', 40.41194859375, -104.69730476904851)
(125, 'League City', 'Texas', 29.498070607389014, -95.10865092359022)
(126, 'Mission Viejo', 'California', 33.614618414436414, -117.66557058051434)
(127, 'Livermore', 'California', 37.67517326263379, -121.75509123023458)
(128, 'Portsmouth', 'Virginia', 36.80965797236636, -76.3692236804929)
(129, 'Hawthorne', 'California', 33.92291718197, -118.34873156477205)
(130, 'Johnson City', 'Tennessee', 36.33432894886623, -82.34241039662975)
(131, 'Idaho Falls', 'Idaho', 43.51971125645162, -112.0075389240074)
(132, 'Asheville', 'North Carolina', 35.595068857748934, -82.55510534709673)
(13

In [10]:
# Create a dataframe of locations with no coordinates, to see how many we're losing from our dataset
bad_coords_df = pd.DataFrame(bad_coords, columns=["RegionID", "City", "State", "Zip Code"])
bad_coords_df

Unnamed: 0,RegionID,City,State,Zip Code
0,11722,Greensboro,North Carolina,27395
1,17759,Des Moines,Iowa,50307
2,33058,New Bedford,Massachusetts,2740
3,39558,Lynn,Massachusetts,1901
4,31525,Fall River,Massachusetts,2720
...,...,...,...,...
92,44095,Bedminster Township,New Jersey,7921
93,44310,Brigantine,New Jersey,8203
94,37950,Cold Springs,Nevada,89067
95,27950,Wildwood,New Jersey,8260


In [11]:
# create dataframe from viable locations
zip_coords_df_cleaned = pd.DataFrame(zipcodes_coords_dict, columns=['RegionID', 'City', 'State', 'Zip Code', 'Latitude', 'Longitude'])
zip_coords_df_cleaned

Unnamed: 0,RegionID,City,State,Zip Code,Latitude,Longitude
0,6181,New York,New York,10001,40.748418,-73.994147
1,17426,Chicago,Illinois,60601,41.885910,-87.623849
2,18959,Las Vegas,Nevada,89101,36.167540,-115.139739
3,38128,Dallas,Texas,75201,32.785918,-96.798987
4,10920,Columbus,Ohio,43085,40.099379,-83.015356
...,...,...,...,...,...,...
933,25643,Longboat Key,Florida,34228,27.392012,-82.641345
934,49592,New Hope,Pennsylvania,18938,40.348777,-74.994791
935,48710,Arnold,California,95223,38.257253,-120.328708
936,12306,Langhorne,Pennsylvania,19047,40.178802,-74.913333


In [6]:
# Export zip_coords_df to CSV, which will serve as our finalized dictionary of location data to unique RegionID
#zip_coords_df_cleaned.to_csv("./resources/zipcodes_coordinates.csv", index=False)