In [1]:
import uszipcode
from uszipcode import SearchEngine
import geopy
from geopy.geocoders import Nominatim
import pandas as pd
import os
import csv
from pathlib import Path
import datetime as dt



In [2]:
# load zip code dictionary created in previous file
zipcode_file = Path('./resources/zipcodes.csv')
zipcodes_df = pd.read_csv(zipcode_file)
zipcodes_df

Unnamed: 0,RegionID,City,State,Zip Code
0,6181,New York,New York,10001
1,17426,Chicago,Illinois,60601
2,18959,Las Vegas,Nevada,89101
3,38128,Dallas,Texas,75201
4,10920,Columbus,Ohio,43085
...,...,...,...,...
1030,49592,New Hope,Pennsylvania,18938
1031,48710,Arnold,California,95223
1032,12306,Langhorne,Pennsylvania,19047
1033,33433,Pingree Grove,Illinois,60081


## CREATE GEOCODING API SEARCH TO ADD COORDINATE DATA

### We'll use the Nominatim API through GeoPy

In [3]:
# Create empty dictionaries to store our results
zipcodes_coords_dict = {"RegionID": [], "City": [], "State": [], "Zip Code": [], "Latitude": [], "Longitude": []}
bad_coords = {"RegionID": [], "City": [], "State": [], "Zip Code": []}
geolocator = Nominatim(user_agent="coords_locator")
for i in zipcodes_df.index:
    try:
        country= "United States"
        place = zipcodes_df["Zip Code"].iloc[i]
        city=zipcodes_df['City'].iloc[i]
        state=zipcodes_df['State'].iloc[i]
        regionId=zipcodes_df['RegionID'].iloc[i]
        location = geolocator.geocode({"postalcode": place, "country": country})
        for loc in location:
            if regionId not in zipcodes_coords_dict["RegionID"]:
                zipcodes_coords_dict['RegionID'].append(regionId)
                zipcodes_coords_dict['City'].append(city)
                zipcodes_coords_dict['State'].append(state)
                zipcodes_coords_dict['Zip Code'].append(place)
                zipcodes_coords_dict['Latitude'].append(location.latitude)
                zipcodes_coords_dict['Longitude'].append(location.longitude)
                print((int(i)+1, city, state, location.latitude, location.longitude))
    except:
        # Add locations with errors to separate dictionary
        bad_coords['RegionID'].append(regionId)
        bad_coords['City'].append(city)
        bad_coords['State'].append(state)
        bad_coords['Zip Code'].append(place)
        print(f"--------No coords found for {city}, {state}--------")
        pass

(1, 'New York', 'New York', 40.74841846379893, -73.99414662538305)
(2, 'Chicago', 'Illinois', 41.88590973888887, -87.62384881079517)
(3, 'Las Vegas', 'Nevada', 36.16754012512261, -115.1397390708225)
(4, 'Dallas', 'Texas', 32.78591838122747, -96.79898740986125)
(5, 'Columbus', 'Ohio', 40.09937865821035, -83.01535635988175)
(6, 'Louisville', 'Kentucky', 38.251258306496524, -85.74953130976665)
(7, 'Orlando', 'Florida', 28.54369969285714, -81.37867734893716)
(8, 'Denver', 'Colorado', 39.75047244283387, -104.99667202434155)
(9, 'Washington', 'District of Columbia', 38.91577904545211, -77.01780075676825)
(10, 'Portland', 'Oregon', 45.50849526040868, -122.6927202533267)
(11, 'Nashville', 'Tennessee', 36.165390218181834, -86.77491335920963)
(12, 'Milwaukee', 'Wisconsin', 43.04878110657971, -87.89918243948371)
(13, 'Omaha', 'Nebraska', 41.26096203127026, -95.93490843192167)
(14, 'Oklahoma City', 'Oklahoma', 35.45077612039714, -97.51630135887922)
(15, 'Raleigh', 'North Carolina', 35.775696271874

(119, 'Chapel Hill', 'North Carolina', 35.944320610329264, -79.03800256738151)
(120, 'Rialto', 'California', 34.1151291974359, -117.39435318791276)
(121, 'Columbia', 'Maryland', 39.21138239615816, -76.87965350577723)
(122, 'Meridian', 'Idaho', 43.59069012245927, -116.3883152851739)
(123, 'Lawrence', 'Kansas', 38.96682736099623, -95.23878052268516)
(124, 'Greeley', 'Colorado', 40.41194859375, -104.69730476904851)
(125, 'League City', 'Texas', 29.498070607389014, -95.10865092359022)
(126, 'Mission Viejo', 'California', 33.614618414436414, -117.66557058051434)
(127, 'Livermore', 'California', 37.67517326263379, -121.75509123023458)
(128, 'Portsmouth', 'Virginia', 36.80965797236636, -76.3692236804929)
(129, 'Hawthorne', 'California', 33.92291718197, -118.34873156477205)
(130, 'Johnson City', 'Tennessee', 36.33432894886623, -82.34241039662975)
(131, 'Idaho Falls', 'Idaho', 43.51971125645162, -112.0075389240074)
(132, 'Asheville', 'North Carolina', 35.595068857748934, -82.55510534709673)
(13

(236, 'Montebello', 'California', 34.01425617370489, -118.11237303437503)
(237, 'Saint Cloud', 'Minnesota', 45.526898974076474, -94.1851679709843)
(238, 'Eden Prairie', 'Minnesota', 44.86269202041034, -93.43036091653731)
(239, 'San Clemente', 'California', 33.42867653813865, -117.61328314788199)
(240, 'Broomfield', 'Colorado', 39.921015727564956, -105.08084169543334)
(241, 'Coon Rapids', 'Minnesota', 46.628359980000006, -96.08378365572295)
(242, 'Blaine', 'Minnesota', 44.62206704814815, -93.76992322462141)
(243, 'Pasadena', 'Maryland', 39.127007732932626, -76.51941913576832)
(244, 'Port Orange', 'Florida', 29.111855296293346, -80.986852164141)
(245, 'La Habra', 'California', 33.93124614939487, -117.94943535596474)
(246, 'Burnsville', 'Minnesota', 44.73009131651001, -93.2898034065909)
(247, 'Land O Lakes', 'Florida', 28.268106871999997, -82.46776935485723)
(248, 'Monterey Park', 'California', 34.0503994777145, -118.14234084978287)
(249, 'Albany', 'Oregon', 44.62678736836734, -123.126081

(354, 'Portage', 'Michigan', 42.21680461537523, -85.5674588789129)
(355, 'Littleton', 'Colorado', 39.59144931515734, -105.00632209026107)
(356, 'Kernersville', 'North Carolina', 36.117176557617924, -80.0823898286159)
(357, 'Rowlett', 'Texas', 32.89688608657792, -96.55169998748639)
(358, 'Hammond', 'Louisiana', 30.517430095267617, -90.47423638740541)
(359, 'Wauwatosa', 'Wisconsin', 44.06994201972788, -89.27703594158014)
--------No coords found for Bloomfield, New Jersey--------
(361, 'Collierville', 'Tennessee', 35.05676957246097, -89.6812761920695)
(362, 'Roseville', 'Michigan', 42.504736373394046, -82.93706049771544)
(363, 'Town Of Ithaca', 'New York', 42.447515129506016, -76.49229344437771)
(364, 'Minnetonka', 'Minnesota', 44.91579431822359, -93.485304449135)
(365, 'Elmhurst', 'Illinois', 41.895432758628225, -87.94569578223089)
(366, 'Brighton', 'Michigan', 42.55934217724389, -83.7590011130828)
(367, 'Hinesville', 'Georgia', 31.832126334552846, -81.61616751975241)
(368, 'Mentor', 'Oh

--------No coords found for Braintree, Massachusetts--------
(474, 'District Heights', 'Maryland', 38.855186761891, -76.88671936851603)
(475, 'McMinnville', 'Oregon', 45.20641535809523, -123.19507429998488)
(476, 'Roy', 'Utah', 41.192245790909126, -112.0399548185829)
--------No coords found for Trumbull, Connecticut--------
(478, 'Claremont', 'California', 34.10685855176798, -117.72093207581015)
(479, 'Altadena', 'California', 34.185171679088334, -118.13678881710561)
(480, 'East Meadow', 'New York', 40.714572017965686, -73.55732920325784)
(481, 'Cedar Falls', 'Iowa', 42.52367430234701, -92.45385172972375)
(482, 'Sykesville', 'Maryland', 39.396878916991405, -76.96078083365084)
--------No coords found for Ewing Township, New Jersey--------
(484, 'Grovetown', 'Georgia', 33.46502324942007, -82.2035258428956)
(485, 'Menomonee Falls', 'Wisconsin', 43.15229042573331, -88.11171683054043)
(486, 'San Juan Capistrano', 'California', 33.500003056862624, -117.65808522503639)
(487, 'Richfield', 'Min

(591, 'Selma', 'California', 36.56704752142857, -119.60222791581886)
(592, 'Zeeland', 'Michigan', 42.81693481069584, -86.0134521298613)
(593, 'Clemmons', 'North Carolina', 36.02445738598686, -80.38515283161685)
(594, 'Ozark', 'Missouri', 37.01766076956627, -93.21432560058518)
(595, 'San Carlos', 'California', 37.50418320617285, -122.26091515769629)
(596, 'Town Of Cicero', 'New York', 43.17452624341221, -76.08784566701782)
(597, 'Southlake', 'Texas', 32.94244713379026, -97.14838330574563)
(598, 'Liberty', 'Missouri', 39.24676910867901, -94.42770166799669)
(599, 'College Park', 'Maryland', 38.998429446043126, -76.92584193832796)
(600, 'La Place', 'Louisiana', 30.074070121048006, -90.48587061493494)
(601, 'Brownstown', 'Michigan', 41.84730413599998, -85.1922097036033)
(602, 'Granger', 'Indiana', 41.741181938693074, -86.14954570577834)
(603, 'Brainerd', 'Minnesota', 46.35665005160889, -94.19205261118756)
(604, 'Hermiston', 'Oregon', 45.848520228294234, -119.29366343609782)
(605, 'Nixa', 'M

(709, 'Forest', 'Virginia', 37.35759644156747, -79.29313571572088)
(710, 'Summerfield', 'Florida', 28.99675514782607, -82.02420332099717)
(711, 'Ashland', 'Oregon', 42.19823617976544, -122.69905093962696)
(712, 'Zachary', 'Louisiana', 30.64461050748888, -91.1523657803527)
(713, 'Navarre', 'Florida', 30.41699983716098, -86.84054576818205)
(714, 'Blacklick', 'Ohio', 40.00311996917646, -82.81436607211474)
(715, 'Okemos', 'Michigan', 42.7073021188529, -84.41703153518188)
(716, 'North Wales', 'Pennsylvania', 40.23204461035551, -75.23906920369679)
(717, 'Berea', 'Kentucky', 37.58012345201914, -84.27728170871461)
--------No coords found for Walpole, Massachusetts--------
--------No coords found for Hudson, New Hampshire--------
(720, 'Caledonia', 'Wisconsin', 42.75177689527376, -87.94865956649869)
(721, 'Groveton', 'Virginia', 36.959141169117636, -79.33331307009804)
(722, 'Onalaska', 'Wisconsin', 43.89859544470589, -91.22872858654618)
(723, 'Batavia', 'Illinois', 41.84540926621628, -88.319134

--------No coords found for Sparta Township, New Jersey--------
(828, 'Lafayette', 'Colorado', 39.995903159749886, -105.10598925752025)
(829, 'Key West', 'Florida', 24.563751295499152, -81.77080066437927)
(830, 'Dewitt', 'Michigan', 42.83828769735552, -84.56881128467799)
(831, 'Pinckney', 'Michigan', 42.450936730337666, -83.91889596862413)
(832, 'Walker', 'Louisiana', 30.515270341605948, -90.85102346229824)
(833, 'Blairsville', 'Georgia', 34.87117950362243, -84.00032112914717)
(834, 'Berlin', 'Maryland', 38.35449270199929, -75.17276092314553)
(835, 'Union', 'Kentucky', 38.94487811049342, -84.67734565453912)
(836, 'Parma Heights', 'Ohio', 41.414025266937244, -81.57088519607358)
--------No coords found for New Canaan, Connecticut--------
(838, 'Crestwood', 'Kentucky', 38.339101416581364, -85.4437636790567)
(839, 'Sachse', 'Texas', 32.96435141454353, -96.58650645274818)
(840, 'Pelham', 'Alabama', 33.31783611842106, -86.79325843713235)
--------No coords found for Hazlet Township, New Jerse

(946, 'Aldie', 'Virginia', 38.941545524009285, -77.57231997956907)
(947, 'Locust Grove', 'Virginia', 38.31365194751016, -77.77923869997834)
(948, 'Youngsville', 'North Carolina', 36.004760714259874, -78.45476518823118)
--------No coords found for Barrington, Rhode Island--------
(950, 'Moore', 'South Carolina', 34.88206299865141, -82.02092256621113)
(951, 'Chester Springs', 'Pennsylvania', 40.09331163872953, -75.64793194891806)
(952, 'Clayton', 'California', 37.934099295047176, -121.92266289308414)
(953, 'Chelsea', 'Michigan', 42.31433910363331, -84.01089917820539)
(954, 'Coatesville', 'Pennsylvania', 39.983051276091224, -75.82099240923473)
(955, 'New Albany', 'Ohio', 40.07742914181617, -82.82031715952151)
(956, 'Crowley', 'Texas', 32.57414479690964, -97.3900670678095)
(957, 'Red Bank', 'Tennessee', 36.54032006505375, -85.82654166746624)
(958, 'Woodland Park', 'Colorado', 38.99927291606494, -105.06081115524339)
(959, 'Madisonville', 'Louisiana', 30.428587706349695, -90.17848063400258)


In [4]:
# Create a dataframe of locations with no coordinates, to see how many we're losing from our dataset
bad_coords_df = pd.DataFrame(bad_coords, columns=["RegionID", "City", "State", "Zip Code"])
bad_coords_df

Unnamed: 0,RegionID,City,State,Zip Code
0,11722,Greensboro,North Carolina,27395
1,17759,Des Moines,Iowa,50307
2,33058,New Bedford,Massachusetts,2740
3,39558,Lynn,Massachusetts,1901
4,31525,Fall River,Massachusetts,2720
...,...,...,...,...
92,44095,Bedminster Township,New Jersey,7921
93,44310,Brigantine,New Jersey,8203
94,37950,Cold Springs,Nevada,89067
95,27950,Wildwood,New Jersey,8260


In [5]:
# create dataframe from viable locations
zip_coords_df_cleaned = pd.DataFrame(zipcodes_coords_dict, columns=['RegionID', 'City', 'State', 'Zip Code', 'Latitude', 'Longitude'])
zip_coords_df_cleaned

Unnamed: 0,RegionID,City,State,Zip Code,Latitude,Longitude
0,6181,New York,New York,10001,40.748418,-73.994147
1,17426,Chicago,Illinois,60601,41.885910,-87.623849
2,18959,Las Vegas,Nevada,89101,36.167540,-115.139739
3,38128,Dallas,Texas,75201,32.785918,-96.798987
4,10920,Columbus,Ohio,43085,40.099379,-83.015356
...,...,...,...,...,...,...
933,25643,Longboat Key,Florida,34228,27.392012,-82.641345
934,49592,New Hope,Pennsylvania,18938,40.348777,-74.994791
935,48710,Arnold,California,95223,38.257253,-120.328708
936,12306,Langhorne,Pennsylvania,19047,40.178802,-74.913333


In [6]:
# Export zip_coords_df to CSV, which will serve as our finalized dictionary of location data to unique RegionID
zip_coords_df_cleaned.to_csv("./resources/zipcodes_coordinates.csv", index=False)