In [1]:
import uszipcode
from uszipcode import SearchEngine
import geopy
from geopy.geocoders import Nominatim
import pandas as pd
import os
import csv
from pathlib import Path
import datetime as dt



In [2]:
# Load reduced data source with city and state names
city_file = Path("./resources/Sale_Prices_City/Sale_Prices_City_sample.csv")
city_data = pd.read_csv(city_file, low_memory=False)
city_data.head(10)

Unnamed: 0,RegionID,RegionName,StateName,SizeRank,2011-01,2011-02,2011-03,2011-04,2011-05,2011-06,...,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12,2020-01,2020-02,2020-03
0,6181,New York,New York,1,462700.0,466200.0,465600.0,469500.0,471300.0,468600.0,...,563200.0,570500.0,572800.0,569900.0,560800.0,571500.0,575100.0,571700.0,568300.0,573600.0
1,17426,Chicago,Illinois,4,188500.0,189400.0,186300.0,177700.0,167900.0,174700.0,...,271500.0,266500.0,264900.0,265000.0,264100.0,264300.0,270000.0,281400.0,302900.0,309200.0
2,18959,Las Vegas,Nevada,8,115600.0,114000.0,110800.0,109600.0,108200.0,107500.0,...,266000.0,268200.0,268400.0,270400.0,272200.0,274400.0,277500.0,278900.0,283000.0,284700.0
3,38128,Dallas,Texas,10,191900.0,184500.0,174500.0,152000.0,162900.0,171400.0,...,300600.0,301100.0,301500.0,310100.0,314600.0,316900.0,307100.0,321500.0,315500.0,321700.0
4,10920,Columbus,Ohio,19,89100.0,86000.0,79800.0,79300.0,78600.0,82300.0,...,152600.0,154800.0,158500.0,157700.0,162900.0,166600.0,172700.0,175300.0,173600.0,177600.0
5,12455,Louisville,Kentucky,20,125900.0,124000.0,118300.0,117600.0,113800.0,115300.0,...,172700.0,175900.0,176000.0,173900.0,175400.0,174600.0,176600.0,170800.0,173100.0,176200.0
6,13121,Orlando,Florida,21,82000.0,82800.0,85500.0,87800.0,88800.0,87500.0,...,244200.0,244900.0,241900.0,241500.0,243600.0,247400.0,251100.0,254800.0,263500.0,270500.0
7,11093,Denver,Colorado,23,201400.0,205300.0,198800.0,198500.0,190900.0,195200.0,...,414800.0,418100.0,421400.0,433300.0,440500.0,445400.0,435500.0,435100.0,436900.0,442700.0
8,41568,Washington,District of Columbia,27,401100.0,386000.0,376200.0,362800.0,370300.0,378500.0,...,540000.0,532600.0,530600.0,542600.0,548000.0,566400.0,577600.0,580100.0,575700.0,586100.0
9,13373,Portland,Oregon,28,265700.0,261000.0,248100.0,238300.0,235400.0,235300.0,...,415100.0,416500.0,412900.0,422100.0,428900.0,432200.0,431900.0,435400.0,435200.0,439500.0


In [3]:
# check to see if there are duplicate city names
city_names_pivot = city_data.pivot_table(columns=['RegionName'], aggfunc='size').sort_values(ascending=False)
city_duplicates_df = city_names_pivot.reset_index()
city_duplicates_df.head(66)

Unnamed: 0,RegionName,0
0,Springfield,4
1,Washington,3
2,Franklin,3
3,Westfield,3
4,Bristol,3
...,...,...
61,Louisville,2
62,Medford,2
63,Richmond,2
64,Helena,2


## CREATE ENGINE TO PAIR ZIP CODES W/ USZIPCODE LIBRARY

In [4]:
engine = SearchEngine()
zipcodes_dict = {"RegionID": [], "City": [], "State": [], "Zip Code": []}
bad_cities = {"RegionID": [], "City": [], "State": []}
for i in city_data.index:
    try:
        city=city_data['RegionName'].iloc[i]
        state=city_data['StateName'].iloc[i]
        regionId=city_data['RegionID'].iloc[i]
        zipcodes = engine.by_city_and_state(city=city, state=state)
    except:
        # Add cities with errors to a dictionary of bad locations
        bad_cities["RegionID"].append(regionId)
        bad_cities["City"].append(city)
        bad_cities["State"].append(state)
        print(f"{city}, {state} not found")
        pass
    for zipcode in zipcodes:
        # Add only the first located zip code for each location to dictionary
        if regionId not in zipcodes_dict["RegionID"]:
            zipcodes_dict["RegionID"].append(regionId)
            zipcodes_dict["City"].append(city)
            zipcodes_dict['State'].append(state)
            zipcodes_dict['Zip Code'].append(zipcode.zipcode)
            print(int(i)+1, zipcode.zipcode, zipcode.major_city, zipcode.state)

1 10001 New York NY
2 60601 Chicago IL
3 89101 Las Vegas NV
4 75201 Dallas TX
5 43085 Columbus OH
6 40202 Louisville KY
7 32801 Orlando FL
8 80202 Denver CO
9 20001 Washington DC
10 97201 Portland OR
11 37201 Nashville TN
12 53202 Milwaukee WI
13 68102 Omaha NE
14 73102 Oklahoma City OK
15 27601 Raleigh NC
16 80902 Colorado Springs CO
17 55401 Minneapolis MN
18 70801 Baton Rouge LA
19 80010 Aurora CO
20 92801 Anaheim CA
21 27395 Greensboro NC
22 40502 Lexington KY
23 46802 Fort Wayne IN
24 55101 Saint Paul MN
25 34102 Naples FL
26 32301 Tallahassee FL
27 89002 Henderson NV
28 68502 Lincoln NE
29 99201 Spokane WA
30 32501 Pensacola FL
31 53703 Madison WI
32 92602 Irvine CA
33 89030 North Las Vegas NV
34 22201 Arlington VA
35 65802 Springfield MO
36 50307 Des Moines IA
37 22191 Woodbridge VA
38 32601 Gainesville FL
39 10701 Yonkers NY
40 60502 Aurora IL
41 28401 Wilmington NC
42 92646 Huntington Beach CA
43 97401 Eugene OR
44 34952 Port Saint Lucie FL
45 93901 Salinas CA
46 72201 Little 

356 75104 Cedar Hill TX
357 60169 Hoffman Estates IL
358 96744 Kaneohe HI
359 94568 Dublin CA
360 97470 Roseburg OR
361 74017 Claremore OK
362 47025 Lawrenceburg IN
363 49002 Portage MI
Murray, Utah not found
364 49002 Portage MI
365 80120 Littleton CO
366 27284 Kernersville NC
367 75088 Rowlett TX
368 70401 Hammond LA
369 55007 Brook Park MN
370 54982 Wautoma WI
371 07003 Bloomfield NJ
372 38017 Collierville TN
373 48066 Roseville MI
374 14850 Ithaca NY
375 55345 Minnetonka MN
376 60126 Elmhurst IL
Dunwoody, Georgia not found
377 60126 Elmhurst IL
378 48114 Brighton MI
379 31313 Hinesville GA
380 44060 Mentor OH
381 60115 Dekalb IL
382 08816 East Brunswick NJ
383 75098 Wylie TX
384 96707 Kapolei HI
385 30809 Evans GA
386 96720 Hilo HI
Evesham Township, New Jersey not found
387 96720 Hilo HI
388 77449 Katy TX
389 27526 Fuquay Varina NC
390 23690 Yorktown VA
391 55379 Shakopee MN
392 14586 West Henrietta NY
393 08807 Bridgewater NJ
394 45613 Beaver OH
395 27360 Thomasville NC
396 70448 

698 97116 Forest Grove OR
699 01880 Wakefield MA
700 64443 Easton MO
701 32578 Niceville FL
Shoreview, Minnesota not found
702 32578 Niceville FL
703 28734 Franklin NC
704 22025 Dumfries VA
705 43537 Maumee OH
706 32132 Edgewater FL
707 60091 Wilmette IL
708 23139 Powhatan VA
709 63755 Jackson MO
710 27560 Morrisville NC
711 35125 Pell City AL
712 28031 Cornelius NC
713 32789 Winter Park FL
715 91945 Lemon Grove CA
716 07480 West Milford NJ
717 21627 Crocheron MD
718 81212 Canon City CO
719 94510 Benicia CA
720 11784 Selden NY
721 03054 Merrimack NH
722 28658 Newton NC
723 47933 Crawfordsville IN
724 80401 Golden CO
725 35752 Hollywood AL
726 07450 Ridgewood NJ
727 37355 Manchester TN
728 86326 Cottonwood AZ
729 80439 Evergreen CO
730 37771 Lenoir City TN
731 54956 Neenah WI
732 84054 North Salt Lake UT
733 24551 Forest VA
734 34491 Summerfield FL
735 97520 Ashland OR
736 70791 Zachary LA
737 32566 Navarre FL
738 43004 Blacklick OH
739 48864 Okemos MI
740 19454 North Wales PA
741 40403

1042 37377 Signal Mountain TN
1043 03818 Conway NH
1044 72719 Centerton AR
1045 38060 Oakland TN
1046 19007 Bristol PA
1048 07928 Chatham NJ
1049 17050 Mechanicsburg PA
1050 32951 Melbourne Beach FL
1051 18951 Quakertown PA
1052 28613 Conover NC
1053 39525 Diamondhead MS
1054 08879 South Amboy NJ
1055 19067 Morrisville PA
1056 32569 Mary Esther FL
1057 80549 Wellington CO
1058 33947 Rotonda West FL
1059 07921 Bedminster NJ
1060 93546 Mammoth Lakes CA
1061 55362 Monticello MN
1062 08203 Brigantine NJ
1063 17011 Camp Hill PA
1064 89067 Coyote Springs NV
Williamsburg, Florida not found
1067 33931 Fort Myers Beach FL
1068 17356 Red Lion PA
1069 80421 Bailey CO
1070 30549 Jefferson GA
1071 80517 Estes Park CO
1072 19053 Feasterville Trevose PA
1073 75409 Anna TX
1074 19002 Ambler PA
1075 80534 Johnstown CO
1076 18055 Hellertown PA
1077 32003 Fleming Island FL
1078 15108 Coraopolis PA
Timber Pines, Florida not found
1081 19426 Collegeville PA
1082 15017 Bridgeville PA
1083 33957 Sanibel FL
1

In [5]:
# Create a dataframe of the bad locations to see how many we're removing from the datasource
bad_cities_df = pd.DataFrame(bad_cities, columns=["RegionID", "City", "State"])
bad_cities_df

Unnamed: 0,RegionID,City,State
0,38992,Highlands Ranch,Colorado
1,26561,Plymouth,Minnesota
2,37840,Cheektowaga,New York
3,17845,Eagan,Minnesota
4,10264,Bartlett,Tennessee
5,25383,Kentwood,Michigan
6,12751,Milwaukie,Oregon
7,6102,Murray,Utah
8,49352,Dunwoody,Georgia
9,51952,Evesham Township,New Jersey


In [6]:
# Create new dataframe from dictionary of locations with zip codes
zipcodes_df = pd.DataFrame(zipcodes_dict, columns=['RegionID', 'City', 'State', 'Zip Code'])
zipcodes_df

Unnamed: 0,RegionID,City,State,Zip Code
0,6181,New York,New York,10001
1,17426,Chicago,Illinois,60601
2,18959,Las Vegas,Nevada,89101
3,38128,Dallas,Texas,75201
4,10920,Columbus,Ohio,43085
...,...,...,...,...
1071,49592,New Hope,Pennsylvania,18938
1072,48710,Arnold,California,95223
1073,12306,Langhorne,Pennsylvania,19047
1074,33433,Pingree Grove,Illinois,60081


## CREATE GEOCODING API SEARCH TO ADD COORDINATE DATA

### We'll use the Nominatim API through GeoPy

In [7]:
# Create empty dictionaries to store our results
zipcodes_coords_dict = {"RegionID": [], "City": [], "State": [], "Zip Code": [], "Latitude": [], "Longitude": []}
bad_coords = {"RegionID": [], "City": [], "State": [], "Zip Code": []}
geolocator = Nominatim(user_agent="coord_locator")
for i in zipcodes_df.index:
    try:
        country= "United States"
        place = zipcodes_df["Zip Code"].iloc[i]
        city=zipcodes_df['City'].iloc[i]
        state=zipcodes_df['State'].iloc[i]
        regionId=zipcodes_df['RegionID'].iloc[i]
        location = geolocator.geocode({"postalcode": place, "country": country})
        # Add positive results to lcation dictionary
        zipcodes_coords_dict['RegionID'].append(regionId)
        zipcodes_coords_dict['City'].append(city)
        zipcodes_coords_dict['State'].append(state)
        zipcodes_coords_dict['Zip Code'].append(place)
        zipcodes_coords_dict['Latitude'].append(location.latitude)
        zipcodes_coords_dict['Longitude'].append(location.longitude)
        print((int(i)+1, city, state, location.latitude, location.longitude))
    except:
        # Add locations with errors to separate dictionary
        bad_coords['RegionID'].append(regionId)
        bad_coords['City'].append(city)
        bad_coords['State'].append(state)
        bad_coords['Zip Code'].append(place)
        print(f"No coords found for {city}, {state}")
        pass

(1, 'New York', 'New York', 40.74841846379893, -73.99414662538305)
(2, 'Chicago', 'Illinois', 41.88590973888887, -87.62384881079517)
(3, 'Las Vegas', 'Nevada', 36.16754012512261, -115.1397390708225)
(4, 'Dallas', 'Texas', 32.78591838122747, -96.79898740986125)
(5, 'Columbus', 'Ohio', 40.09937865821035, -83.01535635988175)
(6, 'Louisville', 'Kentucky', 38.251258306496524, -85.74953130976665)
(7, 'Orlando', 'Florida', 28.54369969285714, -81.37867734893716)
(8, 'Denver', 'Colorado', 39.75047244283387, -104.99667202434155)
(9, 'Washington', 'District of Columbia', 38.91577904545211, -77.01780075676825)
(10, 'Portland', 'Oregon', 45.50849526040868, -122.6927202533267)
(11, 'Nashville', 'Tennessee', 36.16546734623656, -86.77459649986355)
(12, 'Milwaukee', 'Wisconsin', 43.04878110657971, -87.89918243948371)
(13, 'Omaha', 'Nebraska', 41.26096203127026, -95.93490843192167)
(14, 'Oklahoma City', 'Oklahoma', 35.45077612039714, -97.51630135887922)
(15, 'Raleigh', 'North Carolina', 35.7756962718746

(119, 'Chapel Hill', 'North Carolina', 35.944320610329264, -79.03800256738151)
(120, 'Rialto', 'California', 34.1151291974359, -117.39435318791276)
(121, 'Columbia', 'Maryland', 39.21138239615816, -76.87965350577723)
(122, 'Meridian', 'Idaho', 43.59069012245927, -116.3883152851739)
(123, 'Lawrence', 'Kansas', 38.96682736099623, -95.23878052268516)
(124, 'Highlands Ranch', 'Colorado', 38.96682736099623, -95.23878052268516)
(125, 'Greeley', 'Colorado', 40.41194859375, -104.69730476904851)
(126, 'League City', 'Texas', 29.498070607389014, -95.10865092359022)
(127, 'Mission Viejo', 'California', 33.614618414436414, -117.66557058051434)
(128, 'Livermore', 'California', 37.67517326263379, -121.75509123023458)
(129, 'Portsmouth', 'Virginia', 36.80965797236636, -76.3692236804929)
(130, 'Hawthorne', 'California', 33.92291718197, -118.34873156477205)
(131, 'Johnson City', 'Tennessee', 36.33432894886623, -82.34241039662975)
(132, 'Idaho Falls', 'Idaho', 43.51971125645162, -112.0075389240074)
(133

(234, 'Laguna Niguel', 'California', 33.53017962548027, -117.70212118646866)
(235, 'Rosemead', 'California', 34.06076914114569, -118.08367745797122)
(236, 'La Crosse', 'Wisconsin', 43.79767536254342, -91.2110172676229)
(237, 'Medina', 'Ohio', 41.143182706218624, -81.84717010069821)
(238, 'Reston', 'Virginia', 38.963261133423686, -77.34394910169539)
(239, 'Shawnee', 'Kansas', 39.01953563738063, -94.70773395613492)
(240, 'Carson City', 'Nevada', 39.15438629356608, -119.76101134372485)
(241, 'Montebello', 'California', 34.01425617370489, -118.11237303437503)
(242, 'Saint Cloud', 'Minnesota', 45.526898974076474, -94.1851679709843)
(243, 'Eden Prairie', 'Minnesota', 44.86269202041034, -93.43036091653731)
(244, 'San Clemente', 'California', 33.42867653813865, -117.61328314788199)
(245, 'Broomfield', 'Colorado', 39.921015727564956, -105.08084169543334)
(246, 'Coon Rapids', 'Minnesota', 46.628359980000006, -96.08378365572295)
(247, 'Blaine', 'Minnesota', 44.62206704814815, -93.76992322462141)


(350, 'Mount Juliet', 'Tennessee', 36.19604224194263, -86.50438443175196)
(351, 'Cerritos', 'California', 33.85972120619913, -118.07362790425233)
(352, 'Eastvale', 'California', 38.88281737083333, -121.13657811690257)
(353, 'Aliso Viejo', 'California', 33.57894532202073, -117.73062895101404)
(354, 'Milwaukie', 'Oregon', 33.57894532202073, -117.73062895101404)
(355, 'Cedar Hill', 'Texas', 32.6018331402199, -96.93369104604061)
(356, 'Hoffman Estates', 'Illinois', 42.048074917948725, -88.10838884696948)
(357, 'Kaneohe', 'Hawaii', 21.407838182558145, -157.80356933108837)
(358, 'Dublin', 'California', 37.71943828001036, -121.85839620135594)
(359, 'Roseburg', 'Oregon', 43.220651508823536, -123.33912232081636)
(360, 'Claremore', 'Oklahoma', 36.325532922590334, -95.60830689079212)
(361, 'Lawrence', 'Indiana', 39.15051771636129, -84.86420325233094)
(362, 'Portage', 'Michigan', 42.21680461537523, -85.5674588789129)
(363, 'Murray', 'Utah', 42.21680461537523, -85.5674588789129)
(364, 'Littleton', 

(465, 'Nottingham', 'Maryland', 39.39080081103655, -76.4866081212859)
(466, 'Wentzville', 'Missouri', 38.80834774347015, -90.86436606726708)
(467, 'Saratoga Springs', 'New York', 43.076574890358415, -73.76840466630624)
(468, 'Ringgold', 'Georgia', 34.915755404996546, -85.14533509043231)
(469, 'Marion', 'Iowa', 42.04061347424608, -91.59297404614492)
(470, 'Miamisburg', 'Ohio', 39.624924492682574, -84.24956622373242)
(471, 'La Quinta', 'California', 33.70177421374999, -116.28340401735592)
(472, 'Montclair', 'California', 34.0739327803509, -117.69901092067785)
(473, 'Pacifica', 'California', 37.630637352659576, -122.4917100461351)
(474, 'Salem', 'Virginia', 37.28932081586419, -80.07091898535181)
(475, 'Park Ridge', 'Illinois', 42.01754238946743, -87.83432449928918)
(476, 'Arnold', 'Missouri', 38.429543760770095, -90.38986802726623)
(477, 'Plymouth', 'Michigan', 42.368213513716114, -83.47513524104653)
(478, 'Ocoee', 'Florida', 28.564656847692316, -81.53875667829283)
(479, 'Perrysburg', 'Oh

(580, 'Mebane', 'North Carolina', 36.0853270223493, -79.24297670710064)
(581, 'Alabaster', 'Alabama', 33.23484633253233, -86.81623431073565)
(582, 'Vestavia Hills', 'Alabama', 34.375362109803916, -88.06047283137258)
(583, 'Burlingame', 'California', 37.581902429027664, -122.35632550290207)
(584, 'Parkville', 'Maryland', 39.388101103137664, -76.53869865380965)
(585, 'North Tonawanda', 'New York', 43.054498933811814, -78.85972531094087)
(586, 'Westfield', 'Indiana', 40.016314999174284, -86.15455916883063)
(587, 'White Lake', 'Michigan', 42.65923447055091, -83.52900325996447)
(588, 'Dover', 'New Hampshire', 43.19234460395155, -70.87878365192631)
(589, 'Cibolo', 'Texas', 29.58323517586302, -98.23520950099307)
(590, 'Leesburg', 'Florida', 28.787542138087563, -81.88178718368995)
(591, 'Monterey', 'California', 36.59716145757533, -121.88561734663755)
(592, 'Abingdon', 'Maryland', 39.46758679339352, -76.30097614415097)
(593, 'Front Royal', 'Virginia', 38.96618932410092, -78.16518043537279)
(59

(695, 'Forest Grove', 'Oregon', 45.5292186040654, -123.11752964132653)
(696, 'Wakefield', 'Massachusetts', 42.50082085459199, -71.06757083968294)
(697, 'Gladstone', 'Missouri', 39.74534668866669, -94.66393596840007)
(698, 'Niceville', 'Florida', 30.489404629970846, -86.4310206487681)
(699, 'Shoreview', 'Minnesota', 30.489404629970846, -86.4310206487681)
(700, 'Franklin', 'North Carolina', 35.179531110935436, -83.38359138330344)
(701, 'Dumfries', 'Virginia', 38.59780882983053, -77.33910025559008)
(702, 'Maumee', 'Ohio', 41.57808075418779, -83.68029425253943)
(703, 'Edgewater', 'Florida', 28.979384432834078, -80.91359693193498)
(704, 'Wilmette', 'Illinois', 42.07528327015999, -87.72350286188153)
(705, 'Powhatan', 'Virginia', 37.54191252483095, -77.88424176627402)
(706, 'Jackson', 'Missouri', 37.41233130719299, -89.65734847772865)
(707, 'Morrisville', 'North Carolina', 35.87664664656601, -78.83100971372065)
(708, 'Pell City', 'Alabama', 33.59834694553438, -86.29584302953228)
(709, 'Cornel

(811, 'New Brighton', 'Minnesota', 44.31182122272728, -94.4655375667796)
(812, 'Belton', 'Missouri', 38.81234923420495, -94.53434546082029)
(813, 'Kenmore', 'Washington', 47.75531208709408, -122.24532354086679)
(814, 'Boiling Springs', 'South Carolina', 35.036021182170124, -81.97793957158062)
(815, 'Maryland Heights', 'Missouri', 38.72167513606661, -90.44551136275864)
(816, 'Valley Center', 'California', 33.25021021386714, -117.03019563481682)
(817, 'Villa Park', 'Illinois', 41.87621649870878, -87.97594241872339)
(818, 'Saline', 'Michigan', 42.1709900377615, -83.79136550045185)
(819, 'Blythewood', 'South Carolina', 34.191775115830715, -80.97728021479274)
(820, 'Washougal', 'Washington', 45.582723002193546, -122.34916363789043)
(821, 'Raritan Township', 'New Jersey', 40.57235083028847, -74.63661925650646)
(822, 'Wilsonville', 'Oregon', 45.306266258156676, -122.77249062157944)
(823, 'Phelan', 'California', 34.34454101428572, -117.48709882853223)
(824, 'West Carson', 'California', 33.8472

(927, 'New Haven', 'Indiana', 41.07251813868341, -85.00874845877561)
(928, 'Town of East Greenbush', 'New York', 42.59624354216158, -73.68401796894877)
(929, 'Tinton Falls', 'New Jersey', 42.59624354216158, -73.68401796894877)
(930, 'Waukee', 'Iowa', 41.59654392333975, -93.86454835002986)
(931, 'Peyton', 'Colorado', 38.952036408215456, -104.60095141511407)
(932, 'Newburyport', 'Massachusetts', 42.810326008757045, -70.87537450476654)
(933, 'Rose Hill', 'Virginia', 36.6497121969567, -83.3323496570131)
(934, 'Gulf Breeze', 'Florida', 30.359376363185792, -87.1698203534162)
(935, 'Watkinsville', 'Georgia', 33.86096878815262, -83.42438756256763)
(936, 'Susquehanna Township', 'Pennsylvania', 41.948507984375006, -75.60491459358283)
(937, 'Sevierville', 'Tennessee', 35.814339770670735, -83.58115302739566)
(938, 'Manchester', 'Missouri', 40.556456100000005, -92.52485795)
(939, 'Danville', 'Indiana', 39.76532519553844, -86.51597475462539)
(940, 'Alamo', 'California', 37.852278533333326, -122.0306

(1040, 'Mary Esther', 'Florida', 30.414488367805756, -86.71553511187314)
(1041, 'Wellington', 'Colorado', 40.7080217, -104.99429719572572)
(1042, 'Rotonda West', 'Florida', 26.896855437864083, -82.27314814180178)
(1043, 'Bedminster Township', 'New Jersey', 40.65632370144218, -74.65100449679626)
(1044, 'Mammoth Lakes', 'California', 37.64092964883681, -118.96681964637393)
(1045, 'Monticello', 'Minnesota', 45.30159188124684, -93.82077146594783)
(1046, 'Brigantine', 'New Jersey', 39.401529775333465, -74.38048314107502)
(1047, 'Camp Hill', 'Pennsylvania', 40.23856676354714, -76.92913729611783)
No coords found for Cold Springs, Nevada
(1049, 'Fort Myers Beach', 'Florida', 26.449262481483206, -81.9356444175053)
(1050, 'Red Lion', 'Pennsylvania', 39.9043122322498, -76.61221500517516)
(1051, 'Bailey', 'Colorado', 39.48827578928571, -105.37812436477454)
(1052, 'Jefferson', 'Georgia', 34.10844982147438, -83.57659503015205)
(1053, 'Roxborough Park', 'Colorado', 40.382589403765685, -105.5206122538

In [8]:
# Create a dataframe of locations with no coordinates, to see how many we're losing from our dataset
bad_coords_df = pd.DataFrame(bad_coords, columns=["RegionID", "City", "State", "Zip Code"])
bad_coords_df

Unnamed: 0,RegionID,City,State,Zip Code
0,11722,Greensboro,North Carolina,27395
1,17759,Des Moines,Iowa,50307
2,37950,Cold Springs,Nevada,89067


In [9]:
# create dataframe from uneven array, then drop the null values
zip_coords_df = pd.DataFrame({ key:pd.Series(value) for key, value in zipcodes_coords_dict.items() })
zip_coords_df = zip_coords_df.dropna()
zip_coords_df

Unnamed: 0,RegionID,City,State,Zip Code,Latitude,Longitude
0,6181,New York,New York,10001,40.748418,-73.994147
1,17426,Chicago,Illinois,60601,41.885910,-87.623849
2,18959,Las Vegas,Nevada,89101,36.167540,-115.139739
3,38128,Dallas,Texas,75201,32.785918,-96.798987
4,10920,Columbus,Ohio,43085,40.099379,-83.015356
...,...,...,...,...,...,...
1068,47409,Roanoke,Texas,76262,40.348777,-74.994791
1069,21021,Vail,Arizona,85641,38.257253,-120.328708
1070,25643,Longboat Key,Florida,34228,40.178802,-74.913333
1071,49592,New Hope,Pennsylvania,18938,42.440309,-88.217048


In [10]:
# Export zip_coords_df to CSV, which will serve as our finalized dictionary of location data to unique RegionID
zip_coords_df.to_csv("./resources/zipcodes_coordinates.csv", index=False)