In [1]:
import json

In [2]:
# Load the Sanborn data. JSON text is UTF-8, so the encoding is set explicitly
# rather than left to the platform default (which is not UTF-8 everywhere).
with open('sanborn-with-fips.json', encoding='utf-8') as f:
    sanborn = json.load(f)

In [18]:
def getCities(data=None):
    """Flatten nested state/county/city data into one record per city.

    Args:
        data: List of state dicts, each with a 'state' name and a 'counties'
            list whose entries hold a 'cities' list of dicts with a 'city'
            name. Defaults to the notebook-level ``sanborn`` object.

    Returns:
        A list of ``{'state': <state name>, 'city': <city name>}`` dicts, in
        the order the cities appear in ``data``.
    """
    if data is None:
        data = sanborn
    return [
        {'state': state['state'], 'city': city['city']}
        for state in data
        for county in state['counties']
        for city in county['cities']
    ]

In [23]:
# One {'state', 'city'} record per city found in the Sanborn data.
city_list = getCities()

In [24]:
# Total number of city records; this determines how many geocoder batches are needed.
len(city_list)

8508

## Prepare for Geocoding

Now, we need to write these cities and states into a csv file in order to use an online batch geocoder.

In [25]:
import csv

In [26]:
# newline='' hands line-ending control to the csv module; without it,
# platforms that translate '\n' write a blank line after every row.
with open('city-coordinates.csv', 'w', newline='') as csv_file:
    fieldnames = ['city', 'state']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(city_list)

The [geocoder I'm using](https://www.geocod.io/) only allows 2,500 free lookups at once, so the next cells show how to limit the number of records that go into a file. I have 8,508 records, so writing 2,400 per file (comfortably under the limit) gives 4 files.

In [27]:
# Batch 1: records 0-2399. Slicing (instead of indexing a fixed range) just
# yields fewer rows if the list is shorter than expected, rather than raising.
# newline='' prevents blank lines between rows on platforms that translate '\n'.
with open('city-coordinates-1.csv', 'w', newline='') as csv_file:
    fieldnames = ['city', 'state']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(city_list[:2400])

In [28]:
# Batch 2: records 2400-4799. Like range(), a slice includes the start index
# but not the end index, so this includes record 2400 but not record 4800.
# newline='' prevents blank lines between rows on platforms that translate '\n'.
with open('city-coordinates-2.csv', 'w', newline='') as csv_file:
    fieldnames = ['city', 'state']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(city_list[2400:4800])

In [29]:
# Batch 3: records 4800-7199 (slice end index is exclusive).
# newline='' prevents blank lines between rows on platforms that translate '\n'.
with open('city-coordinates-3.csv', 'w', newline='') as csv_file:
    fieldnames = ['city', 'state']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(city_list[4800:7200])

In [30]:
# Batch 4: record 7200 onward, capped at 2,400 rows like the other batches.
# A slice stops at the end of the list, so the last index (8507 for 8,508
# records, since counting starts at 0) does not need to be hard-coded.
# newline='' prevents blank lines between rows on platforms that translate '\n'.
with open('city-coordinates-4.csv', 'w', newline='') as csv_file:
    fieldnames = ['city', 'state']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(city_list[7200:9600])

## Use Geocoded Results

Once each file has been run through the geocoder, the results are saved as CSV files in the same directory, under the names city-coordinates-1_geocodio.csv, city-coordinates-2_geocodio.csv, etc. The next step is to read in these files and add the latitudes and longitudes to the original Sanborn data. After that, we'll create a GeoJSON file that can be used to visualize the results.

Let's read the files into a Python dictionary:

In [43]:
# Lookup table: (city, state) -> [latitude, longitude].
coordsDictionary = dict()

# newline='' is what the csv module expects of files it reads, so quoted
# fields containing line breaks are parsed correctly.
with open('city-coordinates-1_geocodio.csv', 'r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        coordsDictionary[row['city'], row['state']] = [float(row['Latitude']), float(row['Longitude'])]

In [45]:
# Add the second batch of geocoded rows to the (city, state) lookup.
# newline='' is what the csv module expects of files it reads.
with open('city-coordinates-2_geocodio.csv', 'r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        coordsDictionary[row['city'], row['state']] = [float(row['Latitude']), float(row['Longitude'])]

In [46]:
# Add the third batch of geocoded rows to the (city, state) lookup.
# newline='' is what the csv module expects of files it reads.
with open('city-coordinates-3_geocodio.csv', 'r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        coordsDictionary[row['city'], row['state']] = [float(row['Latitude']), float(row['Longitude'])]

In [47]:
# Add the fourth and final batch of geocoded rows to the (city, state) lookup.
# newline='' is what the csv module expects of files it reads.
with open('city-coordinates-4_geocodio.csv', 'r', newline='') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        coordsDictionary[row['city'], row['state']] = [float(row['Latitude']), float(row['Longitude'])]

In [49]:
# Inspect the lookup built above: (city, state) -> [latitude, longitude].
coordsDictionary

{('Abbeville', 'Alabama'): [31.559402, -85.222965],
 ('Headland', 'Alabama'): [31.357917, -85.30124],
 ('Alabama City', 'Alabama'): [0.0, 0.0],
 ('Attalla', 'Alabama'): [34.051848, -86.061397],
 ('Gadsden', 'Alabama'): [34.049367, -85.934285],
 ('Alexander City', 'Alabama'): [32.916507, -85.940602],
 ('Dadeville', 'Alabama'): [32.794796, -85.744428],
 ('Aliceville', 'Alabama'): [33.12957, -88.15142],
 ('Anniston', 'Alabama'): [33.64916, -85.879642],
 ('Jacksonville', 'Alabama'): [33.853627, -85.758295],
 ('Oxford', 'Alabama'): [33.614271, -85.834965],
 ('Piedmont', 'Alabama'): [33.92455, -85.61135],
 ('Athens', 'Alabama'): [34.764976, -87.103488],
 ('Auburn', 'Alabama'): [32.547542, -85.468222],
 ('Opelika', 'Alabama'): [32.694763, -85.395626],
 ('Bessemer', 'Alabama'): [33.402429, -86.951784],
 ('Birmingham', 'Alabama'): [33.456412, -86.801904],
 ('Brewton', 'Alabama'): [31.129427, -87.096126],
 ('Bridgeport', 'Alabama'): [34.94758, -85.71442],
 ('Scottsboro', 'Alabama'): [34.750724, 

Now that the files have been read into a dictionary, we can iterate through the Sanborn data and add in the city coordinates. You'll notice that some of the cities have 0.0, 0.0 as their coordinates — those are the cities that the geocoder failed to find. They'll be ignored for now but will need to be fixed later.

In [51]:
def addCoordstoSanborn(data=None, coords=None):
    """Attach geocoded coordinates to every city entry, in place.

    Args:
        data: Nested state/county/city structure to annotate. Defaults to the
            notebook-level ``sanborn`` object.
        coords: Mapping of ``(city name, state name)`` to
            ``[latitude, longitude]``. Defaults to the notebook-level
            ``coordsDictionary``.

    Returns:
        None. Each city dict gains 'lat' and 'long' keys.

    Raises:
        KeyError: If a (city, state) pair has no entry in ``coords``.
    """
    if data is None:
        data = sanborn
    if coords is None:
        coords = coordsDictionary
    for state in data:
        stateName = state['state']
        for county in state['counties']:
            for city in county['cities']:
                # Look the pair up once and reuse it for both fields.
                location = coords[city['city'], stateName]
                city['lat'] = location[0]
                city['long'] = location[1]

In [52]:
# Modifies `sanborn` in place: every city entry gains 'lat' and 'long' keys.
addCoordstoSanborn()

Let's take a look at the first state (Alabama) to make sure it worked:

In [55]:
# Show the first state entry to check the result.
sanborn[0]

{'state': 'Alabama',
 'counties': [{'county': 'Henry County',
   'cities': [{'city': 'Abbeville',
     'items': [{'name': 'Sanborn Fire Insurance Map from Abbeville, Henry County, Alabama.',
       'date': '1907-06',
       'thumbnail_urls': ['https://tile.loc.gov/storage-services/service/gmd/gmd397m/g3974m/g3974am/g3974am_g000011907/00001_1907-0001.gif',
        'https://tile.loc.gov/storage-services/service/gmd/gmd397m/g3974m/g3974am/g3974am_g000011907/00001_1907-0001.gif#h=150&w=126'],
       'iiif_urls': ['https://tile.loc.gov/image-services/iiif/service:gmd:gmd397m:g3974m:g3974am:g3974am_g000011907:00001_1907-0001/full/pct:12.5/0/default.jpg',
        'https://tile.loc.gov/image-services/iiif/service:gmd:gmd397m:g3974m:g3974am:g3974am_g000011907:00001_1907-0001/full/pct:12.5/0/default.jpg'],
       'item_url': 'https://www.loc.gov/item/sanborn00001_001/'},
      {'name': 'Sanborn Fire Insurance Map from Abbeville, Henry County, Alabama.',
       'date': '1913-08',
       'thumbnai

Let's also collect them into a dictionary that can be written out as GeoJSON. Each entry will be a feature, with a geometry of type Point. That geometry will then contain the coordinates (GeoJSON orders them longitude first, then latitude). Using a feature will allow it to have an id (I'm going to base it on the latitude and longitude) and properties (I'm going to include the state, county, and city indices into the Sanborn data for my project).

In [97]:
# Empty GeoJSON FeatureCollection; its 'features' list starts out empty.
allCoords = {'type': 'FeatureCollection', 'features': []}

In [98]:
# Build the feature list from scratch and assign it in one step, so that
# re-running this cell replaces the features instead of appending duplicates.
features = []
for i, state in enumerate(sanborn):
    for j, county in enumerate(state['counties']):
        for k, city in enumerate(county['cities']):
            # Coordinates of exactly (0, 0) are left out of the output.
            if city['lat'] == 0 and city['long'] == 0:
                continue
            features.append({
                'type': 'Feature',
                'geometry': {
                    'type': 'Point',
                    # GeoJSON positions are ordered [longitude, latitude].
                    'coordinates': [city['long'], city['lat']],
                },
                'id': str(city['lat']) + ':' + str(city['long']),
                # Indices into sanborn / state['counties'] / county['cities'].
                'properties': {'state': i, 'county': j, 'city': k},
            })
allCoords['features'] = features

In [99]:
# Display the assembled FeatureCollection.
allCoords

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-85.222965, 31.559402]},
   'id': '31.559402:-85.222965',
   'properties': {'state': 0, 'county': 0, 'city': 0}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-85.30124, 31.357917]},
   'id': '31.357917:-85.30124',
   'properties': {'state': 0, 'county': 0, 'city': 1}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-86.061397, 34.051848]},
   'id': '34.051848:-86.061397',
   'properties': {'state': 0, 'county': 1, 'city': 1}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-85.934285, 34.049367]},
   'id': '34.049367:-85.934285',
   'properties': {'state': 0, 'county': 1, 'city': 2}},
  {'type': 'Feature',
   'geometry': {'type': 'Point', 'coordinates': [-85.940602, 32.916507]},
   'id': '32.916507:-85.940602',
   'properties': {'state': 0, 'county': 2, 'city': 0}},
  {'type': 'Feature',
   'geometry'

Finally, let's write this into a GeoJSON file. From there, we'll be able to use online tools to convert it into other formats if desired.

In [101]:
# The context manager closes the file even if serialization fails part-way.
with open('us-cities.json', 'w') as f:
    json.dump(allCoords, f)