## Author: Akash

### Spatial imputation approach for harmonization of historical redlining maps with contemporary maps

Data sources: redlining shapefiles (University of Richmond) and census tract shapefiles (US Census Bureau).

In [None]:
#Importing needed libraries
import pandas as pd
import geopandas as gpd
import os

In [None]:
## Load the St. Louis redlining shapefile and inspect its polygon_id column
redlining_shp_path = '/home/jovyan/work/COVIDRedlining/data/stlouis/stlouis redlining/stlouis_redlining.shp'
shp = gpd.read_file(redlining_shp_path)

# Set polygon_id to the 1-based row position (index 0 -> 1, 1 -> 2, ...);
# this replaces whatever values the column held before, if it existed.
shp['polygon_id'] = shp.index + 1

# Preview the first rows to check the identifiers
shp.head()

Unnamed: 0,polygon_id,state,city,name,holc_id,holc_grade,area_descr,geometry
0,1,MO,St. Louis,,A1,A,"{ """" : """" }","POLYGON ((-90.25958 38.56262, -90.26404 38.564..."
1,2,MO,St. Louis,,A10,A,"{ """" : """" }","POLYGON ((-90.24106 38.71593, -90.24632 38.717..."
2,3,MO,St. Louis,,A11,A,"{ """" : """" }","POLYGON ((-90.30306 38.64950, -90.30286 38.650..."
3,4,MO,St. Louis,,A12,A,"{ """" : """" }","POLYGON ((-90.34311 38.59365, -90.34424 38.593..."
4,5,MO,St. Louis,,A13,A,"{ """" : """" }","POLYGON ((-90.35231 38.59203, -90.35033 38.591..."


In [None]:
# Converting the shapefile into GeoJSON.
# The file is written to an explicit location rather than the current working
# directory: the crosswalk cell reads 'stlouis_new_shp.geojson' from the
# data/stlouis folder, and on a fresh kernel the working directory is not
# guaranteed to be that folder.
redlining_geojson_path = os.path.join(
    '/home/jovyan/work/COVIDRedlining/data/stlouis',
    'stlouis_new_shp.geojson',
)
shp.to_file(redlining_geojson_path, driver='GeoJSON')

### Approach 1 - Getting proportion of intersection between two shapefiles

In [None]:
#"A Method to Construct Geographical Crosswalks with an Application to US Counties since 1790"
#www.fpeckert.me/eglp

## A generic code to construct your own crosswalk, from two shapefiles.
## For every origin polygon it computes what share of the origin's intersected
## area falls inside each destination polygon, and saves the result as a CSV
## with one row per (origin id, destination id) pair.

## defining variables
origin_path = '/home/jovyan/work/COVIDRedlining/data/stlouis/stlouis census tracts'
origin_fname = 'tl_2019_29_tract.shp'
origin_geoid = 'GEOID'

destination_path = '/home/jovyan/work/COVIDRedlining/data/stlouis'
destination_fname = 'stlouis_new_shp.geojson'
destination_geoid = 'polygon_id'

output_path = '/home/jovyan/work/COVIDRedlining/data/stlouis'
output_fname = 'stlouis_redline_intersection.csv'

## Common projected CRS used for the overlay and for all area computations.
## The two inputs arrive in different geographic CRSs (EPSG:4269 vs EPSG:4326),
## which makes gpd.overlay emit a CRS-mismatch warning, and areas measured in
## degrees are not true areas. EPSG:5070 (NAD83 / Conus Albers) is an equal-area
## projection for the contiguous US; change it for data outside that extent.
area_crs = 'EPSG:5070'

## read in starting shapefile (explicit path, no dependence on the working directory)
shp_origin = gpd.read_file(os.path.join(origin_path, origin_fname)).to_crs(area_crs)
shp_origin['area_base'] = shp_origin.area

## read in ending shapefile
shp_destination = gpd.read_file(os.path.join(destination_path, destination_fname)).to_crs(area_crs)

## The original version of this cell left the working directory at destination_path;
## that side effect is kept so relative paths elsewhere in the notebook resolve as before.
os.chdir(destination_path)

## intersecting the two layers; each resulting piece carries the attributes of both parents
intersect = gpd.overlay(shp_origin, shp_destination, how='intersection')
intersect['area'] = intersect.area

## computing weights: area of each piece relative to the full area of its origin polygon
intersect['weight'] = intersect['area'] / intersect['area_base']

## renormalizing weights so they sum to 1 within each origin polygon - this isn't
## necessary, but without it, if the shapefiles do not perfectly line up where they
## should, you may lose small fractions of area here and there
weight_total = intersect.groupby(origin_geoid)['weight'].transform('sum')
intersect['weight'] = intersect['weight'] / weight_total

## keeping only relevant columns - again isn't necessary, but will help trim down
## the size of the crosswalk at the end
output = intersect[[origin_geoid, destination_geoid, 'weight']]

## saving output to the configured output folder (output_path was previously unused)
output.to_csv(os.path.join(output_path, output_fname), index=False)


Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: EPSG:4269
Right CRS: EPSG:4326




In [None]:
# Reading back the crosswalk CSV written by the crosswalk cell (same output_path /
# output_fname), instead of a hard-coded file belonging to a different city.
# The origin id column is read as text so the identifiers keep any leading zeros
# and have the same type as the GEOID column coming from the shapefile.
output = pd.read_csv(
    os.path.join(output_path, output_fname),
    dtype={origin_geoid: str},
)
output.head(20)

Unnamed: 0,GEOID,polygon_id,weight
0,12057001100,3983.0,0.300331
1,12057001100,3987.0,0.699669
2,12057001200,3983.0,1.0
3,12057001400,3983.0,1.0
4,12057001500,3983.0,1.0
5,12057001700,3983.0,0.959587
6,12057001700,3987.0,0.040413
7,12057002100,3983.0,0.39873
8,12057002100,3986.0,0.317201
9,12057002100,3985.0,0.284069


In [None]:
import shutil

# Relocate this notebook into the Geoprocessing folder.
notebook_src = "/work/COVIDRedlining/spatial_outlining.ipynb"
notebook_dst = "/work/COVIDRedlining/Geoprocessing/spatial_outlining.ipynb"

# Move only while the source file still exists, so re-running the cell after the
# notebook has already been moved does not raise FileNotFoundError.
if os.path.exists(notebook_src):
    shutil.move(notebook_src, notebook_dst)