## Author: Akash

### Spatial imputation approach for harmonization of historical redlining maps with contemporary maps

Data sources: redlining shapefiles (University of Richmond) and census tract shapefiles (US Census Bureau).

In [None]:
#Importing needed libraries
import pandas as pd
import geopandas as gpd
import fiona
from shapely.geometry import shape, mapping
import rtree
import fiona.crs
import os

In [None]:
## Load the Boston redlining shapefile and preview the first rows to inspect
## the polygon_id column (looking for 0 or NULL ids).
redlining_shp_path = '/home/jovyan/work/COVIDRedlining/data/boston/boston redlining/boston_redlining.shp'
shp = gpd.read_file(redlining_shp_path)
# Disabled: would overwrite polygon_id with a 1-based index derived from the row index.
#shp['polygon_id'] = shp.index + 1
shp.head()

Unnamed: 0,polygon_id,state,city,name,holc_id,holc_grade,area_descr,geometry
0,9543.0,MA,Boston,,A1,A,"{ ""32"" : """", ""3k"" : ""new construction "", ""3q""...","POLYGON ((-71.11985 42.32301, -71.11990 42.321..."
1,5015.0,MA,Boston,,B1,B,"{ ""1d"" : ""98%"", ""3i"" : ""$6,500-$7,500 1933-36 ...","POLYGON ((-71.17193 42.34689, -71.17222 42.346..."
2,9545.0,MA,Boston,,B2,B,"{ ""1a"" : ""high - hilly"", ""3j"" : "" $8,000-$12,0...","POLYGON ((-71.14608 42.34329, -71.14567 42.342..."
3,4949.0,MA,Boston,,B3,B,"{ ""2a"" : ""business and professional men"", ""2b""...","POLYGON ((-71.07162 42.35945, -71.07215 42.357..."
4,4946.0,MA,Boston,,B4,B,"{ ""3d"" : ""good good"", ""2c"" : ""0% "", ""1d"" : ""9...","POLYGON ((-71.11527 42.30196, -71.11761 42.302..."


In [None]:
# Export the redlining GeoDataFrame to GeoJSON (written relative to the current working directory).
# NOTE(review): the file name says "stlouis" but `shp` holds the Boston data - confirm the intended name.
geojson_fname = 'stlouis_new_shp.geojson'
shp.to_file(geojson_fname, driver='GeoJSON')

### Approach 1 - Getting proportion of intersection between two shapefiles

In [None]:
#"A Method to Construct Geographical Crosswalks with an Application to US Counties since 1790"
#www.fpeckert.me/eglp

## Generic code to construct an area-weighted crosswalk from two shapefiles.
## For every origin polygon it computes which share of the origin's overlapped
## area falls into each destination polygon and writes
## (origin id, destination id, weight) rows to a CSV file.

## defining variables
origin_path = '/home/jovyan/work/COVIDRedlining/data/boston/boston census tracts'
origin_fname = 'tl_2019_25_tract.shp'
origin_geoid = 'GEOID'

destination_path = '/home/jovyan/work/COVIDRedlining/data/boston/boston redlining'
destination_fname = 'boston_redlining.shp'
destination_geoid = 'polygon_id'

output_path = '/home/jovyan/work/COVIDRedlining/data/boston'
output_fname = 'boston_redline_intersection.csv'

## EPSG code of the projected, equal-area CRS used ONLY for measuring areas.
## Areas measured directly in EPSG:4326 are in squared degrees, which geopandas
## warns against. 5070 is NAD83 / Conus Albers (contiguous US); change it when
## working with data outside that extent.
area_epsg = 5070

## read in starting shapefile
## Paths are joined explicitly instead of calling os.chdir(), so the notebook's
## working directory (and every relative path in other cells) is left untouched.
## to_crs(epsg=...) replaces fiona.crs.from_epsg(), which yields the deprecated
## '+init=epsg:XXXX' form and triggers a pyproj warning.
shp_origin = gpd.read_file(os.path.join(origin_path, origin_fname)).to_crs(epsg=4326)
shp_origin['area_base'] = shp_origin.to_crs(epsg=area_epsg).area

## read in ending shapefile
shp_destination = gpd.read_file(os.path.join(destination_path, destination_fname)).to_crs(epsg=4326)

## intersecting the files: one row per (origin polygon, destination polygon) overlap
intersect = gpd.overlay(shp_origin, shp_destination, how = 'intersection')
intersect['area'] = intersect.to_crs(epsg=area_epsg).area

## computing weights: share of the origin polygon covered by this overlap
intersect['weight'] = intersect['area'] / intersect['area_base']

## renormalizing weights so they sum to 1 within each origin polygon - this isn't
## necessary, but without it, if the shapefiles do not perfectly line up where they
## should, you may lose small fractions of area here and there
intersect['weight'] = intersect['weight'] / intersect.groupby(origin_geoid)['weight'].transform('sum')

## keeping only relevant columns - again isn't necessary, but will help trim down
## the size of the crosswalk at the end
output = intersect[[origin_geoid, destination_geoid, 'weight']]

## saving output into output_path (previously the file landed in whatever
## directory the last os.chdir() had selected, and output_path was unused)
output.to_csv(os.path.join(output_path, output_fname), index = False)

  return _prepare_from_string(" ".join(pjargs))




In [None]:
# Load the saved crosswalk CSV back from disk and preview up to the first 20 rows.
crosswalk_csv_path = '/home/jovyan/work/COVIDRedlining/data/boston/boston_redline_intersection.csv'
output = pd.read_csv(crosswalk_csv_path)
output.head(20)

Unnamed: 0,GEOID,polygon_id,weight
0,25025010600,4947.0,0.095512
1,25025010600,4918.0,0.904488
2,25025010204,4947.0,1.0
3,25025010404,4947.0,0.998484
4,25025010404,4919.0,0.001516
5,25025010405,4947.0,0.241212
6,25025010405,4918.0,0.009213
7,25025010405,4920.0,0.018089
8,25025010405,4919.0,0.731486
9,25025081001,4947.0,0.683236


In [None]:
import shutil

# Relocate this notebook into the Geoprocessing folder.
# NOTE(review): these paths start at /work, while the data cells use /home/jovyan/work - confirm both resolve to the same tree.
notebook_src = "/work/COVIDRedlining/spatial_outlining.ipynb"
notebook_dst = "/work/COVIDRedlining/Geoprocessing/spatial_outlining.ipynb"
shutil.move(notebook_src, notebook_dst)