## Author: Akash

### Spatial imputation approach for harmonization of historical redlining maps with contemporary maps

Data sources: redlining shapefiles (University of Richmond) and census tract shapefiles (US Census Bureau).

In [None]:
#Importing needed libraries
import pandas as pd
import geopandas as gpd
import fiona
from shapely.geometry import shape, mapping
import rtree
import fiona.crs
import os

  shapely_geos_version, geos_capi_version_string


In [None]:
## Load the Cambridge redlining shapefile so its polygon identifiers can be inspected
redlining_shp_path = '/work/COVIDRedlining/data/boston/boston redlining/cambridge_redlining.shp'
shp = gpd.read_file(redlining_shp_path)

# Give every polygon a 1-based identifier derived from its row index
shp['polygon_id'] = shp.index + 1

# Show the resulting column names
shp.columns

Index(['name', 'holc_id', 'holc_grade', 'geometry', 'polygon_id'], dtype='object')

In [None]:
# Export the redlining polygons (including polygon_id) as GeoJSON.
# The file name is relative, so it is written to the current working directory.
geojson_fname = 'cambridge_redlining.geojson'
shp.to_file(geojson_fname, driver='GeoJSON')

### Approach 1 - Getting proportion of intersection between two shapefiles

In [None]:
#"A Method to Construct Geographical Crosswalks with an Application to US Counties since 1790"
#www.fpeckert.me/eglp

## A generic code to construct your own crosswalk, from two shapefiles
##
## For every origin polygon this computes the share of its intersected area
## that falls inside each destination polygon, and saves the
## (origin id, destination id, weight) triples as a CSV crosswalk.

## defining variables
origin_path = '/home/jovyan/work/COVIDRedlining/data/boston/boston census tracts'
origin_fname = 'tl_2019_25_tract.shp'
origin_geoid = 'GEOID'

destination_path = '/home/jovyan/work/COVIDRedlining/data/boston/boston redlining'
destination_fname = 'cambridge_redlining.geojson'
destination_geoid = 'polygon_id'

output_path = '/home/jovyan/work/COVIDRedlining/data/boston'
output_fname = 'cambridge_redline_intersection.csv'

## CRS in which all areas are measured. Areas taken in a geographic CRS such
## as EPSG:4326 are in squared degrees and geopandas warns that they are likely
## incorrect, so both layers are projected to an equal-area CRS first.
## EPSG:5070 (NAD83 / Conus Albers) covers the contiguous US; change it when
## working outside that region.
area_crs = 'EPSG:5070'

## read in starting shapefile
## Full paths are built explicitly instead of calling os.chdir, so this cell
## no longer changes the notebook's working directory.
shp_origin = gpd.read_file(os.path.join(origin_path, origin_fname)).to_crs(area_crs)
shp_origin['area_base'] = shp_origin.area

## read in ending shapefile
shp_destination = gpd.read_file(os.path.join(destination_path, destination_fname)).to_crs(area_crs)

## intersecting the file
intersect = gpd.overlay(shp_origin, shp_destination, how='intersection')
intersect['area'] = intersect.area

## computing weights: share of each origin polygon covered by the intersection piece
intersect['weight'] = intersect['area'] / intersect['area_base']

## renormalizing weights - this isn't necessary, but without it, if the shapefiles do not perfectly line up where they should, you may lose small fractions of area here and there
## After this step the weights of every origin polygon sum to 1.
weight_total = intersect.groupby(origin_geoid)['weight'].transform('sum')
intersect['weight'] = intersect['weight'] / weight_total

## keeping only relevant columns - again isn't necessary, but will help trim down the size of the crosswalk at the end
output = intersect[[origin_geoid, destination_geoid, 'weight']]

## saving output
## Written into output_path; previously output_path was never used and the
## CSV ended up in whichever directory the last os.chdir had selected.
output.to_csv(os.path.join(output_path, output_fname), index=False)

  return _prepare_from_string(" ".join(pjargs))




In [None]:
# Load the saved crosswalk CSV back in and preview its first 20 rows
crosswalk_csv = '/work/COVIDRedlining/data/boston/cambridge_redline_intersection.csv'
output = pd.read_csv(crosswalk_csv)
output.head(20)

Unnamed: 0,GEOID,polygon_id,weight
0,25017353101,15,0.661821
1,25017353101,16,0.338179
2,25017350103,15,1.0
3,25017351404,15,1.0
4,25017351203,15,0.988708
5,25017351203,16,0.008698
6,25017351203,12,0.002594
7,25017353000,15,0.168739
8,25017353000,16,0.290003
9,25017353000,12,0.541258


In [None]:
import shutil

# Relocate this notebook into the Geoprocessing folder of the project
notebook_src = "/work/COVIDRedlining/spatial_outlining.ipynb"
notebook_dst = "/work/COVIDRedlining/Geoprocessing/spatial_outlining.ipynb"
shutil.move(notebook_src, notebook_dst)

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c9a4f701-31a5-4164-b3f3-c09cddf1309e' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>