## Author: Akash

### Creating Reweighted Redlining Index and merging with ACS Census Tracts

Data Source: Redlining and Census Tract Intersection files for each city extracted using the spatial outlining step

In [None]:
# Importing needed libraries
import pandas as pd
import geopandas as gpd

In [None]:
#Read the tract/polygon intersection weights produced by the spatial outlining notebook
intersection_csv = '/home/jovyan/work/COVIDRedlining/data/stlouis/stlouis_redline_intersection.csv'
intersection = pd.read_csv(intersection_csv)
intersection.head()

Unnamed: 0,GEOID,polygon_id,weight
0,29189215400,19,0.917719
1,29189215400,87,0.082281
2,29189217400,19,0.349177
3,29189217400,49,0.00126
4,29189217400,84,0.520468


In [None]:
#Read the redlining polygons (one row per polygon, with its HOLC grade)
redline_geojson = '/home/jovyan/work/COVIDRedlining/data/stlouis/stlouis_new_shp.geojson'
redline = gpd.read_file(redline_geojson)
redline.head()

Unnamed: 0,polygon_id,state,city,name,holc_id,holc_grade,area_descr,geometry
0,1,MO,St. Louis,,A1,A,{'': ''},"POLYGON ((-90.25958 38.56262, -90.26404 38.564..."
1,2,MO,St. Louis,,A10,A,{'': ''},"POLYGON ((-90.24106 38.71593, -90.24632 38.717..."
2,3,MO,St. Louis,,A11,A,{'': ''},"POLYGON ((-90.30306 38.64950, -90.30286 38.650..."
3,4,MO,St. Louis,,A12,A,{'': ''},"POLYGON ((-90.34311 38.59365, -90.34424 38.593..."
4,5,MO,St. Louis,,A13,A,{'': ''},"POLYGON ((-90.35231 38.59203, -90.35033 38.591..."


In [None]:
#Count how many polygons in the redlining shapefile carry each HOLC grade
redline['holc_grade'].value_counts()

B    40
C    39
A    31
D    16
Name: holc_grade, dtype: int64

In [None]:
#Convert HOLC Grade to Ordinal Values
def numerical_redlining_value(row):
    """Return the ordinal score for a row's HOLC grade.

    Grades 'A' through 'E' map to 1 through 5 respectively; any other
    value in row['holc_grade'] maps to 0.
    """
    grade = row['holc_grade']
    # Position in the tuple (1-based) is the ordinal score for that letter
    for score, letter in enumerate(('A', 'B', 'C', 'D', 'E'), start=1):
        if grade == letter:
            return score
    return 0

#Score every polygon row-by-row, then store the scores as a new column
numerical_grades = redline.apply(numerical_redlining_value, axis=1)
redline['numerical_grade'] = numerical_grades

In [None]:
#Attach tract GEOIDs and overlap weights from the intersection table to each
#redlining polygon; the left join keeps every row of the redlining frame
output_combined = pd.merge(
    left=redline,
    right=intersection,
    on='polygon_id',
    how='left',
)
output_combined.head()

Unnamed: 0,polygon_id,state,city,name,holc_id,holc_grade,area_descr,geometry,numerical_grade,GEOID,weight
0,1,MO,St. Louis,,A1,A,{'': ''},"POLYGON ((-90.25958 38.56262, -90.26404 38.564...",1,29510101200,0.321272
1,1,MO,St. Louis,,A1,A,{'': ''},"POLYGON ((-90.25958 38.56262, -90.26404 38.564...",1,29510101300,0.658956
2,2,MO,St. Louis,,A10,A,{'': ''},"POLYGON ((-90.24106 38.71593, -90.24632 38.717...",1,29189212001,0.008838
3,2,MO,St. Louis,,A10,A,{'': ''},"POLYGON ((-90.24106 38.71593, -90.24632 38.717...",1,29510108100,0.009263
4,2,MO,St. Louis,,A10,A,{'': ''},"POLYGON ((-90.24106 38.71593, -90.24632 38.717...",1,29510108200,0.510705


In [None]:
#Viewing the shape (rows, columns) of the merged data
output_combined.shape

(575, 11)

In [None]:
#Show any merged rows whose polygon_id is NA - some polygons have NULL IDs
missing_polygon_id = output_combined['polygon_id'].isna()
output_combined[missing_polygon_id]

Unnamed: 0,polygon_id,state,city,name,holc_id,holc_grade,area_descr,geometry,numerical_grade,GEOID,weight


In [None]:
#Keep only rows that have a polygon_id
#(check whether the affected polygons have corresponding GEOIDs before deleting)
has_polygon_id = output_combined['polygon_id'].notna()
output_combined = output_combined[has_polygon_id]
output_combined.shape

(575, 11)

In [None]:
#Keep just the tract ID, polygon ID, ordinal grade and spatial weight
index_columns = ['GEOID', 'polygon_id', 'numerical_grade', 'weight']
redline_index = output_combined[index_columns]
#Spot-check the rows belonging to a single tract
redline_index[redline_index['GEOID'] == 29189217400]

Unnamed: 0,GEOID,polygon_id,numerical_grade,weight
57,29189217400,19,1,0.349177
180,29189217400,47,2,0.096627
192,29189217400,49,2,0.00126
369,29189217400,82,3,0.017403
383,29189217400,84,3,0.520468
520,29189217400,115,4,0.015065


In [None]:
#Reweighting the redlining numerical index: each polygon's ordinal grade is
#scaled by its spatial weight.
#assign() returns a new DataFrame, so the new column is not written onto a
#slice of output_combined (direct column assignment here triggered pandas'
#SettingWithCopyWarning).
redline_index = redline_index.assign(
    reweighted_redline_index=redline_index['numerical_grade'] * redline_index['weight']
)
#Spot-check the rows belonging to a single tract
redline_index[redline_index['GEOID']==29189217400]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,GEOID,polygon_id,numerical_grade,weight,reweighted_redline_index
57,29189217400,19,1,0.349177,0.349177
180,29189217400,47,2,0.096627,0.193254
192,29189217400,49,2,0.00126,0.002519
369,29189217400,82,3,0.017403,0.05221
383,29189217400,84,3,0.520468,1.561403
520,29189217400,115,4,0.015065,0.060259


In [None]:
#Sum the weighted grades of all polygons within each census tract (GEOID)
weighted_by_tract = redline_index.groupby('GEOID')[['reweighted_redline_index']]
redline_index_grouped = weighted_by_tract.sum()
#Spot-check the aggregated value for a single tract
redline_index_grouped[redline_index_grouped.index == 29189217400]

Unnamed: 0_level_0,reweighted_redline_index
GEOID,Unnamed: 1_level_1
29189217400,2.218823


In [None]:
#Previewing the first rows of the per-tract reweighted index
redline_index_grouped.head()

Unnamed: 0_level_0,reweighted_redline_index
GEOID,Unnamed: 1_level_1
29189210502,3.0
29189211700,2.933171
29189211801,3.0
29189211802,3.0
29189211900,3.0


In [None]:
#Reconverting reweighted values to ordinal
def ordinal_values(row):
    """Bin a row's reweighted redline index into an ordinal class.

    Bins are (0, 1] -> 1, (1, 2] -> 2, (2, 3] -> 3, (3, 4.1] -> 4 and
    (4.1, 5] -> 5. Any value outside (0, 5], including NaN, yields 0.
    """
    score = row['reweighted_redline_index']
    # (upper bound, class) pairs; each bin opens where the previous one closed.
    # NOTE(review): the 4.1 bound looks like slack for sums that land slightly
    # above 4 - confirm this is intentional.
    lower = 0
    for upper, label in ((1, 1), (2, 2), (3, 3), (4.1, 4), (5, 5)):
        if lower < score <= upper:
            return label
        lower = upper
    return 0

#Classify every tract row-by-row, then store the classes as a new column
ordinal_classes = redline_index_grouped.apply(ordinal_values, axis=1)
redline_index_grouped['ordinal_value'] = ordinal_classes

In [None]:
#Previewing the per-tract index alongside its ordinal class
redline_index_grouped.head()

Unnamed: 0_level_0,reweighted_redline_index,ordinal_value
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1
29189210502,3.0,3
29189211700,2.933171,3
29189211801,3.0,3
29189211802,3.0,3
29189211900,3.0,3


In [None]:
#Read the census tract shapefile
tract_shapefile = '/home/jovyan/work/COVIDRedlining/data/stlouis/stlouis census tracts/tl_2019_29_tract.shp'
ct_shape = gpd.read_file(tract_shapefile)
ct_shape.head()

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry
0,29,55,450302,29055450302,4503.02,Census Tract 4503.02,G5020,S,59019556,54839,38.0699995,-91.3834407,"POLYGON ((-91.42897 38.05010, -91.42867 38.050..."
1,29,55,450102,29055450102,4501.02,Census Tract 4501.02,G5020,S,215515312,158937,38.1505661,-91.1929142,"POLYGON ((-91.31192 38.15072, -91.31192 38.150..."
2,29,55,450200,29055450200,4502.0,Census Tract 4502,G5020,S,785265618,714683,37.9120761,-91.208638,"POLYGON ((-91.36840 38.09352, -91.36820 38.093..."
3,29,55,450400,29055450400,4504.0,Census Tract 4504,G5020,S,518540939,475755,37.8958096,-91.3892205,"POLYGON ((-91.52872 37.79422, -91.52861 37.801..."
4,29,15,460400,29015460400,4604.0,Census Tract 4604,G5020,S,216350354,11553444,38.3016635,-93.1718555,"POLYGON ((-93.32762 38.26968, -93.32739 38.270..."


In [None]:
#Converting Census Tract GEOID from object dtype to 64-bit integer so it can be
#matched against the GEOID index of redline_index_grouped. An integer key
#(rather than float) keeps the identifier exact when displayed and exported.
ct_shape['GEOID'] = ct_shape['GEOID'].astype('int64')
#Merging redline index scores with Census Tract data
#(inner join: only tracts that have a redline score are kept)
ct_shape = ct_shape.merge(redline_index_grouped,how='inner',on='GEOID')
ct_shape.head()

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,GEOID,NAME,NAMELSAD,MTFCC,FUNCSTAT,ALAND,AWATER,INTPTLAT,INTPTLON,geometry,reweighted_redline_index,ordinal_value
0,29,189,215400,29189220000.0,2154,Census Tract 2154,G5020,S,11066541,0,38.6496278,-90.3799454,"POLYGON ((-90.40568 38.63569, -90.40567 38.636...",1.164561,2
1,29,189,215800,29189220000.0,2158,Census Tract 2158,G5020,S,3140503,0,38.6625792,-90.3479207,"POLYGON ((-90.36661 38.65838, -90.36661 38.658...",1.173842,2
2,29,510,102300,29510100000.0,1023,Census Tract 1023,G5020,S,1282334,49882,38.5642278,-90.2834473,"POLYGON ((-90.29702 38.56237, -90.29696 38.562...",2.0,2
3,29,510,102400,29510100000.0,1024,Census Tract 1024,G5020,S,584695,0,38.5771575,-90.2778545,"POLYGON ((-90.28452 38.57405, -90.28448 38.574...",2.0,2
4,29,189,217300,29189220000.0,2173,Census Tract 2173,G5020,S,2236243,0,38.6202341,-90.3412412,"POLYGON ((-90.34931 38.61281, -90.34922 38.613...",3.16834,4


In [None]:
#Saving the merged tracts as GeoJSON; the filename has no directory, so it is
#resolved relative to the current working directory
ct_shape.to_file('stlouis_redlined_numerical_values.geojson',driver='GeoJSON')

In [None]:
import os
import shutil

#Move the redline_geoprocess notebook into the Geoprocessing folder.
#Guarded so that re-running the cell after the file has already been moved is
#a no-op instead of raising an error.
notebook_src = "/work/COVIDRedlining/redline_geoprocess.ipynb"
geoprocessing_dir = "/work/COVIDRedlining/Geoprocessing/"
if os.path.isfile(notebook_src):
    shutil.move(notebook_src, geoprocessing_dir)
else:
    print("Nothing to move: {} not found".format(notebook_src))