## Author: Akash

### Creating Reweighted Redlining Index and merging with ACS Census Tracts

Data source: the redlining–census tract intersection file for each city, produced by the spatial outlining notebook.

In [None]:
# Importing needed libraries
import pandas as pd
import geopandas as gpd

  shapely_geos_version, geos_capi_version_string


In [None]:
#Load the spatial intersection file generated using spatial outlining notebook
#Each row pairs a census tract GEOID with a redlining polygon_id and a weight.
#NOTE(review): weight is presumably the share of the tract attributed to that polygon - confirm against the spatial outlining notebook
intersection = pd.read_csv('/home/jovyan/work/COVIDRedlining/data/boston/cambridge_redline_intersection.csv')
intersection.head()

Unnamed: 0,GEOID,polygon_id,weight
0,25017353101,15,0.661821
1,25017353101,16,0.338179
2,25017350103,15,1.0
3,25017351404,15,1.0
4,25017351203,15,0.988708


In [None]:
#Load the redlining polygons (GeoJSON) into a GeoDataFrame
#NOTE(review): this path is rooted at /work while the other inputs in this notebook use /home/jovyan/work - confirm both resolve in the target environment
redline = gpd.read_file('/work/COVIDRedlining/data/boston/boston redlining/cambridge_redlining.geojson')
redline.columns

Index(['name', 'holc_id', 'holc_grade', 'polygon_id', 'geometry'], dtype='object')

In [None]:
#Count how many rows of the redlining file carry each HOLC grade
redline['holc_grade'].value_counts()

B    7
C    6
D    2
A    1
Name: holc_grade, dtype: int64

In [None]:
#Convert HOLC Grade to Ordinal Values
def numerical_redlining_value(row):
    """Translate a row's HOLC letter grade into its ordinal number.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by ``'holc_grade'`` (for example a DataFrame row).

    Returns
    -------
    int
        1 for 'A', 2 for 'B', 3 for 'C', 4 for 'D', 5 for 'E';
        0 for any other value.
    """
    grade_to_ordinal = {'A': 1, 'B': 2, 'C': 3, 'D': 4, 'E': 5}
    # Anything that is not one of the five known letters falls back to 0.
    return grade_to_ordinal.get(row['holc_grade'], 0)

#Assign ordinal values to a new column (axis=1 applies the function once per row)
redline['numerical_grade'] = redline.apply(numerical_redlining_value, axis=1)

In [None]:
#Join the tract intersection table onto the redlining polygons by polygon_id.
#how='left' keeps every redlining polygon; a polygon with no match in intersection gets NaN for GEOID and weight.
output_combined=pd.merge(redline, intersection, how='left', on='polygon_id')
output_combined.shape

(89, 8)

In [None]:
#Viewing the shape of the merged data as (rows, columns)
output_combined.shape

(89, 8)

In [None]:
#Show the rows whose polygon_id is NA - some polygons may have NULL IDs
output_combined[output_combined['polygon_id'].isna()]

Unnamed: 0,name,holc_id,holc_grade,polygon_id,geometry,numerical_grade,GEOID,weight


In [None]:
#Keep only rows that have a polygon_id (check whether the dropped polygons have corresponding GEOIDs before deleting)
output_combined = output_combined.dropna(subset=['polygon_id'])
output_combined.shape

(89, 8)

In [None]:
#Subsetting redline numerical index and spatial weights.
#.copy() makes redline_index an independent DataFrame rather than a slice of
#output_combined, so adding columns to it does not raise SettingWithCopyWarning.
redline_index = output_combined[['GEOID','polygon_id','numerical_grade','weight']].copy()
#Spot-check a single tract
redline_index[redline_index['GEOID']==25017353101]

Unnamed: 0,GEOID,polygon_id,numerical_grade,weight
51,25017353101,15,4,0.661821
75,25017353101,16,4,0.338179


In [None]:
#Scale each row's ordinal grade by its spatial weight
redline_index['reweighted_redline_index'] = redline_index['numerical_grade'].mul(redline_index['weight'])
#Spot-check the same tract as before
redline_index.loc[redline_index['GEOID'] == 25017353101]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0,GEOID,polygon_id,numerical_grade,weight,reweighted_redline_index
51,25017353101,15,4,0.661821,2.647285
75,25017353101,16,4,0.338179,1.352715


In [None]:
#Sum the weighted grades within each GEOID (GEOID becomes the index of the result)
redline_index_grouped = redline_index.groupby('GEOID')[['reweighted_redline_index']].sum()
#Spot-check a single tract by its index value
redline_index_grouped.loc[redline_index_grouped.index == 25017353101]

Unnamed: 0_level_0,reweighted_redline_index
GEOID,Unnamed: 1_level_1
25017353101,4.0


In [None]:
#Preview the first rows of the grouped index
redline_index_grouped.head()

Unnamed: 0_level_0,reweighted_redline_index
GEOID,Unnamed: 1_level_1
25017350103,4.0
25017350104,4.0
25017350200,4.0
25017350900,3.0
25017351000,3.155308


In [None]:
#Reconverting reweighted values to ordinal
def ordinal_values(row):
    """Bucket a row's reweighted redline index back into an ordinal grade.

    Parameters
    ----------
    row : mapping-like
        Any object indexable by ``'reweighted_redline_index'``.

    Returns
    -------
    int
        The ordinal of the band containing the score, or 0 when the score
        falls in no band (<= 0, > 5, or NaN).
    """
    score = row['reweighted_redline_index']
    # Each band is (exclusive lower bound, inclusive upper bound, ordinal).
    # Note the grade-4 band runs up to 4.1 rather than 4, so grade 5 starts above 4.1.
    bands = (
        (0, 1, 1),
        (1, 2, 2),
        (2, 3, 3),
        (3, 4.1, 4),
        (4.1, 5, 5),
    )
    for lower, upper, ordinal in bands:
        if lower < score <= upper:
            return ordinal
    return 0

#Store the ordinal bucket for each GEOID in a new column (axis=1 applies the function once per row)
redline_index_grouped['ordinal_value'] = redline_index_grouped.apply(ordinal_values, axis=1)

In [None]:
#Preview the grouped index together with its ordinal_value column
redline_index_grouped.head()

Unnamed: 0_level_0,reweighted_redline_index,ordinal_value
GEOID,Unnamed: 1_level_1,Unnamed: 2_level_1
25017350103,4.0,4
25017350104,4.0,4
25017350200,4.0,4
25017350900,3.0,3
25017351000,3.155308,4


In [None]:
#Shape of the grouped index as (rows, columns); there is one row per GEOID
redline_index_grouped.shape

(43, 2)

In [None]:
#Load Census Tract Data from the tl_2019_25_tract shapefile into a GeoDataFrame
ct_shape = gpd.read_file('/home/jovyan/work/COVIDRedlining/data/boston/boston census tracts/tl_2019_25_tract.shp')
ct_shape.shape

(1478, 13)

In [None]:
#Converting Census Tract GEOID from Object Datatype to Float so it can be joined on GEOID
ct_shape['GEOID'] = ct_shape['GEOID'].astype(float)
#Merging redline index scores with Census Tract data; how='inner' keeps only tracts present in both tables
#NOTE(review): this cell rebinds ct_shape, so re-running it without reloading the shapefile merges the already-merged frame again
ct_shape = ct_shape.merge(redline_index_grouped,how='inner',on='GEOID')
ct_shape.shape

(43, 15)

In [None]:
#Saving file as geojson (the path is relative, so the file is written to the current working directory)
ct_shape.to_file('cambridge_redlined_numerical_values.geojson',driver='GeoJSON')

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=c9a4f701-31a5-4164-b3f3-c09cddf1309e' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>