In [1]:
# Import Redlining Data

In [2]:
## Get the Aaronson et al. replication data from https://www.aeaweb.org/articles?id=10.1257/pol.20190414

In [3]:
import os
import geopandas as gpd
import pandas as pd

# Fix PROJ path if needed (for coordinate transforms).
# The directory below is specific to one conda environment, so the override is
# applied only when it actually exists; on any other machine PROJ's own lookup
# is left untouched instead of being pointed at a missing folder.
PROJ_DATA_DIR = "/home/idies/mambaforge/envs/econark/share/proj"
if os.path.isdir(PROJ_DATA_DIR):
    os.environ["PROJ_LIB"] = PROJ_DATA_DIR

# === 1. Load HOLC data ===
holc = gpd.read_file("mappinginequality.gpkg")

# === 2. Filter for Chicago ===
# Case-insensitive substring match on the "city" column; rows with a missing
# city are treated as non-matches (na=False).
holc_chi = holc[holc["city"].str.contains("Chicago", case=False, na=False)].copy()
print(f"HOLC polygons loaded for Chicago: {len(holc_chi)}")

# Keep relevant columns
holc_chi = holc_chi[["area_id", "grade", "geometry"]]

# Display a preview table
display(holc_chi.head())

# === 3. Ensure coordinate system is projected in meters ===
# This step is required for buffer distances
holc_chi = holc_chi.to_crs(epsg=26916)  # NAD83 / UTM zone 16N (covers Chicago region)

# === 4. Create 0.25-mile (≈402 meters) buffers around each HOLC area ===
# buffer() grows every polygon outward by the given distance, so each result
# contains the original area plus a 402 m ring around it.
buffer_distance = 402  # meters
holc_buffers = holc_chi.copy()
holc_buffers["geometry"] = holc_buffers.buffer(buffer_distance)

print(f"Created buffer zones of {buffer_distance} meters around each HOLC area.")

# Preview buffers (optional)
display(holc_buffers.head())

# === 5. (Optional) Save buffers for reference ===
# NOTE(review): the geometries are written in EPSG:26916 (meters); many GeoJSON
# consumers assume WGS84 longitude/latitude — confirm this is intended.
holc_buffers.to_file("holc_chicago_buffers.geojson", driver="GeoJSON")


HOLC polygons loaded for Chicago: 703


Unnamed: 0,area_id,grade,geometry
2014,11474,C,"POLYGON ((-87.86999 41.8197, -87.87006 41.8233..."
2015,1065,A,"MULTIPOLYGON (((-87.83054 42.25361, -87.82979 ..."
2016,1091,A,"MULTIPOLYGON (((-87.76217 42.12934, -87.76233 ..."
2017,1097,A,"MULTIPOLYGON (((-87.75082 42.13867, -87.75053 ..."
2018,1098,A,"MULTIPOLYGON (((-87.74436 42.12695, -87.74135 ..."


Created buffer zones of 402 meters around each HOLC area.


Unnamed: 0,area_id,grade,geometry
2014,11474,C,"POLYGON ((426501.391 4631102.169, 426501.357 4..."
2015,1065,A,"POLYGON ((430989.571 4678536.142, 430973.129 4..."
2016,1091,A,"POLYGON ((436051.194 4664575.432, 436017.698 4..."
2017,1097,A,"POLYGON ((437691.579 4665139.54, 437664.869 46..."
2018,1098,A,"POLYGON ((437876.497 4664646.627, 437715.765 4..."


In [4]:
# Table 1
## There are 703 mapped areas (neighborhood polygons) in Chicago that were graded by the Home Owners’ Loan Corporation (HOLC) in the 1930s.
## Each of these polygons corresponds to a distinct neighborhood “area” on the redlining map.

# Table 2
## Created 0.25-mile buffer polygons (≈402 meters) around each HOLC area.
## The geometry numbers are now in meters, not degrees, because we switched to a projected coordinate system — this allows accurate measurement of distances like “0.25 miles.”

In [None]:
# Import School Data

In [1]:
import pandas as pd

# === 1. Read the two source tables (every column is kept as text) ===
locations = pd.read_excel("school-locations.xlsx", dtype=str)
school_info = pd.read_csv("school-level.csv", dtype=str)

# === 2. Inner-join on NCESSCH so only schools present in both files remain ===
merged = pd.merge(locations, school_info, on="NCESSCH", how="inner")

# === 3. Build the Chicago / high-school row filters ===
# The column describing school level differs between datasets ("LEVEL",
# "SCH_LEVEL", "SCHOOL_TYPE", ...). "LEVEL" is assumed here, with high schools
# identified by the word "High"; run print(merged.columns) to check.
in_chicago = merged["CITY"].str.contains("Chicago", case=False, na=False)
is_high_school = merged["LEVEL"].str.contains("High", case=False, na=False)

# === 4. Apply the filters and keep only the columns needed downstream ===
cols_to_keep = ["NCESSCH", "NAME", "LAT", "LON", "CITY", "STATE"]
merged_chi = merged.loc[in_chicago & is_high_school, cols_to_keep].copy()

# === 5. Coordinates arrive as strings; cast them to floats for mapping ===
merged_chi = merged_chi.astype({"LAT": float, "LON": float})

# === 6. Show the first 20 schools as plain text ===
print("\n=== Chicago High Schools with Coordinates ===")
print(merged_chi.head(20).to_string(index=False))

# === 7. (Optional) Write the table to CSV for GIS or later analysis ===
merged_chi.to_csv("chicago_high_schools_with_coords.csv", index=False)

FileNotFoundError: [Errno 2] No such file or directory: 'school-locations.xlsx'

In [None]:
# === 2. Load CRDC data ===
crdc_file = "crdc_advanced_stem.csv"  # Update with your path
crdc = pd.read_csv(crdc_file, dtype=str)

# The school-data cell above names its Chicago high-school table `merged_chi`;
# `nces_chi` was never defined, so this cell failed with NameError on a fresh
# kernel. Alias it here so the notebook runs top to bottom.
# NOTE(review): confirm merged_chi is the intended NCES table.
nces_chi = merged_chi

# Filter for Chicago schools using NCES school ID
crdc_chi = crdc[crdc["NCESID"].isin(nces_chi["NCESSCH"])].copy()

# === 3. Create indicator if school offers any advanced STEM classes ===
# Adjust these column names to match your CRDC dataset
stem_cols = [col for col in crdc_chi.columns if "ADV_STEM" in col]

# Binary column: any advanced STEM offered.
# Values were read as text, so "offered" is the string "1". The vectorised
# comparison also yields a clean all-False column when there are no ADV_STEM
# columns or no matching rows.
crdc_chi["offers_any_advanced_stem"] = crdc_chi[stem_cols].eq("1").any(axis=1)


# Column listing all advanced STEM courses
def list_stem_courses(row):
    """Return the advanced STEM courses flagged "1" in `row` as one string.

    Course names are the ADV_STEM column names with the "ADV_STEM_" prefix
    removed, joined by ", ". A row with no flagged course gives "".
    """
    return ", ".join([col.replace("ADV_STEM_", "") for col in stem_cols if row[col] == "1"])


# Built as a list so the assignment also works when crdc_chi has no rows.
crdc_chi["advanced_stem_courses"] = [
    list_stem_courses(row) for _, row in crdc_chi.iterrows()
]

# === 4. Merge NCES and CRDC ===
# Left join keeps every Chicago school, including those without a CRDC record.
chi_schools = nces_chi.merge(
    crdc_chi[["NCESID", "offers_any_advanced_stem", "advanced_stem_courses"]],
    left_on="NCESSCH",
    right_on="NCESID",
    how="left"
)

# Fill NaNs for schools with no CRDC record.
# Going through the nullable "boolean" dtype avoids the object-dtype
# downcasting warning from fillna and leaves a plain bool column.
chi_schools["offers_any_advanced_stem"] = (
    chi_schools["offers_any_advanced_stem"]
    .astype("boolean")
    .fillna(False)
    .astype(bool)
)
chi_schools["advanced_stem_courses"] = chi_schools["advanced_stem_courses"].fillna("None")

# === 5. Display preview ===
display(chi_schools.head())

# Optionally, save to CSV
#chi_schools.to_csv("chicago_schools_advanced_stem.csv", index=False)