In [1]:
from pathlib import Path
import sys

import geopandas as gpd
import pandas as pd

sys.path.append("..")
import correlate

# Input directories, all relative to the notebook's working directory.
# tiger: holds the tl_2020_42_all.zip shapefile archive.
tiger = Path("..") / "data-raw" / "tiger2020pl"
# rdh: holds the pa_*_adopted_2022.zip district archives.
rdh = Path("..") / "data-raw" / "rdh" / "pa"
# alarm: holds census-vest-2020/pa_2020_vtd.csv.
alarm = Path("..") / "data-raw" / "alarm-redist-census-2020"

In [2]:
# zip:// URI prefix for the archive; each layer below is addressed as "<prefix>!<member>.shp".
tiger_zip = "zip://" + str((tiger / "tl_2020_42_all.zip").absolute())

# County attribute table only (geometry skipped); it exists to give each VTD row
# a readable county name under the column "CountyName".
counties = gpd.read_file(
    tiger_zip + "!tl_2020_42_county20.shp",
    ignore_geometry=True,
)[["STATEFP20", "COUNTYFP20", "NAME20"]].rename(columns={"NAME20": "CountyName"})

vtds = (
    gpd.read_file(tiger_zip + "!tl_2020_42_vtd20.shp")
    # Name the join keys explicitly rather than relying on merge's default of
    # "every column name the two frames share", which would silently change if
    # either layer gained another common column. validate="m:1" makes pandas
    # raise if a (state, county) key appears more than once in `counties`,
    # which would otherwise duplicate VTD rows. The join stays an inner join.
    .merge(counties, on=["STATEFP20", "COUNTYFP20"], validate="m:1")
    .set_index("GEOID20")
    [["STATEFP20", "COUNTYFP20", "NAME20", "CountyName", "geometry"]]
)

vtds.head()


Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903..."
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788..."
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719..."
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892..."
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836..."


In [3]:
# Lookup from LSAD20 code to the label stored in SubdivisionType.
# https://www2.census.gov/geo/pdfs/reference/LSADCodes.pdf
lsad_labels = pd.DataFrame(
    {
        "LSAD20": ["44", "21", "25", "37", "00", "43"],
        "SubdivisionType": ["Township", "Borough", "City", "Municipality", "", "Town"],
    }
)

cousub_archive = (tiger / "tl_2020_42_all.zip").absolute()
subdivisions = gpd.read_file(f"zip://{cousub_archive}!tl_2020_42_cousub20.shp").merge(
    lsad_labels,
    how="left",
    on=["LSAD20"],
)
# The left join leaves NaN for any LSAD20 code missing from the lookup; stop here if so.
assert subdivisions["SubdivisionType"].notna().all()

# Only the name, type and geometry of each subdivision are handed to the overlay.
subdivision_attrs = subdivisions.rename(columns={"NAME20": "SubdivisionName"})[
    ["SubdivisionName", "SubdivisionType", "geometry"]
]
with_subdivisions = correlate.overlay(vtds, subdivision_attrs)
print(with_subdivisions.format_summary())


Within: 9177
Contained: 1
Partial: 0
No match: 0


In [4]:
# Number of overlaid rows per subdivision name, most frequent first.
with_subdivisions.overlaid["SubdivisionName"].value_counts()


Philadelphia    1703
Pittsburgh       402
Upper Darby       71
Erie              63
Allentown         55
                ... 
Walnutport         1
Wind Gap           1
Cornplanter        1
Barkeyville        1
Watts              1
Name: SubdivisionName, Length: 1886, dtype: int64

In [5]:
# Read the sldl20 layer, keeping only the district code and geometry.
# The code is both the index and a regular column (drop=False).
sldl20_archive = (tiger / "tl_2020_42_all.zip").absolute()
sldl20 = gpd.read_file(f"zip://{sldl20_archive}!tl_2020_42_sldl20.shp")[
    ["SLDLST20", "geometry"]
].set_index("SLDLST20", drop=False)

# Overlay onto the frame that already carries subdivisions, then report the match summary.
with_sldl = correlate.overlay(with_subdivisions.overlaid, sldl20)
print(with_sldl.format_summary())
with_sldl.overlaid.head()

Within: 9092
Contained: 0
Partial: 86
No match: 0


Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry,SubdivisionName,SubdivisionType,SLDLST20
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903...",Franklin,Township,91
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788...",Freedom,Township,91
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719...",Germany,Township,91
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892...",Hamilton,Township,193
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836...",Gettysburg,Borough,91


In [6]:
# Read the sldu20 layer, keeping only the district code and geometry.
# The code is both the index and a regular column (drop=False).
sldu20_archive = (tiger / "tl_2020_42_all.zip").absolute()
sldu20 = gpd.read_file(f"zip://{sldu20_archive}!tl_2020_42_sldu20.shp")[
    ["SLDUST20", "geometry"]
].set_index("SLDUST20", drop=False)

# Overlay onto the result of the previous step, then report the match summary.
with_sldu = correlate.overlay(with_sldl.overlaid, sldu20)
print(with_sldu.format_summary())
with_sldu.overlaid.head()


Within: 9164
Contained: 0
Partial: 14
No match: 0


Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry,SubdivisionName,SubdivisionType,SLDLST20,SLDUST20
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903...",Franklin,Township,91,33
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788...",Freedom,Township,91,33
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719...",Germany,Township,91,33
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892...",Hamilton,Township,193,33
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836...",Gettysburg,Borough,91,33


In [7]:
# Read the cd116 layer, keeping only the district code and geometry.
# The code is both the index and a regular column (drop=False).
cd116_archive = (tiger / "tl_2020_42_all.zip").absolute()
cd116 = gpd.read_file(f"zip://{cd116_archive}!tl_2020_42_cd116.shp")[
    ["CD116FP", "geometry"]
].set_index("CD116FP", drop=False)

# Overlay onto the result of the previous step, then report the match summary.
with_cd116 = correlate.overlay(with_sldu.overlaid, cd116)
print(with_cd116.format_summary())
with_cd116.overlaid.head()


Within: 9170
Contained: 0
Partial: 8
No match: 0


Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry,SubdivisionName,SubdivisionType,SLDLST20,SLDUST20,CD116FP
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903...",Franklin,Township,91,33,13
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788...",Freedom,Township,91,33,13
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719...",Germany,Township,91,33,13
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892...",Hamilton,Township,193,33,13
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836...",Gettysburg,Borough,91,33,13


In [8]:
# Read pa_sldl_adopted_2022.zip; its DISTRICT column is renamed to SLDLST22,
# which serves as both the index and a regular column (drop=False).
sldl22_archive = (rdh / "pa_sldl_adopted_2022.zip").absolute()
sldl22 = (
    gpd.read_file(f"zip://{sldl22_archive}")[["DISTRICT", "geometry"]]
    .rename(columns={"DISTRICT": "SLDLST22"})
    .set_index("SLDLST22", drop=False)
)

# Overlay onto the result of the previous step, then report the match summary.
with_sldl22 = correlate.overlay(with_cd116.overlaid, sldl22)
print(with_sldl22.format_summary())
with_sldl22.overlaid.head()


Within: 4809
Contained: 0
Partial: 4369
No match: 0


Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry,SubdivisionName,SubdivisionType,SLDLST20,SLDUST20,CD116FP,SLDLST22
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903...",Franklin,Township,91,33,13,91
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788...",Freedom,Township,91,33,13,91
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719...",Germany,Township,91,33,13,91
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892...",Hamilton,Township,193,33,13,193
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836...",Gettysburg,Borough,91,33,13,91


In [9]:
# Read pa_sldu_adopted_2022.zip; its DISTRICT column is renamed to SLDUST22,
# which serves as both the index and a regular column (drop=False).
sldu22_archive = (rdh / "pa_sldu_adopted_2022.zip").absolute()
sldu22 = (
    gpd.read_file(f"zip://{sldu22_archive}")[["DISTRICT", "geometry"]]
    .rename(columns={"DISTRICT": "SLDUST22"})
    .set_index("SLDUST22", drop=False)
)

# Overlay onto the result of the previous step, then report the match summary.
with_sldu22 = correlate.overlay(with_sldl22.overlaid, sldu22)
print(with_sldu22.format_summary())
with_sldu22.overlaid.head()


Within: 6761
Contained: 0
Partial: 2417
No match: 0


Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry,SubdivisionName,SubdivisionType,SLDLST20,SLDUST20,CD116FP,SLDLST22,SLDUST22
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903...",Franklin,Township,91,33,13,91,33
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788...",Freedom,Township,91,33,13,91,33
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719...",Germany,Township,91,33,13,91,33
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892...",Hamilton,Township,193,33,13,193,33
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836...",Gettysburg,Borough,91,33,13,91,33


In [10]:
# Read pa_cong_adopted_2022.zip; its DISTRICT column is renamed to CDNew,
# which serves as both the index and a regular column (drop=False).
cdnew_archive = (rdh / "pa_cong_adopted_2022.zip").absolute()
cdnew = (
    gpd.read_file(f"zip://{cdnew_archive}")[["DISTRICT", "geometry"]]
    .rename(columns={"DISTRICT": "CDNew"})
    .set_index("CDNew", drop=False)
)

# Overlay onto the result of the previous step, then report the match summary.
with_cdnew = correlate.overlay(with_sldu22.overlaid, cdnew)
print(with_cdnew.format_summary())
with_cdnew.overlaid.head()


Within: 7712
Contained: 0
Partial: 1466
No match: 0


Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry,SubdivisionName,SubdivisionType,SLDLST20,SLDUST20,CD116FP,SLDLST22,SLDUST22,CDNew
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903...",Franklin,Township,91,33,13,91,33,13
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788...",Freedom,Township,91,33,13,91,33,13
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719...",Germany,Township,91,33,13,91,33,13
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892...",Hamilton,Township,193,33,13,193,33,13
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836...",Gettysburg,Borough,91,33,13,91,33,13


In [16]:
# Per-VTD table from the ALARM directory; the state/county/vtd columns are dropped
# before joining, leaving GEOID20 as the join key.
# NOTE(review): GEOID20's dtype is left to read_csv's inference — confirm it matches
# the dtype of the GEOID20 index on with_cdnew.overlaid.
alarm_csv = alarm / "census-vest-2020/pa_2020_vtd.csv"
alarm_df = pd.read_csv(alarm_csv).drop(columns=["state", "county", "vtd"])

# Left join keeps every overlaid row; validate="1:1" makes pandas raise if GEOID20
# repeats on either side. GEOID20 is restored as the index afterwards.
with_alarm = (
    with_cdnew.overlaid
    .merge(alarm_df, on="GEOID20", how="left", validate="1:1")
    .set_index("GEOID20")
)

with_alarm.head()

Unnamed: 0_level_0,STATEFP20,COUNTYFP20,NAME20,CountyName,geometry,SubdivisionName,SubdivisionType,SLDLST20,SLDUST20,CD116FP,...,gov_18_dem_wol,gov_18_rep_wag,arv_16,adv_16,arv_18,adv_18,arv_20,adv_20,nrv,ndv
GEOID20,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
42001000170,42,1,FRANKLIN DISTRICT 01,Adams,"POLYGON ((-77.46876 39.90392, -77.46766 39.903...",Franklin,Township,91,33,13,...,330.8,571.9,767.8,306.2,577.5,325.1,,,691.7,313.8
42001000190,42,1,FREEDOM,Adams,"POLYGON ((-77.32236 39.78708, -77.32187 39.788...",Freedom,Township,91,33,13,...,179.4,266.8,352.2,160.6,270.5,173.9,,,319.6,165.9
42001000200,42,1,GERMANY,Adams,"POLYGON ((-77.16709 39.71987, -77.16685 39.719...",Germany,Township,91,33,13,...,336.0,771.0,994.3,273.3,784.5,319.0,,,910.4,291.6
42001000260,42,1,HAMILTON,Adams,"POLYGON ((-77.07583 39.89272, -77.07452 39.892...",Hamilton,Township,193,33,13,...,367.8,697.6,991.3,314.0,715.2,344.4,,,880.8,326.1
42001000230,42,1,GETTYSBURG WARD 02,Adams,"POLYGON ((-77.24600 39.83528, -77.24575 39.836...",Gettysburg,Borough,91,33,13,...,689.0,174.0,389.3,799.0,183.5,680.5,,,307.0,751.6


In [17]:
# Show the full column list of the merged frame; a head() preview of a frame this
# wide elides the middle columns.
with_alarm.columns

Index(['STATEFP20', 'COUNTYFP20', 'NAME20', 'CountyName', 'geometry',
       'SubdivisionName', 'SubdivisionType', 'SLDLST20', 'SLDUST20', 'CD116FP',
       'SLDLST22', 'SLDUST22', 'CDNew', 'pop', 'pop_hisp', 'pop_white',
       'pop_black', 'pop_aian', 'pop_asian', 'pop_nhpi', 'pop_other',
       'pop_two', 'vap', 'vap_hisp', 'vap_white', 'vap_black', 'vap_aian',
       'vap_asian', 'vap_nhpi', 'vap_other', 'vap_two', 'pre_16_dem_cli',
       'pre_16_rep_tru', 'uss_16_dem_mcg', 'uss_16_rep_too', 'atg_16_dem_sha',
       'atg_16_rep_raf', 'uss_18_dem_cas', 'uss_18_rep_bar', 'gov_18_dem_wol',
       'gov_18_rep_wag', 'arv_16', 'adv_16', 'arv_18', 'adv_18', 'arv_20',
       'adv_20', 'nrv', 'ndv'],
      dtype='object')