In [49]:
import pandas as pd
import geopandas as gp

# 2020 CVAP Data Aggregated to Nassau County VTDs

## Background:
- Received a request to aggregate block-level 2020 CVAP data to the 2020 PL VTD level in Nassau County, NY.

## Approach:
- Download block-level PL data to get block to VTD correspondence
- Load in block-level CVAP data
- Add in the VTD correspondence information to the block-level CVAP file
- Filter down to Nassau County
- Groupby VTD and sum the CVAP data columns

## Links to datasets used:
[New York block PL 94-171 2020](https://redistrictingdatahub.org/dataset/new-york-block-pl-94171-2020/)

[New York CVAP Data Disaggregated to the 2020 Block Level (2020)](https://redistrictingdatahub.org/dataset/new-york-cvap-data-disaggregated-to-the-2020-block-level-2020/)
- Note: Field definitions can be found in the metadata for this file

For a full 'raw-from-source' file, contact info@redistrictingdatahub.org

In [54]:
# Load in block-level PL data, which contains the correspondence to VTDs
blocks_csv = pd.read_csv("./raw-from-source/ny_pl2020_b/ny_pl2020_b.csv")

# Create a VTDID
blocks_csv["VTDID"] = "36" + blocks_csv["COUNTY"].astype(str).str.zfill(3)+ blocks_csv["VTD"].astype(str).str.zfill(6)

# Confirm that it matches the expected number of VTDs for the state
assert(len(blocks_csv["VTDID"].unique())==14191)

# Filter down to relevant columns
blocks_csv = blocks_csv[["GEOCODE", "VTDID"]]

# Rename the columns for the join
blocks_csv.rename(columns = {"GEOCODE":"GEOID20"}, inplace = True)

  blocks_csv = pd.read_csv("./raw-from-source/ny_pl2020_b/ny_pl2020_b.csv")
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  blocks_csv.rename(columns = {"GEOCODE":"GEOID20"}, inplace = True)
  vtd_cvap = combined.groupby(["VTDID"])[[i for i in combined.columns if i not in ["GEOID20", "COUNTYFP20"]]].sum()


In [None]:
# Load in CVAP data that has been disaggregated to the block-level
blocks_cvap = pd.read_csv("./raw-from-source/ny_cvap_2020_2020_b/ny_cvap_2020_2020_b.csv")

# Join the two files
combined = pd.merge(blocks_csv, blocks_cvap, how = "outer", on = "GEOID20", indicator = True)

# Filter down to Nassau County
combined = combined[combined["COUNTYFP20"]==59]

# Groupby VTDID and grab the columns we need
vtd_cvap = combined.groupby(["VTDID"])[[i for i in combined.columns if i not in ["GEOID20", "COUNTYFP20"]]].sum()

# Clean index
vtd_cvap.reset_index(inplace = True, drop = False)

# Export
vtd_cvap.to_csv("./Nassau_County_NY_VTDs_CVAP.csv", index = False)