In [None]:
#USE_PYGEOS=1
import geopandas as gpd
from pathlib import Path
import pandas as pd
#import pygeos
#gpd.options.use_pygeos = True

## Input Data Shapefiles

In [None]:
# Senate districts: assemble the shapefile path, then read it into a GeoDataFrame
input_folder = Path("./gis") / "NM_Senate"
fp = input_folder.joinpath("NM_Senate.shp")
sd = gpd.read_file(fp)

In [None]:
# Report the senate layer's coordinate reference system, then preview its first rows
print(sd.crs)
sd.head(n=5)

In [None]:
# House districts: assemble the shapefile path, then read it into a GeoDataFrame
input_folder = Path("./gis") / "NM_House"
fp = input_folder.joinpath("NM_House.shp")
hd = gpd.read_file(fp)

In [None]:
# Preview the first rows of the house district layer
hd.head(n=5)

In [None]:
# ZCTA (ZIP Code Tabulation Areas): assemble the shapefile path, then read it
input_folder = Path("./gis") / "NM_ZCTA"
fp = input_folder.joinpath("tl_2010_35_zcta510.shp")
zd_raw = gpd.read_file(fp)

In [None]:
# Preview the first rows of the unfiltered ZCTA layer
zd_raw.head(n=5)

In [None]:
# Keep only the ZCTA code and its geometry; the other attributes are not used below
keep_cols = ['ZCTA5CE10', 'geometry']
zd = zd_raw[keep_cols]
# Print the CRS so it can be compared by eye with the district layers before joining
print(zd.crs)
zd.head(n=5)

## Join ZCTA and Districts

In [None]:
# Senate-to-ZCTA crosswalk: senate districts on the left, matched to every ZCTA
# whose geometry intersects the district. Only the two identifier columns are
# kept, under clearer names.
sx = (
    gpd.sjoin(sd, zd, how='left', predicate='intersects')
    [["DISTRICT", "ZCTA5CE10"]]
    .rename(columns={'DISTRICT': 'senate_district', 'ZCTA5CE10': 'ZCTA'})
)
sx.head()

In [None]:
# House-to-ZCTA crosswalk: house districts on the left, matched to every ZCTA
# whose geometry intersects the district. Only the two identifier columns are
# kept, under clearer names.
hx = (
    gpd.sjoin(hd, zd, how='left', predicate='intersects')
    [["DISTRICT", "ZCTA5CE10"]]
    .rename(columns={'DISTRICT': 'house_district', 'ZCTA5CE10': 'ZCTA'})
)
hx.head()

In [56]:
# ZCTA-centred table, built in two spatial joins.
# Step 1: attach every intersecting house district to each ZCTA. The geometry
# column is kept because the second join still needs it.
zx1 = (
    gpd.sjoin(zd, hd, how='left', predicate='intersects')
    [['ZCTA5CE10', 'DISTRICT', 'geometry']]
    .rename(columns={'ZCTA5CE10': 'ZCTA', 'DISTRICT': 'house_district'})
)
# Step 2: attach every intersecting senate district to each (ZCTA, house) row,
# then drop the geometry.
zx2 = (
    gpd.sjoin(zx1, sd, how='left', predicate='intersects')
    [['ZCTA', 'house_district', 'DISTRICT']]
    .rename(columns={'DISTRICT': 'senate_district'})
)

zx2.head()

Unnamed: 0,ZCTA,house_district,senate_district
0,87108,10,16
0,87108,10,17
0,87108,10,18
0,87108,19,16
0,87108,19,17


In [None]:
# output
# Write both district-to-ZCTA crosswalks as CSV files without the index column.
# to_csv does not create missing directories, so make sure the target folder
# exists first; otherwise a fresh checkout fails here.
out_dir = Path('crosswalks')
out_dir.mkdir(parents=True, exist_ok=True)
sx.to_csv(out_dir / 'senate-zcta-crosswalk.csv', index = False)
hx.to_csv(out_dir / 'house-zcta-crosswalk.csv', index = False)

## Data Exploration

In [None]:
# count zips per senate district
# sx's DISTRICT column was renamed to 'senate_district' when the crosswalk was
# built, so group on the new name (grouping on 'DISTRICT' raises KeyError).
(
    sx.groupby(['senate_district'])
    .size()
    .reset_index(name='COUNT')
    .sort_values(by='COUNT', ascending = False)
)

In [None]:
# definitely a many to many relationship here
# Uses zx2, the ZCTA-centred join built above (no `zx` frame is defined in this
# notebook), and its renamed 'ZCTA' column. Each zx2 row is one
# (house_district, senate_district) pairing for a ZCTA, so COUNT is the number
# of such pairings per ZCTA.
(
    zx2.groupby(['ZCTA'])
    .size()
    .reset_index(name='COUNT')
    .sort_values(by='COUNT', ascending = False)
)

In [None]:
# South Valley, Albuquerque (ZCTA 87105) intersects many districts.
# NOTE(review): the original remark said "8 districts"; confirm against the rows
# shown, since zx2 lists one row per (house_district, senate_district) pairing.
# Uses zx2 and its renamed 'ZCTA' column; no `zx` frame is defined in this notebook.
zx2.query("ZCTA == '87105'")