In [66]:
import os
import geopandas as gp
import pandas as pd
import maup
import numpy as np

# El_Paso_County_CO_Precincts_Population_12_14_2022

## Background:
- We received a request for total population data for the precincts of El Paso County, Colorado.

## Approach:
- Download 2020 PL 94-171 block level data and El Paso County precinct shapefile
- Filter the block data to El Paso County (FIPS 041)
- Aggregate P0010001 (total population) from blocks to precincts using the [maup library](https://github.com/mggg/maup)
- Confirm that all data is correctly aggregated

## Links to datasets used:
- [El Paso County, Colorado, Precinct Shapefile (6/1/22)](https://admin.elpasoco.com/free-gis-data/)
- [Colorado block PL 94-171 2020 (by table)](https://redistrictingdatahub.org/dataset/colorado-block-pl-94171-2020-by-table/)

For a full 'raw-from-source' file, contact info@redistrictingdatahub.org

In [67]:
def add_demo_columns_totals(dataframe):
    '''
    Add the eight OMB race/ethnicity count columns (OMB_1 .. OMB_8).

    The columns are derived from the P002 fields already present in
    ``dataframe``. The frame is modified in place and also returned.
    '''
    # OMB_2 .. OMB_6 each combine a non-Hispanic "race alone" field with the
    # matching non-Hispanic "race and White" field.
    alone_and_with_white = {
        "OMB_2": ("P0020006", "P0020013"),  # Black
        "OMB_3": ("P0020008", "P0020015"),  # Asian
        "OMB_4": ("P0020007", "P0020014"),  # American Indian
        "OMB_5": ("P0020009", "P0020016"),  # Pacific Islander
        "OMB_6": ("P0020010", "P0020017"),  # Some Other Race
    }

    # OMB_1: Non-Hispanic White
    dataframe["OMB_1"] = dataframe["P0020005"]

    for omb_col, (alone_field, with_white_field) in alone_and_with_white.items():
        dataframe[omb_col] = dataframe[alone_field] + dataframe[with_white_field]

    # OMB_7: Non-Hispanic other multiple-race (more than one minority race
    # listed) -- P0020011 minus every "race and White" combination counted above.
    other_multirace = dataframe["P0020011"]
    for with_white_field in ("P0020013", "P0020014", "P0020015", "P0020016", "P0020017"):
        other_multirace = other_multirace - dataframe[with_white_field]
    dataframe["OMB_7"] = other_multirace

    # OMB_8: Hispanic
    dataframe["OMB_8"] = dataframe["P0020002"]

    return dataframe

def add_demo_columns_percentages(dataframe):
    '''
    Add the eight OMB share columns (OMB_1_pct .. OMB_8_pct).

    Each share is the matching OMB count divided by P0010001; rows where
    P0010001 is 0 get a share of 0. The frame is modified in place and then
    handed to format_percentages, whose return value is returned.
    '''
    total_pop = dataframe["P0010001"]

    def share_of_total(count):
        # Zero-population rows are assigned 0 rather than the result of the division.
        return np.where(dataframe["P0010001"] == 0, 0, count / total_pop)

    # Non-Hispanic White
    dataframe["OMB_1_pct"] = share_of_total(dataframe["P0020005"])

    # Non-Hispanic Black plus Non-Hispanic Black and White
    dataframe["OMB_2_pct"] = share_of_total(dataframe["P0020006"] + dataframe["P0020013"])

    # Non-Hispanic Asian plus Non-Hispanic Asian and White
    dataframe["OMB_3_pct"] = share_of_total(dataframe["P0020008"] + dataframe["P0020015"])

    # Non-Hispanic American Indian plus Non-Hispanic American Indian and White
    dataframe["OMB_4_pct"] = share_of_total(dataframe["P0020007"] + dataframe["P0020014"])

    # Non-Hispanic Pacific Islander plus Non-Hispanic Pacific Islander and White
    dataframe["OMB_5_pct"] = share_of_total(dataframe["P0020009"] + dataframe["P0020016"])

    # Non-Hispanic Some Other Race plus Non-Hispanic Some Other Race and White
    dataframe["OMB_6_pct"] = share_of_total(dataframe["P0020010"] + dataframe["P0020017"])

    # Non-Hispanic other multiple-race (more than one minority race listed)
    other_multirace = (
        dataframe["P0020011"]
        - dataframe["P0020013"]
        - dataframe["P0020014"]
        - dataframe["P0020015"]
        - dataframe["P0020016"]
        - dataframe["P0020017"]
    )
    dataframe["OMB_7_pct"] = share_of_total(other_multirace)

    # Hispanic
    dataframe["OMB_8_pct"] = share_of_total(dataframe["P0020002"])

    return format_percentages(dataframe)

def format_percentages(dataframe):
    '''
    Convert the eight OMB_*_pct columns to percent strings with one decimal
    place (for example 0.1234 becomes '12.3%').

    Column names are not changed. The frame is modified in place and returned.
    '''
    as_percent = '{:.1%}'.format
    for omb_number in range(1, 9):
        pct_col = f"OMB_{omb_number}_pct"
        dataframe[pct_col] = dataframe[pct_col].map(as_percent)

    return dataframe

Read in the block shapefile (which carries the population data) and the precinct shapefile

In [68]:
# Precinct boundaries, read from the local raw-from-source folder.
prec = gp.read_file("./raw-from-source/Precinct.shp")
# Block-level shapefile carrying the population fields; it is filtered to
# COUNTY == '041' in the next cell, so it presumably covers the whole state.
b = gp.read_file("./raw-from-source/co_pl2020_b/co_pl2020_b.shp")

Filter the block data to El Paso County, CO (FIPS 041)

In [69]:
# Compare identifiers as text so the county code can be matched as '041'.
for id_col in ('GEOID20', 'COUNTY'):
    b[id_col] = b[id_col].astype(str)

# Keep only the blocks whose county code is 041.
in_county_041 = b['COUNTY'] == '041'
b = b[in_county_041]

display(prec.head(), b.head())

Unnamed: 0,PRECINCT,COM_DIST,SENATE,REP,STATENUM,FEATDATE,POLLCODE,SHAPE_STAr,SHAPE_STLe,geometry
0,644,4,12,21,5122122000.0,2009-09-01,,10656770.0,17423.556705,"POLYGON ((3229553.461 1329378.609, 3229553.384..."
1,417,2,10,15,5101521000.0,2002-03-15,,9451139.0,15232.559449,"POLYGON ((3227057.043 1393834.607, 3227079.190..."
2,405,2,10,15,5101521000.0,2002-03-15,,11368390.0,17327.832633,"POLYGON ((3230247.684 1393968.067, 3230252.120..."
3,643,4,12,21,5122122000.0,2009-09-01,,12027280.0,21852.789709,"POLYGON ((3233173.230 1327299.421, 3233013.107..."
4,406,2,10,15,5101521000.0,2005-04-28,,11231900.0,15111.64795,"POLYGON ((3230725.987 1393821.389, 3230912.080..."


Unnamed: 0,STATEFP20,COUNTYFP20,TRACTCE20,BLOCKCE20,GEOID20,NAME20,MTFCC20,FUNCSTAT20,ALAND20,AWATER20,...,P0050002,P0050003,P0050004,P0050005,P0050006,P0050007,P0050008,P0050009,P0050010,geometry
2,8,41,1000,2024,80410010002024,Block 2024,G5040,S,11556,0,...,0,0,0,0,0,0,0,0,0,"POLYGON ((-104.82316 38.85724, -104.82314 38.8..."
3,8,41,3911,4000,80410039114000,Block 4000,G5040,S,250920,0,...,0,0,0,0,0,0,0,0,0,"POLYGON ((-104.18051 39.12874, -104.17961 39.1..."
4,8,41,7402,1003,80410074021003,Block 1003,G5040,S,114057,0,...,0,0,0,0,0,0,0,0,0,"POLYGON ((-104.79440 39.12753, -104.79421 39.1..."
5,8,41,5125,1008,80410051251008,Block 1008,G5040,S,12880,0,...,0,0,0,0,0,0,0,0,0,"POLYGON ((-104.68660 38.93196, -104.68639 38.9..."
6,8,41,1101,2002,80410011012002,Block 2002,G5040,S,5014,0,...,0,0,0,0,0,0,0,0,0,"POLYGON ((-104.82157 38.87092, -104.82155 38.8..."


Assign blocks to precincts and aggregate total population to precincts

In [71]:
# Label each precinct row by its PRECINCT value (the column itself is kept),
# so the block-to-precinct assignment below is expressed in precinct IDs.
prec = prec.set_index("PRECINCT", drop=False)

In [72]:
# Put the blocks in the precinct layer's CRS so both layers share one
# coordinate system before the spatial assignment.
crs = prec.crs
b = b.to_crs(crs)

# One entry per block, holding the index label of the precinct it is assigned to.
assignment = maup.assign(b, prec)

# Sum the block fields within each precinct. numeric_only=True is passed
# explicitly: relying on pandas to silently drop the text/geometry columns is
# deprecated and is no longer the default behaviour in pandas >= 2.0.
precs_data = (
    b.groupby(assignment)
    .sum(numeric_only=True)
    .reset_index(drop=False)
    # The group key comes back as a column called "index" when the assignment
    # Series is unnamed; give it the merge-key name used later.
    .rename(columns={"index": "PRECINCT"})
)

# Derive the OMB count and percentage columns on the precinct totals.
precs_data = add_demo_columns_totals(precs_data)
precs_data = add_demo_columns_percentages(precs_data)


  geometry.index = i
  self.spatial_index = STRtree(self.geometries)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, name=name, **kwargs)
  s = pd.Series(data, index=index, 

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSeries 's', you can use 's.is_empty | s.isna()' to get back the old behaviour.

  return intersections[-(intersections.is_empty | intersections.isna())]
Given a GeoSerie

  precs_data = b.groupby(assignment).sum()


In [125]:
# Drop the PRECINCT-labelled index so that "PRECINCT" refers only to the
# column when it is used as the merge key.
prec = prec.reset_index(drop=True)

# Outer merge keeps precincts even if they have no aggregated block data.
joined = gp.GeoDataFrame(pd.merge(precs_data, prec, how = "outer", on = "PRECINCT"))

# Share of P0030001 in total population (P0010001); 0 where the total is 0.
# NOTE(review): P0030001 is presumably the 18+ (voting-age) count -- confirm.
# The ratio is deliberately NOT rounded before formatting: rounding to two
# decimals first made every formatted value end in ".0%".
joined["VAP_pct"] = np.where(joined["P0010001"]==0, 0, joined["P0030001"]/joined["P0010001"])
joined["VAP_pct"] = joined["VAP_pct"].map('{:.1%}'.format)

# Identifier and district columns first, then every OMB column, geometry last.
prec_col_order = ['PRECINCT','P0010001', 'COM_DIST','SENATE', 'REP',"VAP_pct"] + [col for col in joined.columns if "OMB" in col] + ['geometry']
joined = joined[prec_col_order].rename(columns = {"P0010001":"TOTPOP20"})


Confirm that the total population of blocks in El Paso County matches the total population of precincts

In [126]:
# Total population after aggregation to precincts vs. the block-level total.
precinct_total = joined['TOTPOP20'].sum()
block_total = b['P0010001'].sum()
print(precinct_total)
print(block_total)

# Fail loudly instead of relying on a visual comparison of the two numbers:
# a mismatch means some block population was lost or double counted.
assert precinct_total == block_total, (
    f"Precinct total ({precinct_total}) does not match block total ({block_total})"
)

730395
730395


Export data

In [127]:
# Create the output folder if needed; exist_ok avoids the separate
# exists-then-create check and makes the cell safe to re-run.
os.makedirs("./el_paso_county_2022_prec_2020_pop/", exist_ok=True)
joined.to_file('./el_paso_county_2022_prec_2020_pop/el_paso_county_2022_prec_2020_pop.shp')


  pd.Int64Index,


In [123]:
# Write the attribute table without geometry. The drop is not done in place,
# so `joined` keeps its geometry and this cell can be re-run (or run before
# the shapefile export) without breaking either export.
joined.drop(columns = 'geometry').to_csv('./el_paso_county_2022_prec_2020_pop.csv',index=False)

  pd.Int64Index,
