In [1]:
import os
import geopandas as gp
import pandas as pd
import maup
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
from pandas.core.common import SettingWithCopyWarning
warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
wd = os.getcwd()

# Boston_CouncilDistricts_11_29_2022

## Background:
- We received a request for aggregated 2010 and 2020 Census data and population projections on Boston's new City Council Districts from the Boston City Council.
- The Council was also interested in percent change over time between 2010 and 2020.

## Approach:
- Use RDH PL datasets and population projections at the the block level.
- Query out fields which the user expressed interest in and for blocks in Boston.
- Aggregate block level data to the precinct and council district levels using [maup library](https://github.com/mggg/maup)
- Create percent change fields by using the following formula: ((2020 population - 2010 population)/2010 population)*100

## Links to datasets used:
- [Massachusetts block PL 94-171 2010](https://redistrictingdatahub.org/dataset/massachusetts-block-pl-94171-2010/)
- [Massachusetts block boundaries (2010)](https://redistrictingdatahub.org/dataset/massachusetts-block-boundaries-2010/)
- [Massachusetts Block boundaries (2020)](https://redistrictingdatahub.org/dataset/massachusetts-block-boundaries-2020/)
- [Massachusetts block PL 94-171 2020](https://redistrictingdatahub.org/dataset/massachusetts-block-pl-94171-2020/)
- [2021-2030 MA HastaqDNA Population Projections joined to 2020 Census Blocks, P2](https://redistrictingdatahub.org/dataset/20212030-ma-hastaqdna-population-projections-joined-to-2020-census-blocks-p2/)
- [Boston City Council Districts](https://bostonopendata-boston.opendata.arcgis.com/datasets/boston::new-city-council-districts-passed-9-4-on-november-2-2022-effective-for-the-2023-municipal-election/explore?location=42.332151%2C-70.907754%2C13.12)
- Boston City Council Block Assignment File to 2020 Blocks (via personal communication from the Boston City Council)

For a full 'raw-from-source' file, contact info@redistrictingdatahub.org

Import all required files

In [2]:
pl20  = pd.read_csv(os.path.join(os.path.join(wd,'ma_pl2020_b_csv'),'ma_pl2020_b.csv'))
b20 = gp.read_file(os.path.join(os.path.join(wd,'ma_b_2020_bound'),'ma_b_2020_bound.shp'))
b20_sub = b20[['GEOID20','geometry']]
pl10  = pd.read_csv(os.path.join(os.path.join(wd,'ma_pl2010_b'),'ma_pl2010_b.csv'))
b10 = gp.read_file(os.path.join(os.path.join(wd,'ma_2010_b_bound'),'ma_2010_b_bound.shp'))
b10_sub = b10[['GEOID','geometry']]
co_dist = gp.read_file(os.path.join(os.path.join(wd,'new_council_dists'),'New_City_Council_Districts%2C_Passed_9-4_on_November_2%2C_2022_effective_for_the_2023_Municipal_Election.shp'))
co_dist = co_dist[['DISTRICT','geometry']]
proj = pd.read_csv(os.path.join(os.path.join(wd,'ma_b_proj_P2_2020tiger'),'ma_b_proj_P2_2020tiger.csv'))
co_baf = pd.read_csv(os.path.join(wd,'Docket_1275_Committee_Report.txt'),header=None)
co_baf.rename(columns={0:'DISTRICT',1:'GEOID20'},inplace=True)

  pl20  = pd.read_csv(os.path.join(os.path.join(wd,'ma_pl2020_b_csv'),'ma_pl2020_b.csv'))
  pl10  = pd.read_csv(os.path.join(os.path.join(wd,'ma_pl2010_b'),'ma_pl2010_b.csv'))


Query and clean PL data for 2010 and 2020

In [3]:
p2_cols_to_keep = ['P0020001','P0020002','P0020005','P0020006','P0020007','P0020008','P0020009','P0020010','P0020011']
p4_cols_to_keep = ['P0040001','P0040002','P0040005','P0040006','P0040007','P0040008','P0040009','P0040010','P0040011']
p5_cols_to_keep = ['P0050001','P0050002','P0050003','P0050004','P0050005','P0050006','P0050007','P0050008','P0050009','P0050010']
h1_cols_to_keep = ['H0010001','H0010002','H0010003']
other_cols_to_keep = ['GEOCODE','PLACENS']

#Make GEOCODE field for 2010
pl10['STATE']=pl10['STATE'].apply(lambda x: str(x).zfill(2))
pl10['COUNTY']=pl10['COUNTY'].apply(lambda x: str(x).zfill(3))
pl10['TRACT']=pl10['TRACT'].apply(lambda x: str(x).zfill(6))
pl10['BLOCK']=pl10['BLOCK'].apply(lambda x: str(x).zfill(4))
pl10['GEOCODE']=pl10.apply(lambda x: x['STATE']+x['COUNTY']+x['TRACT']+x['BLOCK'],axis=1)

pl10_cols = other_cols_to_keep+p2_cols_to_keep+p4_cols_to_keep+h1_cols_to_keep
pl20_cols = pl10_cols+p5_cols_to_keep

pl10_sub = pl10[pl10_cols]
pl20_sub = pl20[pl20_cols]

Rename columns for PL data 

In [4]:
pl_cols10_dict = {'GEOCODE':'GEOID','P0020001':'TOT10','P0020002':'HSP10','P0020005':'WHT_NH10','P0020006':'BLK_NH10','P0020007':'AIA_NH10','P0020008':'ASN_NH10','P0020009':'HPI_NH10','P0020010':'OTH_NH10','P0020011':'2OM_NH10',
                  'P0040001':'TOT_VAP10','P0040002':'HSP_VAP10','P0040005':'WHT_VAP10','P0040006':'BLK_VAP10','P0040007':'AIA_VAP10','P0040008':'ASN_VAP10','P0040009':'HPI_VAP10','P0040010':'OTH_VAP10','P0040011':'2OM_VAP10',
                 'H0010001':'HUNT_TOT10','H0010002':'HUNT_OCC10','H0010003':'HUNT_VAC10'}

pl_cols20_dict = {'GEOCODE':'GEOID20','P0020001':'TOT20','P0020002':'HSP20','P0020005':'WHT_NH20','P0020006':'BLK_NH20','P0020007':'AIA_NH20','P0020008':'ASN_NH20','P0020009':'HPI_NH20','P0020010':'OTH_NH20','P0020011':'2OM_NH20',
                  'P0040001':'TOT_VAP20','P0040002':'HSP_VAP20','P0040005':'WHT_VAP20','P0040006':'BLK_VAP20','P0040007':'AIA_VAP20','P0040008':'ASN_VAP20','P0040009':'HPI_VAP20','P0040010':'OTH_VAP20','P0040011':'2OM_VAP20',
                 'H0010001':'HUNT_TOT20','H0010002':'HUNT_OCC20','H0010003':'HUNT_VAC20','P0050001':'GQ_TOT20','P0050002':'GQ_INS20','P0050003':'GQ_CORR20','P0050004':'GQ_JUVE20','P0050005':'GQ_NURS20','P0050006':'GQ_OINS20','P0050007':'GQ_NINS20','P0050008':'GQ_UNIV20','P0050009':'GQ_MLTR20','P0050010':'GQ_ONINS20'}

pl20_for_comp = ['TOT20', 'HSP20', 'WHT_NH20', 'BLK_NH20', 'AIA_NH20', 'ASN_NH20', 'HPI_NH20', 'OTH_NH20', '2OM_NH20', 'TOT_VAP20', 'HSP_VAP20', 'WHT_VAP20', 'BLK_VAP20', 'AIA_VAP20', 'ASN_VAP20', 'HPI_VAP20', 'OTH_VAP20', '2OM_VAP20', 'HUNT_TOT20', 'HUNT_OCC20', 'HUNT_VAC20']
pl10_for_comp = ['TOT10', 'HSP10', 'WHT_NH10', 'BLK_NH10', 'AIA_NH10', 'ASN_NH10', 'HPI_NH10', 'OTH_NH10', '2OM_NH10', 'TOT_VAP10', 'HSP_VAP10', 'WHT_VAP10', 'BLK_VAP10', 'AIA_VAP10', 'ASN_VAP10', 'HPI_VAP10', 'OTH_VAP10', '2OM_VAP10', 'HUNT_TOT10', 'HUNT_OCC10', 'HUNT_VAC10']
comparison_dict = dict(zip(pl20_for_comp,pl10_for_comp))

pl10_sub.rename(columns = pl_cols10_dict,inplace=True)
pl20_sub.rename(columns = pl_cols20_dict,inplace=True)
display(pl10_sub.head(1))
display(pl20_sub.head(1))

Unnamed: 0,GEOID,PLACENS,TOT10,HSP10,WHT_NH10,BLK_NH10,AIA_NH10,ASN_NH10,HPI_NH10,OTH_NH10,...,WHT_VAP10,BLK_VAP10,AIA_VAP10,ASN_VAP10,HPI_VAP10,OTH_VAP10,2OM_VAP10,HUNT_TOT10,HUNT_OCC10,HUNT_VAC10
0,250010122001000,618248,10,0,10,0,0,0,0,0,...,9,0,0,0,0,0,0,4,4,0


Unnamed: 0,GEOID20,PLACENS,TOT20,HSP20,WHT_NH20,BLK_NH20,AIA_NH20,ASN_NH20,HPI_NH20,OTH_NH20,...,GQ_TOT20,GQ_INS20,GQ_CORR20,GQ_JUVE20,GQ_NURS20,GQ_OINS20,GQ_NINS20,GQ_UNIV20,GQ_MLTR20,GQ_ONINS20
0,250010101001000,99999999,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Subset projection data

In [5]:
proj.rename(columns = {'geoid_2020':'GEOID20'},inplace=True)
proj_fields_to_keep = []
for i in list(proj.columns):
    if '_h_' in i:
        if 'tot' in i:
            proj_fields_to_keep.append(i)
        else:
            continue
    else:
        if i!='state_fips':
            
            proj_fields_to_keep.append(i)
proj_sub = proj[proj_fields_to_keep]
proj_sub

Unnamed: 0,GEOID20,p20_nh_tot,p20_nh_wh,p20_nh_aa,p20_nh_ai,p20_nh_asi,p20_nh_pac,p20_nh_oth,p20_nh_tom,p20_h_tot,...,p29_h_tot,p30_nh_tot,p30_nh_wh,p30_nh_aa,p30_nh_ai,p30_nh_asi,p30_nh_pac,p30_nh_oth,p30_nh_tom,p30_h_tot
0,250039261001009,6,6,0,0,0,0,0,0,0,...,0,7,7,0,0,0,0,0,0,1
1,250056416002013,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,250010127004025,13,10,0,0,0,0,1,2,0,...,0,10,8,0,0,0,0,0,2,0
3,250092506004006,15,15,0,0,0,0,0,0,51,...,34,16,16,0,0,0,0,0,0,32
4,250039261001015,6,6,0,0,0,0,0,0,0,...,0,7,7,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107273,250092506003008,14,10,0,0,4,0,0,0,113,...,104,14,10,0,0,4,0,0,0,103
107274,250092506003009,20,15,0,0,5,0,0,0,166,...,153,20,14,0,0,6,0,0,0,152
107275,250092506003011,14,10,0,0,4,0,0,0,114,...,106,14,10,0,0,4,0,0,0,105
107276,250092506004002,17,17,0,0,0,0,0,0,55,...,36,17,17,0,0,0,0,0,0,34


Rename projection data columns

In [6]:
proj_sub_fields = list(proj_sub.columns)
new_proj_cols = []
for i in proj_sub_fields:
    if i.startswith('p'):
        y = i.split('_')[0].replace('p','')
        h_nh = i.split('_')[1]
        race = i.split('_')[2]
        if h_nh =='h':
            h_nh = '_'
            race='HISP'
        else:
            h_nh = '_NH_'
        race_dict = {'tot':'TOT','wh':'WHT','aa':'BLK','ai':'AIA','asi':'ASN','pac':'HPI','oth':'OTH','tom':'2OM','HISP':'HISP'}
        name = 'P'+h_nh+race_dict.get(race)+y
        new_proj_cols.append(name)

proj_sub_fields.remove('GEOID20')
proj_rename_dict = dict(zip(proj_sub_fields,new_proj_cols))
proj_sub.rename(columns=proj_rename_dict,inplace=True)
proj_sub.head()

Unnamed: 0,GEOID20,P_NH_TOT20,P_NH_WHT20,P_NH_BLK20,P_NH_AIA20,P_NH_ASN20,P_NH_HPI20,P_NH_OTH20,P_NH_2OM20,P_HISP20,...,P_HISP29,P_NH_TOT30,P_NH_WHT30,P_NH_BLK30,P_NH_AIA30,P_NH_ASN30,P_NH_HPI30,P_NH_OTH30,P_NH_2OM30,P_HISP30
0,250039261001009,6,6,0,0,0,0,0,0,0,...,0,7,7,0,0,0,0,0,0,1
1,250056416002013,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,250010127004025,13,10,0,0,0,0,1,2,0,...,0,10,8,0,0,0,0,0,2,0
3,250092506004006,15,15,0,0,0,0,0,0,51,...,34,16,16,0,0,0,0,0,0,32
4,250039261001015,6,6,0,0,0,0,0,0,0,...,0,7,7,0,0,0,0,0,0,1


Join projections, 2020 PL data, and shapefile together

In [7]:
proj_pl20 =pd.merge(pl20_sub,proj_sub,on='GEOID20',how='outer')
proj_pl20['GEOID20'] =proj_pl20['GEOID20'].astype(str)
ma_data20 = pd.merge(b20_sub,proj_pl20,on='GEOID20',how='outer')
ma_data20

Unnamed: 0,GEOID20,geometry,PLACENS,TOT20,HSP20,WHT_NH20,BLK_NH20,AIA_NH20,ASN_NH20,HPI_NH20,...,P_HISP29,P_NH_TOT30,P_NH_WHT30,P_NH_BLK30,P_NH_AIA30,P_NH_ASN30,P_NH_HPI30,P_NH_OTH30,P_NH_2OM30,P_HISP30
0,250039353003056,"POLYGON ((-73.10996 42.69902, -73.10976 42.699...",618273,36,3,29,0,0,0,0,...,1,39,32,0,0,0,0,0,7,1
1,250039201021019,"POLYGON ((-73.23809 42.66205, -73.23797 42.662...",99999999,82,0,82,0,0,0,0,...,6,93,77,5,2,5,0,0,4,6
2,250039261001041,"POLYGON ((-73.41092 42.13063, -73.41082 42.131...",99999999,3,0,0,0,0,0,0,...,0,2,2,0,0,0,0,0,0,0
3,250039311002021,"POLYGON ((-73.08619 42.70946, -73.08607 42.709...",99999999,13,2,9,0,0,0,0,...,1,21,21,0,0,0,0,0,0,1
4,250039261001071,"POLYGON ((-73.32652 42.09883, -73.32633 42.098...",99999999,7,3,4,0,0,0,0,...,0,1,1,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107273,250173839044009,"POLYGON ((-71.44033 42.33203, -71.43995 42.332...",2791559,25,11,9,1,0,0,0,...,0,6,4,0,0,1,0,1,0,0
107274,250173422021016,"POLYGON ((-71.04692 42.42004, -71.04689 42.420...",618223,101,26,55,4,0,1,0,...,36,75,41,21,0,3,0,5,5,37
107275,250173336011038,"POLYGON ((-71.14781 42.51086, -71.14684 42.511...",619411,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
107276,250173832001013,"POLYGON ((-71.42136 42.26631, -71.42116 42.267...",2791559,280,96,80,14,1,36,0,...,210,53,48,2,0,3,0,0,0,214


Join 2010 PL data, and shapefile together

In [8]:
b10_sub = b10[['GEOID','geometry']]
ma_data10=pd.merge(b10_sub,pl10_sub,on='GEOID',how='outer')
ma_data10

Unnamed: 0,GEOID,geometry,PLACENS,TOT10,HSP10,WHT_NH10,BLK_NH10,AIA_NH10,ASN_NH10,HPI_NH10,...,WHT_VAP10,BLK_VAP10,AIA_VAP10,ASN_VAP10,HPI_VAP10,OTH_VAP10,2OM_VAP10,HUNT_TOT10,HUNT_OCC10,HUNT_VAC10
0,250010153001070,"POLYGON ((-70.29481 41.68198, -70.29519 41.682...",618248,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,250010125022055,"POLYGON ((-70.28800 41.64199, -70.28805 41.642...",618248,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,250010127002018,"POLYGON ((-70.34948 41.65485, -70.34925 41.655...",618248,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,250010125022026,"POLYGON ((-70.28962 41.64329, -70.28967 41.643...",618248,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,250010122003103,"POLYGON ((-70.34109 41.70074, -70.34094 41.700...",618248,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
157503,250277441022027,"POLYGON ((-71.53773 42.14578, -71.53742 42.145...",2378167,67,2,65,0,0,0,0,...,59,0,0,0,0,0,0,31,31,0
157504,250277441021016,"POLYGON ((-71.53727 42.15294, -71.53723 42.152...",2378167,92,9,82,0,0,0,0,...,60,0,0,0,0,0,1,29,29,0
157505,250277444002012,"POLYGON ((-71.53873 42.14988, -71.53810 42.149...",2378167,42,0,37,0,0,5,0,...,31,0,0,2,0,0,0,14,14,0
157506,250277444002002,"POLYGON ((-71.53401 42.14346, -71.53436 42.143...",2378167,39,0,39,0,0,0,0,...,34,0,0,0,0,0,0,16,16,0


Query data to Boston

In [9]:
boston_data20 = ma_data20[ma_data20['PLACENS']==619463]
boston_data10 = ma_data10[ma_data10['PLACENS']==619463]
boston_data20.drop(columns = 'PLACENS', inplace=True)
boston_data10.drop(columns = 'PLACENS',inplace=True)

Create list of variables to aggregate for 2020 data and aggregate to council districts

In [10]:
vars20 = list(boston_data20.columns)
vars20.remove('GEOID20')
vars20.remove('geometry')
boston_data20 = boston_data20.to_crs(co_dist.crs)

In [11]:
co_baf['GEOID20'] = co_baf['GEOID20'].astype(str)
boston_data20 = pd.merge(co_baf,boston_data20,on='GEOID20',how='outer',indicator=True)
display(boston_data20[boston_data20['_merge']!='both'])
boston_data20.drop(columns=['_merge','geometry'],inplace=True)
display(boston_data20)
boston_data20['DISTRICT']= boston_data20['DISTRICT'].apply(lambda x: str(x)[-1])
co_dist_grp = boston_data20.groupby('DISTRICT').sum()
co_dist_grp.reset_index(drop=False,inplace=True)
co_dist['DISTRICT'] = co_dist['DISTRICT'].apply(lambda x: str(x))
co_dist  = pd.merge(co_dist,co_dist_grp)
display(co_dist)

Unnamed: 0,DISTRICT,GEOID20,geometry,TOT20,HSP20,WHT_NH20,BLK_NH20,AIA_NH20,ASN_NH20,HPI_NH20,...,P_NH_TOT30,P_NH_WHT30,P_NH_BLK30,P_NH_AIA30,P_NH_ASN30,P_NH_HPI30,P_NH_OTH30,P_NH_2OM30,P_HISP30,_merge


Unnamed: 0,DISTRICT,GEOID20,TOT20,HSP20,WHT_NH20,BLK_NH20,AIA_NH20,ASN_NH20,HPI_NH20,OTH_NH20,...,P_HISP29,P_NH_TOT30,P_NH_WHT30,P_NH_BLK30,P_NH_AIA30,P_NH_ASN30,P_NH_HPI30,P_NH_OTH30,P_NH_2OM30,P_HISP30
0,District 9,250250001011000,255,44,160,11,0,30,0,4,...,20,378,320,7,0,51,0,0,0,19
1,District 9,250250001011001,249,18,162,13,0,46,0,8,...,20,381,323,7,0,51,0,0,0,19
2,District 9,250250001011002,71,3,43,0,0,12,0,5,...,7,127,108,2,0,17,0,0,0,6
3,District 9,250250001011003,102,13,61,11,1,5,0,5,...,8,157,133,3,0,21,0,0,0,8
4,District 9,250250001011004,58,5,36,7,0,8,0,0,...,5,90,76,2,0,12,0,0,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6072,District 1,250259901010032,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6073,District 2,250259901010033,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6074,District 2,250259901010034,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6075,District 3,250259901010035,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Unnamed: 0,DISTRICT,geometry,TOT20,HSP20,WHT_NH20,BLK_NH20,AIA_NH20,ASN_NH20,HPI_NH20,OTH_NH20,...,P_HISP29,P_NH_TOT30,P_NH_WHT30,P_NH_BLK30,P_NH_AIA30,P_NH_ASN30,P_NH_HPI30,P_NH_OTH30,P_NH_2OM30,P_HISP30
0,1,"POLYGON ((-71.06406 42.36901, -71.06413 42.369...",76830,24552,41564,2660,70,4423,25,847,...,37835,57706,47092,2788,38,5149,117,236,2286,38671
1,2,"POLYGON ((-71.07307 42.35125, -71.07194 42.351...",77466,6640,51638,3700,67,12640,23,425,...,9426,72185,53293,4015,68,12156,72,156,2425,9685
2,3,"POLYGON ((-71.04935 42.33035, -71.04891 42.330...",73285,11898,27925,13081,120,12635,21,2223,...,15304,62321,26799,18633,124,13138,0,1053,2574,15649
3,4,"POLYGON ((-71.08390 42.30760, -71.08369 42.307...",72917,17409,9190,36737,234,2864,15,1466,...,21858,67574,10703,49987,188,3305,0,609,2782,22409
4,5,"POLYGON ((-71.08546 42.28105, -71.08543 42.281...",75436,18191,18543,32865,110,1649,26,729,...,26292,73985,23545,45446,123,1785,0,391,2695,26931
5,6,"POLYGON ((-71.11275 42.33183, -71.11253 42.331...",76523,12660,46579,7218,80,6156,19,555,...,16996,75306,56454,8858,13,6119,0,281,3581,17362
6,7,"POLYGON ((-71.09575 42.33774, -71.09482 42.338...",72147,18703,16551,24866,137,5858,41,1389,...,23339,58283,16598,33011,369,5092,173,755,2285,23915
7,8,"POLYGON ((-71.11830 42.35506, -71.11824 42.355...",76370,7633,45468,4601,83,14963,37,598,...,11866,67306,47367,4650,124,12064,0,168,2933,12193
8,9,"POLYGON ((-71.16989 42.35765, -71.16947 42.358...",74673,8427,44006,3536,88,14400,44,1025,...,10964,62344,43062,3389,150,12826,53,908,1956,11228


Create list of variables to aggregate for 2010 data and aggregate to council districts

In [12]:
vars10 = list(boston_data10.columns)
vars10.remove('GEOID')
vars10.remove('geometry')
boston_data10 = boston_data10.to_crs(co_dist.crs)

co_10_assign = maup.assign(boston_data10,co_dist)
co_dist[vars10] = boston_data10[vars10].groupby(co_10_assign).sum()
co_dist

  geometry.index = i

  geometries = geometries[geometries.area > area_cutoff]

  return assign_to_max(intersections(sources, targets, area_cutoff=0).area)


Unnamed: 0,DISTRICT,geometry,TOT20,HSP20,WHT_NH20,BLK_NH20,AIA_NH20,ASN_NH20,HPI_NH20,OTH_NH20,...,WHT_VAP10,BLK_VAP10,AIA_VAP10,ASN_VAP10,HPI_VAP10,OTH_VAP10,2OM_VAP10,HUNT_TOT10,HUNT_OCC10,HUNT_VAC10
0,1,"POLYGON ((-71.06406 42.36901, -71.06413 42.369...",76830,24552,41564,2660,70,4423,25,847,...,35747,1759,79,2743,18,609,685,33503,30639,2864
1,2,"POLYGON ((-71.07307 42.35125, -71.07194 42.351...",77466,6640,51638,3700,67,12640,23,425,...,39123,2727,65,8628,14,167,678,33333,30427,2906
2,3,"POLYGON ((-71.04935 42.33035, -71.04891 42.330...",73285,11898,27925,13081,120,12635,21,2223,...,21551,11961,103,7941,27,2767,1758,27526,25174,2352
3,4,"POLYGON ((-71.08390 42.30760, -71.08369 42.307...",72917,17409,9190,36737,234,2864,15,1466,...,6997,30170,155,1417,7,1058,1364,26528,24092,2436
4,5,"POLYGON ((-71.08546 42.28105, -71.08543 42.281...",75436,18191,18543,32865,110,1649,26,729,...,17017,24310,131,977,18,346,928,28554,26736,1818
5,6,"POLYGON ((-71.11275 42.33183, -71.11253 42.331...",76523,12660,46579,7218,80,6156,19,555,...,39919,5764,91,3431,11,245,867,32178,30548,1630
6,7,"POLYGON ((-71.09575 42.33774, -71.09482 42.338...",72147,18703,16551,24866,137,5858,41,1389,...,14975,20410,172,2852,18,1265,1482,25517,23759,1758
7,8,"POLYGON ((-71.11830 42.35506, -71.11824 42.355...",76370,7633,45468,4601,83,14963,37,598,...,45968,3748,99,9795,20,195,1522,33431,30715,2716
8,9,"POLYGON ((-71.16989 42.35765, -71.16947 42.358...",74673,8427,44006,3536,88,14400,44,1025,...,45092,2793,57,9949,24,911,1456,31911,30609,1302


Function for calculating percent increase

In [13]:
def calculate_per(val20, val10):
    try:
        new_val = round((((val20-val10)/val10)*100),2)
    except:
        new_val = 0.00
    return new_val

Calculate percent change for council districts

In [14]:
for k,v in comparison_dict.items():
    if k.startswith('TOT'):
        pre = 'TOT'
    elif k.startswith('HSP'):
        pre='HSP'.replace('20','')
    elif k.startswith('HUNT'):
        pre='H'+k.split('_')[1].replace('20','')
    else:
        pre = k.split('_')[0]
    if '_V' in k:
        if k.startswith('H'):
            vap = ''
        else:
            vap = 'V'
    else:
        vap=''
    new_col_name = pre+vap+'_P1020'
    co_dist[new_col_name]= co_dist.apply(lambda x: calculate_per(x[k],x[v]),axis=1)
#Reorganize columns
co_dist_cols = list(co_dist.columns)
co_dist_cols.remove('geometry')
co_dist_cols.append('geometry')
council_districts = co_dist[co_dist_cols]
council_districts

Unnamed: 0,DISTRICT,TOT20,HSP20,WHT_NH20,BLK_NH20,AIA_NH20,ASN_NH20,HPI_NH20,OTH_NH20,2OM_NH20,...,WHTV_P1020,BLKV_P1020,AIAV_P1020,ASNV_P1020,OTHV_P1020,2OMV_P1020,HTOT_P1020,HOCC_P1020,HVAC_P1020,geometry
0,1,76830,24552,41564,2660,70,4423,25,847,2689,...,4.36,16.88,-39.24,42.54,3.12,190.95,9.95,10.43,4.75,"POLYGON ((-71.06406 42.36901, -71.06413 42.369..."
1,2,77466,6640,51638,3700,67,12640,23,425,2333,...,22.93,13.35,-12.31,33.0,123.95,159.14,26.52,23.46,58.64,"POLYGON ((-71.07307 42.35125, -71.07194 42.351..."
2,3,73285,11898,27925,13081,120,12635,21,2223,5382,...,18.18,-14.75,-4.85,32.14,-35.56,129.64,15.01,17.0,-6.34,"POLYGON ((-71.04935 42.33035, -71.04891 42.330..."
3,4,72917,17409,9190,36737,234,2864,15,1466,5002,...,13.71,-6.71,16.77,59.77,0.19,169.28,3.89,7.38,-30.62,"POLYGON ((-71.08390 42.30760, -71.08369 42.307..."
4,5,75436,18191,18543,32865,110,1649,26,729,3323,...,-6.22,7.19,-28.24,42.68,60.69,147.2,4.9,6.74,-22.17,"POLYGON ((-71.08546 42.28105, -71.08543 42.281..."
5,6,76523,12660,46579,7218,80,6156,19,555,3256,...,1.54,3.02,-24.18,57.77,71.43,141.64,7.63,7.01,19.26,"POLYGON ((-71.11275 42.33183, -71.11253 42.331..."
6,7,72147,18703,16551,24866,137,5858,41,1389,4602,...,4.63,-3.63,-39.53,94.28,-15.18,144.2,8.39,7.66,18.2,"POLYGON ((-71.09575 42.33774, -71.09482 42.338..."
7,8,76370,7633,45468,4601,83,14963,37,598,2987,...,-5.28,8.7,-31.31,46.76,171.79,70.63,10.29,8.88,26.22,"POLYGON ((-71.11830 42.35506, -71.11824 42.355..."
8,9,74673,8427,44006,3536,88,14400,44,1025,3147,...,-7.95,9.52,31.58,35.38,-8.45,80.77,7.68,6.25,41.24,"POLYGON ((-71.16989 42.35765, -71.16947 42.358..."


Extract data

In [15]:
council_districts.dropna(axis=1, how='all',inplace=True)
council_districts.to_file('./ma_boston_council_districts_nov_2022_2010_2020_demographic_change.shp')
council_districts.drop(columns='geometry',inplace=True)
council_districts.to_csv('./ma_boston_council_districts_nov_2022_2010_2020_demographic_change.csv',index=False)

In [16]:
boston_data10['COUNCIL']=co_10_assign
co_dist_sub = co_dist[['DISTRICT']]
co_dict = list(co_dist_sub.to_dict().values())[0]
boston_data10['COUNCIL'] = boston_data10['COUNCIL'].apply(lambda x: co_dict.get(x))
boston_data_baf10 = boston_data10[['GEOID','TOT10','COUNCIL']]
has_pop= boston_data_baf10[boston_data_baf10['TOT10']>0]
no_assign_co = has_pop[has_pop['COUNCIL'].isna()]
display(no_assign_co)

Unnamed: 0,GEOID,TOT10,COUNCIL
