In [1]:
import pandas as pd
import geopandas as gpd

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

We're trying to make a searchable map with one static layer and 3 toggle layers. 

Static layer = air quality monitors (both regulatory and PurpleAir), styled by 3-yr avg. PM2.5 reading

Toggle layers:
- life expectancy by tract
- asthma by tract
- POC or poverty by tract

Steps:
- pull 2020 and 2010 tracts shapefile
- restrict to just Houston metro
- pull asthma data from [CDC PLACES](https://data.cdc.gov/500-Cities-Places/PLACES-Local-Data-for-Better-Health-Census-Tract-D/cwsq-ngmh/about_data)
- pull most recent POC/poverty from Census API
- pull life expectancy [from CDC](https://data.cdc.gov/NCHS/U-S-Life-Expectancy-at-Birth-by-State-and-Census-T/5h56-n989/about_data) - note, this uses the 2010 tracts

In [2]:
########################################
#load geos
########################################

houmetro_cntys = {'Fort Bend, TX':'48157','San Jacinto, TX':'48407','Waller, TX':'48473',
                'Galveston, TX':'48167','Harris, TX':'48201','Liberty, TX':'48291',
                'Chambers, TX':'48071','Brazoria, TX':'48039','Austin, TX':'48015',
                'Montgomery, TX':'48339'}
houmetro_cntys_swap = {v: k for k, v in houmetro_cntys.items()}
houmetro_fips_ints = [ int(x) for x in list(houmetro_cntys.values()) ]
houmetro_fips_strs = [ str(x) for x in list(houmetro_cntys.values()) ]
houmetro_nms = [ str(x) for x in list(houmetro_cntys.keys()) ]


tracts10_shp = gpd.read_file('https://www2.census.gov/geo/tiger/TIGER2010/TRACT/2010/tl_2010_48_tract10.zip')
tracts10_shp['cnty_geoid'] = tracts10_shp['STATEFP10'].astype(str) + tracts10_shp['COUNTYFP10'].astype(str)
tracts10_shp['cnty_nm'] = tracts10_shp['cnty_geoid'].map(houmetro_cntys_swap)
tracts10_shp.rename(columns={'GEOID10':'tract_geoid'},inplace=True)
houtracts10_shp = tracts10_shp.loc[tracts10_shp['cnty_geoid'].isin(houmetro_fips_strs)]
houtracts10_shp = houtracts10_shp[['cnty_geoid','cnty_nm','tract_geoid','geometry']]

tracts20_shp = gpd.read_file('https://www2.census.gov/geo/tiger/TIGER2024/TRACT/tl_2024_48_tract.zip')
tracts20_shp['cnty_geoid'] = tracts20_shp['STATEFP'].astype(str) + tracts20_shp['COUNTYFP'].astype(str)
tracts20_shp['cnty_nm'] = tracts20_shp['cnty_geoid'].map(houmetro_cntys_swap)
tracts20_shp.rename(columns={'GEOID':'tract_geoid'},inplace=True)
houtracts20_shp = tracts20_shp.loc[tracts20_shp['cnty_geoid'].isin(houmetro_fips_strs)]
houtracts20_shp = houtracts20_shp[['cnty_geoid','cnty_nm','tract_geoid','geometry']]

In [None]:
print(len(houtracts10_shp))
print(houtracts10_shp.dtypes)
display(houtracts10_shp.head(1))

print(len(houtracts20_shp))
print(houtracts20_shp.dtypes)
display(houtracts20_shp.head(1))

In [None]:
########################################
# load CDC asthma data
########################################

#load national data
cdc = pd.read_csv('../data/CDC/PLACES__Local_Data_for_Better_Health__Census_Tract_Data_2024_release_20250103.csv')

#restrict to just hou metro counties
hou_cdc = cdc.loc[cdc['CountyFIPS'].isin(houmetro_fips_ints)]

#delete cdc cause it's huge and we don't need it anymore
del cdc

#restrict to just asthma
hou_asthma = hou_cdc.loc[hou_cdc['MeasureId'] == 'CASTHMA']

#create correct type on tract_geoid
hou_asthma['tract_geoid'] = hou_asthma['LocationName'].astype(str)

#too many columns, reduce and rename
rename_cols = {'Data_Value':'dv','TotalPopulation':'p','TotalPop18plus':'py',
               'tract_geoid':'tract_geoid'}
hou_asthma.rename(columns=rename_cols,inplace=True)
hou_asthma = hou_asthma[list(rename_cols.values())]

#connect to 2020 tracts - GEOID
hou_asthma_geo = houtracts20_shp.merge(hou_asthma,on='tract_geoid',how='left')

#save
hou_asthma_geo.to_file('../GIS/for-map/cdc-asthma-houmetro.geojson')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hou_asthma['tract_geoid'] = hou_asthma['LocationName'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hou_asthma.rename(columns=rename_cols,inplace=True)


In [5]:
print(len(hou_asthma_geo))
print(hou_asthma_geo.columns)
display(hou_asthma_geo.head())

1615
Index(['cnty_geoid', 'cnty_nm', 'tract_geoid', 'geometry', 'dv', 'p', 'py'], dtype='object')


Unnamed: 0,cnty_geoid,cnty_nm,tract_geoid,geometry,dv,p,py
0,48157,"Fort Bend, TX",48157674100,"POLYGON ((-95.61467 29.57828, -95.61339 29.578...",8.3,6732.0,5189.0
1,48157,"Fort Bend, TX",48157674200,"POLYGON ((-95.63989 29.58625, -95.63974 29.586...",7.1,5202.0,4092.0
2,48167,"Galveston, TX",48167720501,"POLYGON ((-95.1662 29.49059, -95.16474 29.4924...",9.4,4239.0,3153.0
3,48167,"Galveston, TX",48167722002,"POLYGON ((-94.93724 29.39704, -94.93724 29.397...",10.2,3487.0,2701.0
4,48167,"Galveston, TX",48167720301,"POLYGON ((-95.21457 29.52567, -95.21389 29.526...",9.0,2455.0,1932.0


In [7]:
print(len(hou_asthma_geo.loc[hou_asthma_geo['dv'].isna()]))
display(hou_asthma_geo.loc[hou_asthma_geo['dv'].isna()])

10


Unnamed: 0,cnty_geoid,cnty_nm,tract_geoid,geometry,dv,p,py
182,48201,"Harris, TX",48201980000,"POLYGON ((-95.28884 29.63928, -95.28882 29.639...",,,
750,48201,"Harris, TX",48201343601,"POLYGON ((-95.15337 29.71258, -95.15335 29.712...",,,
752,48201,"Harris, TX",48201340201,"POLYGON ((-95.1919 29.61617, -95.19147 29.6164...",,,
753,48201,"Harris, TX",48201324102,"POLYGON ((-95.22412 29.72378, -95.22404 29.724...",,,
846,48167,"Galveston, TX",48167990100,"POLYGON ((-94.9554 29.42442, -94.95533 29.4248...",,,
1312,48167,"Galveston, TX",48167990000,"POLYGON ((-95.12397 29.08029, -95.12068 29.083...",,,
1447,48071,"Chambers, TX",48071990000,"POLYGON ((-94.37078 29.55388, -94.37072 29.553...",,,
1450,48039,"Brazoria, TX",48039990000,"POLYGON ((-95.50585 28.82447, -95.49398 28.830...",,,
1478,48201,"Harris, TX",48201980100,"POLYGON ((-95.37486 30.01104, -95.37474 30.011...",,,
1491,48071,"Chambers, TX",48071710600,"POLYGON ((-95.01811 29.55485, -95.01783 29.555...",,,


In [26]:
########################################
# load CDC life expectancy data
########################################

#load national data
life_ex_us = pd.read_csv('../data/CDC/U.S._Life_Expectancy_at_Birth_by_State_and_Census_Tract_-_2010-2015.csv')

#restrict to just hou metro counties
life_ex_us['County'] = life_ex_us['County'].str.replace(' County','')
hou_life = life_ex_us.loc[(life_ex_us['State'] == 'Texas')&(life_ex_us['County'].isin(houmetro_nms))]

#delete us data cause we don't need it
del life_ex_us

#get rid of values that don't have tract numbers
hou_life = hou_life.loc[~hou_life['Census Tract Number'].isna()]

#creately format tract_geoid
hou_life['tract_fips'] = hou_life['Census Tract Number'].apply(lambda x: f'{x:.2f}')
hou_life['tract_fips'] = hou_life['tract_fips'].astype(str).str.replace('.','')
hou_life['cnty_fips'] = hou_life['County'].map(houmetro_cntys)
hou_life['tract_geoid'] = hou_life['cnty_fips'] + hou_life['tract_fips']

#too many columns, reduce and rename
rename_cols = {'Life Expectancy':'ex','Life Expectancy Range':'exr', 
               'Life Expectancy Standard Error':'exe','tract_geoid':'tract_geoid'}
hou_life.rename(columns=rename_cols,inplace=True)
hou_life = hou_life[list(rename_cols.values())]

#connect to 2010 tracts - GEOID
hou_life_geo = houtracts10_shp.merge(hou_life,on='tract_geoid')

#save
hou_life_geo.to_file('../GIS/for-map/cdc-lifeexpectancy-houmetro.geojson')

In [27]:
#just making sure they all join
print('hou_life:',len(hou_life))
print('hou_life_geo:',len(hou_life_geo))
display(hou_life.loc[~hou_life['tract_geoid'].isin(list(hou_life_geo['tract_geoid'].unique()))].head())

hou_life: 1073
hou_life_geo: 1073


Unnamed: 0,State,County,Census Tract Number,Life Expectancy,Life Expectancy Range,Life Expectancy Standard Error,tract_fips,cnty_fips,tract_geoid


In [29]:
hou_life.columns

Index(['State', 'County', 'Census Tract Number', 'Life Expectancy',
       'Life Expectancy Range', 'Life Expectancy Standard Error', 'tract_fips',
       'cnty_fips', 'tract_geoid'],
      dtype='object')