# Filter U.S. tracts for density

In [1]:
import pandas as pd
import geopandas as gpd
import altair as alt
import matplotlib as mpl
import numpy as np
import jenkspy
import altair_latimes as lat
pd.options.display.max_columns = 50
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')
pd.set_option("display.max_columns",200)
pd.options.display.float_format = '{:,.0f}'.format

In [2]:
src = gpd.read_file('input/cb_2019_us_tract.geojson')

In [3]:
src.head()

Unnamed: 0,STATEFP,COUNTYFP,TRACTCE,AFFGEOID,GEOID,NAME,LSAD,ALAND,AWATER,geometry
0,34,5,702808,1400000US34005702808,34005702808,7028.08,CT,1510553,0,"MULTIPOLYGON (((-8338388.128 4871756.092, -833..."
1,39,153,504200,1400000US39153504200,39153504200,5042.0,CT,808151,0,"MULTIPOLYGON (((-9074545.590 5022068.387, -907..."
2,36,71,502,1400000US36071000502,36071000502,5.02,CT,967431,969216,"MULTIPOLYGON (((-8240069.529 5085895.360, -823..."
3,6,13,370000,1400000US06013370000,6013370000,3700.0,CT,999356,0,"MULTIPOLYGON (((-13617311.891 4572245.466, -13..."
4,48,113,19204,1400000US48113019204,48113019204,192.04,CT,2851229,9966,"MULTIPOLYGON (((-10772274.469 3888781.275, -10..."


In [4]:
src.columns = src.columns.str.strip().str.lower().str.replace(' ','_')\
    .str.replace('(', '').str.replace(')', '').str.replace('/','_')

### Bring in population data

In [5]:
tracts_src = pd.read_csv('/Users/mhustiles/data/github/census-data-downloader/\
processed/acs5_2018_population_tracts.csv', dtype={'geoid':'str'})

In [6]:
counties_src = pd.read_csv('/Users/mhustiles/data/github/\
census-data-downloader/processed/acs5_2018_population_counties.csv', dtype={'geoid':'str'})

### Tracts: Merge population and geography

In [7]:
tracts_geo = pd.merge(tracts_src, src, on='geoid')

In [8]:
tracts_geo.rename(columns={'name_x':'placename','universe':'population'}, inplace=True)

In [9]:
tracts_geo = gpd.GeoDataFrame(tracts_geo.drop(['name_y', 'lsad', 'awater', 'universe_moe_annotation',\
           'universe_annotation','universe_moe', 'state', 'county', 'tract', 'affgeoid'], axis=1))

### Calculate area

In [10]:
tracts_geo['geometry'] = tracts_geo['geometry']

In [11]:
tracts_geo['area'] = tracts_geo['geometry'].area / 2.59e+6

### Create a county FIPS field for merges

In [12]:
tracts_geo['ctyfips'] = tracts_geo['statefp'] + tracts_geo['countyfp']

### Counties: Merge population and geography

In [13]:
tracts_cty_geo = pd.merge(counties_src, tracts_geo, right_on='ctyfips', left_on='geoid')

### Split out the county and tract names

In [14]:
tracts_cty_geo[['tract','county','state']] = tracts_cty_geo['placename'].str.split(',',expand=True)

### Clean up

In [15]:
tracts_cty_geo = gpd.GeoDataFrame(tracts_cty_geo.drop(['geoid_x', 'universe_annotation', 'universe_moe',\
            'universe_moe_annotation','universe_moe','universe_moe_annotation', \
            'state', 'tractce', 'statefp', 'countyfp', 'aland'], axis=1))

In [16]:
tracts_cty_geo.rename(columns={'geoid_y': 'geoid', 'universe':'county_pop',\
                              'population':'tract_pop'}, inplace=True)

In [17]:
tracts_cty_geo.head()

Unnamed: 0,name,county_pop,county,geoid,placename,tract_pop,geometry,area,ctyfips,tract
0,"Autauga County, Alabama",55200,Autauga County,1001020801,"Census Tract 208.01, Autauga County, Alabama",2826,"MULTIPOLYGON (((-9637312.704 3828061.099, -963...",73,1001,Census Tract 208.01
1,"Autauga County, Alabama",55200,Autauga County,1001020900,"Census Tract 209, Autauga County, Alabama",6401,"MULTIPOLYGON (((-9643183.360 3856395.489, -961...",159,1001,Census Tract 209
2,"Autauga County, Alabama",55200,Autauga County,1001020500,"Census Tract 205, Autauga County, Alabama",9883,"MULTIPOLYGON (((-9622010.950 3826512.432, -961...",7,1001,Census Tract 205
3,"Autauga County, Alabama",55200,Autauga County,1001020400,"Census Tract 204, Autauga County, Alabama",3831,"MULTIPOLYGON (((-9622010.950 3826512.432, -962...",3,1001,Census Tract 204
4,"Autauga County, Alabama",55200,Autauga County,1001020300,"Census Tract 203, Autauga County, Alabama",3476,"MULTIPOLYGON (((-9625496.919 3827698.823, -962...",3,1001,Census Tract 203


### Filter by density

Parts of a county with a population density lower than 10 people per square mile are not shaded.

In [18]:
tracts_geo['density'] = tracts_geo['population'] / tracts_geo['area']

In [19]:
tracts_geo_dense = tracts_geo[tracts_geo['density'] > 10]

### What's left?

In [20]:
len(tracts_geo_dense)

68568

In [21]:
len(tracts_geo)

71996

In [22]:
tracts_geo_dense.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x126f1d390>

### Just Cali tracts

In [23]:
ca_geo_dense = gpd.GeoDataFrame(tracts_geo_dense[tracts_geo_dense['statefp'] == '06' ])

In [24]:
ca_geo_dense.to_file('output/ca_geo_dense.json', driver='GeoJSON')

In [25]:
ca_geo_dense.head()

Unnamed: 0,geoid,placename,population,statefp,countyfp,tractce,aland,geometry,area,ctyfips,density
3546,6059052511,"Census Tract 525.11, Orange County, California",6295,6,59,52511,1841535,"MULTIPOLYGON (((-13114569.471 3986202.238, -13...",1,6059,5463
3547,6037127520,"Census Tract 1275.20, Los Angeles County, Cali...",4544,6,37,127520,363717,"MULTIPOLYGON (((-13188362.382 4058567.905, -13...",0,6037,22923
3548,6107001400,"Census Tract 14, Tulare County, California",5202,6,107,1400,124522008,"MULTIPOLYGON (((-13272032.337 4345631.099, -13...",76,6107,69
3549,6067008113,"Census Tract 81.13, Sacramento County, California",3626,6,67,8113,1615143,"MULTIPOLYGON (((-13504150.842 4679841.097, -13...",1,6067,3450
3550,6071000824,"Census Tract 8.24, San Bernardino County, Cali...",5401,6,71,824,1442087,"MULTIPOLYGON (((-13098989.751 4043174.543, -13...",1,6071,5922
