In [None]:
#default_exp identify_rural_tracts

In [None]:
# Explore rural census tracts in metro counties (CBSAs) and urban areas (UAs).

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from shapely.prepared import prep

In [None]:
# Load the rural census tract list: one entry per line, trailing newline removed.
# The context manager closes the file handle as soon as the list is built
# (a bare open() inside the comprehension would leave it open).
with open("data/rural_census_tracts.lis") as tract_file:
    rural_tracts = [line.rstrip("\n") for line in tract_file]
len(rural_tracts)

2302

In [None]:
# Combine the state-level Census tract shapefiles into a single GeoDataFrame.
from pathlib import Path

# The combined national shapefile is written into this same directory by a
# later cell, so it must be skipped here; otherwise a re-run of the notebook
# would read it back in and duplicate every tract.
tract_dir = Path("/InfoGroup/rural/map_files/tracts/")
combined_name = "tl_2017_USA_tract.shp"
# Sorted so the row order of the combined frame is the same on every run
# (glob order is not guaranteed).
pathlist = sorted(shp for shp in tract_dir.glob("*.shp") if shp.name != combined_name)

gdf = pd.concat([gpd.read_file(shp) for shp in pathlist]).pipe(gpd.GeoDataFrame)

In [None]:
# Persist the combined tract layer as one ESRI Shapefile. The output is written
# to the same directory that holds the per-state input shapefiles.
combined_shp = "/InfoGroup/rural/map_files/tracts/tl_2017_USA_tract.shp"
gdf.to_file(combined_shp, driver="ESRI Shapefile")

In [None]:
# Compute the centroid of every tract polygon with GeoPandas' vectorized
# accessor. `.centroid` already yields shapely Points, so neither the row-wise
# `apply` nor the extra `Point(...)` wrapper is needed.
# NOTE(review): recent GeoPandas versions may warn that centroids computed in a
# geographic (lat/lon) CRS are approximate -- presumably acceptable for a
# point-in-polygon test; confirm.
gdf['centroid'] = gdf['geometry'].centroid

In [None]:
# Show the column names of the combined tract GeoDataFrame.
gdf.columns

In [None]:
# Preview the first rows of the combined tract GeoDataFrame.
gdf.head()

In [None]:
# Now find out if the centroid is inside a UA. UAs do not respect political boundaries so a single
# census tract could be in multiple UAs -- although census tracts are much smaller than UAs and 
# UAs are discontinuous in space, so there must be very few tracts that overlap multiple UAs -- 
# and a single census tract could be fractionally rural or urban. 

# If we don't want to be concerned with fractions of census tracts, dividing up the continuous 
# measures by the same fraction, it is reasonable to say that a 'Census-urban' census tract is 
# one whose centroid falls within a particular UA and to call that UA the one to which it belongs.

# We know that all census tracts are wholly within or wholly outside a CBSA because they do not 
# cross county boundaries. So 'OMB-rural' census tracts are in 'OMB-rural' counties.

# "Each census tract contains at least one BG [ed.,block group], and BGs are uniquely numbered 
# within the census tract. Within the standard census geographic hierarchy, BGs never cross state,
# county, or census tract boundaries but may cross the boundaries of any other geographic entity."

In [None]:
# Load the 2017 Urban Area (UA) polygon layer, then list its columns.
ua_shapefile = 'map_files/tl_2017_us_uac10.shp'
ua_gdf = gpd.read_file(ua_shapefile)
ua_gdf.columns

Index(['UACE10', 'GEOID10', 'NAME10', 'NAMELSAD10', 'LSAD10', 'MTFCC10',
       'UATYP10', 'FUNCSTAT10', 'ALAND10', 'AWATER10', 'INTPTLAT10',
       'INTPTLON10', 'geometry'],
      dtype='object')

In [None]:
# Preview the first rows of the UA GeoDataFrame.
ua_gdf.head()

Unnamed: 0,UACE10,GEOID10,NAME10,NAMELSAD10,LSAD10,MTFCC10,UATYP10,FUNCSTAT10,ALAND10,AWATER10,INTPTLAT10,INTPTLON10,geometry
0,24310,24310,"Dixon, IL","Dixon, IL Urban Cluster",76,G3500,C,S,25525003,938058,41.8529507,-89.4817439,"POLYGON ((-89.498589 41.854668, -89.498538 41...."
1,27847,27847,"Escanaba, MI","Escanaba, MI Urban Cluster",76,G3500,C,S,46648248,283456,45.8704839,-87.0638396,"(POLYGON ((-87.063103 45.866083, -87.062210999..."
2,18100,18100,"Clintonville, WI","Clintonville, WI Urban Cluster",76,G3500,C,S,5854683,502563,44.6232203,-88.7611283,"POLYGON ((-88.78650499999999 44.629957, -88.78..."
3,6166,6166,"Bedford, IN","Bedford, IN Urban Cluster",76,G3500,C,S,30402519,2314,38.856653,-86.5012383,"(POLYGON ((-86.518316 38.79547, -86.518253 38...."
4,75270,75270,"Riverdale, CA","Riverdale, CA Urban Cluster",76,G3500,C,S,2306823,0,36.431071,-119.8620544,"POLYGON ((-119.869132 36.430832, -119.870931 3..."


In [None]:
# Row counts, one per line: census tracts first, then urban areas.
for frame in (gdf, ua_gdf):
    print(len(frame))

In [None]:
%%time
# Count the overlap of tracts in UAs 
overlap_dict = dict.fromkeys(ua_gdf['GEOID10'].tolist(),0)
for i,ua in ua_gdf[['GEOID10','geometry']].iterrows():
    for j,ct in gdf['centroid'].iteritems():
        if ua['geometry'].contains(ct):
                overlap_dict[ua['GEOID10']] += 1
                break

In [None]:
# Turn the {GEOID10: count} mapping into a one-column table indexed by UA id.
cdf = pd.DataFrame.from_dict(
    overlap_dict,
    orient='index',
    columns=['count'],
)

In [None]:
# Tabulate how many UAs have each count value.
# NOTE(review): this table can only show that no UA holds more than one tract
# centroid if the counting loop scans every centroid; a loop that stops at the
# first match caps each count at 1 -- confirm before drawing that conclusion.
cdf['count'].value_counts()

In [None]:
# -----------------------

In [None]:
# InfoGroup's 2017 data cover 82,385 census tracts overall. The HRSA/FORHP file
# of rural units lists 2,302 rural census tracts in addition to all those in non-Metro counties. The Census's
# Zip Code-to-Census Tracts relationship file identifies 74,091 census tracts in all states (below).

In [None]:
# A census tract is identified by state FIPS + county FIPS + tract number, so
# only those three columns are read and repeated combinations are dropped.
# NOTE(review): the columns go through pandas' default type inference, so codes
# with leading zeros would presumably be parsed as numbers -- confirm intended.
tract_id_cols = ['STATEFP', 'COUNTYFP', 'TRACTCE']
infile = '/home/tflory/Relationship_Files/Census_Tract_to_PUMA.csv'
ct_df = pd.read_csv(infile, usecols=tract_id_cols)
ct_df = ct_df.drop_duplicates()

In [None]:
# Number of tracts whose state FIPS code is at most 56.
# len() of the boolean mask would return the total row count no matter what the
# condition is, so the True values are summed instead.
# NOTE(review): 56 is presumably the highest state FIPS code, with territories
# above it -- confirm. Any figure previously taken from the len() form was the
# full row count and should be re-checked.
int((ct_df['STATEFP'] <= 56).sum())