# Graph of US counties and census tracts

This notebook creates a graph based on US counties and census tract administrative units.

In order to create the graph, we use [the Tiger shapefiles](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.2020.html#list-tab-790442341).

In [33]:
import geopandas as gpd
import pandas as pd
import networkx as nx

In [34]:
def create_graph(tracts, ix="GEOID", lon="INTPTLON", lat="INTPTLAT"):
    geom = tracts.set_index(ix)

    L = tracts[[ix, lat, lon]].set_index(ix)
    L[lat] = L[lat].astype("float")
    L[lon] = L[lon].astype("float")

    feats = [c for c in L.columns if c != "geometry"]

    g = nx.Graph()
    g.add_nodes_from(geom.index.values)

    # make edge list from GeoPandas DataFrame
    edge_list = []
    for index, row in geom.iterrows():
        for f in feats:
            g.nodes[index][f] = L.loc[index][f]
        
        nbrs = geom[geom.geometry.touches(row.geometry)].index.values
        for nbr in nbrs:
            edge_list.append((index, nbr))
    g.add_edges_from(edge_list)
    return g

## CA census tract graph

In [48]:
g = create_graph(gpd.read_file("shapefiles/tl_2020_06_tract/tl_2020_06_tract.shp"))
nx.write_graphml(g, "../data/ca_tracts.graphml")

TypeError: (<class 'geopandas.geoseries.GeoSeries'>, <class 'NoneType'>)

## TX census tract graph

In [30]:
g = create_graph(gpd.read_file("shapefiles/tl_2020_48_tract/tl_2020_48_tract.shp"))
nx.write_graphml(g, "../data/tx_tracts.graphml")

## Counties

In [44]:
counties = gpd.read_file("shapefiles/tl_2010_us_county10/tl_2010_us_county10.shp")
counties = counties[~counties.STATEFP10.isin(["02", "72", "78", "15"])]

In [46]:
g = create_graph(counties, ix="GEOID10", lon="INTPTLON10", lat="INTPTLAT10")
nx.write_graphml(g, "../data/counties.graphml")

TypeError: (<class 'geopandas.geoseries.GeoSeries'>, <class 'NoneType'>)

In [66]:
counties = nx.read_graphml("../data/counties.graphml")
print(len(counties))


3109


In [59]:
gdf = gpd.read_file("shapefiles/tl_2020_06_tract/tl_2020_06_tract.shp")
len(gdf)

9129

In [68]:
gdf = gpd.read_file("../data_collections/air_pollution_mortality_us/geojson.geojson")
ca_tract = gdf[["GEOID", "INTPTLAT", "INTPTLON", "geometry"]]

KeyError: "['GEOID', 'INTPTLAT', 'INTPTLON'] not in index"

In [69]:
gdf

Unnamed: 0,GEOID10,INTPTLAT10,INTPTLON10,geometry
0,28107,34.365205,-89.963065,"POLYGON ((-90.13476 34.22603, -90.13476 34.226..."
1,28101,32.401970,-89.118411,"POLYGON ((-89.13497 32.57697, -89.13466 32.576..."
2,28027,34.228670,-90.603165,"POLYGON ((-90.59062 33.9869, -90.59473 33.9869..."
3,22065,32.365824,-91.240729,"MULTIPOLYGON (((-91.03511 32.12035, -91.03621 ..."
4,51540,38.037658,-78.485381,"POLYGON ((-78.47071 38.04872, -78.47033 38.048..."
...,...,...,...,...
3104,37077,36.299884,-78.657634,"POLYGON ((-78.80329 36.17716, -78.80324 36.179..."
3105,37111,35.682232,-82.048029,"POLYGON ((-81.86107 35.69329, -81.86077 35.692..."
3106,27021,46.951427,-94.333773,"POLYGON ((-94.65939 46.8907, -94.65936 46.8920..."
3107,27057,47.095551,-94.913290,"POLYGON ((-95.16917 47.15252, -95.16917 47.155..."


In [58]:
graph = nx.read_graphml("../data_collections/climate_exposure_ca/graph.graphml")
print(len(graph.nodes()))


9129
