This notebook adds two census tables (censusgeo and censush3map) to the Postgres database. censusgeo has two columns: censusblockgroupid and geometry. The geometry column stores the multipolygon associated with each censusblockgroupid.
censush3map has two columns: censusblockgroupid and h3id. The h3id column stores an H3 cell id associated with a censusblockgroupid.
Note: each censusblockgroupid maps to exactly one multipolygon but to multiple h3ids.

In [1]:
import os

import h3
import shapely
import psycopg2
import pandas as pd
import numpy as np
import sqlalchemy
import geopandas as gpd
import geoalchemy2

In [2]:
# H3 resolution passed as `res` when each block-group polygon is filled with H3 cells.
h3_res = 9

In [3]:
# Block-group boundaries; supplies the CensusBlockGroup id and geometry columns used below.
cbg_gdf = gpd.read_file("cbg.geojson")
# Shapefile whose geometries are combined into atl_multipolygon in a later cell.
atl_shp_file = gpd.read_file("Demographic_Population_NSA_ACS2019.shp")

In [4]:
# Normalise the block-group id to a string of digits by round-tripping through int64.
# NOTE(review): the integer cast drops any leading zero the raw id may carry
# (e.g. state FIPS codes below 10) -- confirm that is acceptable for downstream joins.
cbg_gdf["CensusBlockGroup"] = (
    cbg_gdf["CensusBlockGroup"]
    .astype("int64")
    .astype(str)
)

In [5]:
# Use the column name expected by the database tables. Reassigning (instead of
# inplace=True) keeps the cell free of hidden in-place mutation.
cbg_gdf = cbg_gdf.rename(columns={"CensusBlockGroup": "censusblockgroupid"})

In [6]:
# Merge every geometry in the Atlanta shapefile into one shape.
# MultiPolygon([...]) expects each item to be a Polygon or a (shell, holes) pair, so
# feeding it raw table values mis-handles MultiPolygon features and breaks as soon as
# the file carries attribute columns. The unary union of the geometry column accepts
# Polygon and MultiPolygon features alike.
# The result can be a Polygon or a MultiPolygon; it is only used with .intersects().
atl_multipolygon = atl_shp_file.geometry.unary_union

  shell = ob[0]
  holes = ob[1]


In [7]:
# Boolean mask: True for block groups whose geometry intersects the Atlanta outline.
cbg_geometries = cbg_gdf["geometry"]
atl_filter = cbg_geometries.intersects(atl_multipolygon)

In [8]:
# Rectangle (minx, miny, maxx, maxy) used as the wider study area, in the same
# coordinates as cbg_gdf (presumably lon/lat degrees -- confirm against the file's CRS).
ATL_BBOX = (-84.617192, 33.521005, -84.126103, 34.010137)

# Boolean mask: True for block groups whose geometry intersects the rectangle.
# box() already returns a Polygon, so no extra Polygon(...) wrapper is needed.
atl_filter_extended = cbg_gdf["geometry"].intersects(shapely.geometry.box(*ATL_BBOX))

In [9]:
# Database URL for SQLAlchemy. Credentials must not live in the notebook: set
# CENSUS_DB_URL (e.g. "postgresql://user:password@localhost:5901/main") in the
# environment before starting the kernel.
# The default carries no user or password, so the driver falls back to libpq's own
# lookup (PGUSER / PGPASSWORD environment variables or ~/.pgpass).
conn_string = os.environ.get("CENSUS_DB_URL", "postgresql://localhost/main")

In [10]:
# SQLAlchemy engine for the database named in conn_string.
db = sqlalchemy.create_engine(conn_string)

In [11]:
# One connection shared by the table writes below; it is closed in the last cell.
conn = db.connect()

In [12]:
# Keep only the block groups selected by the bounding-box mask, then display them.
cbg_gdf_atl = cbg_gdf[atl_filter_extended]
cbg_gdf_atl

Unnamed: 0,StateFIPS,CountyFIPS,TractCode,BlockGroup,censusblockgroupid,State,County,MTFCC,geometry
52092,13,089,021202,2,130890212022,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.33827 33.91816, -84.33799 ..."
52093,13,089,021204,1,130890212041,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.29465 33.89111, -84.29463 ..."
52094,13,089,021208,2,130890212082,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.31091 33.91140, -84.31067 ..."
52123,13,089,021908,1,130890219081,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.20619 33.81168, -84.20394 ..."
52126,13,089,021908,2,130890219082,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.19568 33.78847, -84.19564 ..."
...,...,...,...,...,...,...,...,...,...
57520,13,063,040303,4,130630403034,GA,Clayton County,G5030,"MULTIPOLYGON (((-84.35379 33.64781, -84.35375 ..."
57530,13,089,021414,1,130890214141,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.32116 33.84971, -84.32098 ..."
57531,13,089,021204,2,130890212042,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.29966 33.89275, -84.29943 ..."
57532,13,089,021102,3,130890211023,GA,DeKalb County,G5030,"MULTIPOLYGON (((-84.33852 33.87542, -84.33847 ..."


In [13]:
# Write id + geometry to the PostGIS table "censusgeo", replacing any existing table.
censusgeo_gdf = cbg_gdf_atl[["censusblockgroupid", "geometry"]]
censusgeo_gdf.to_postgis(name="censusgeo", con=conn, if_exists="replace", index=False)

In [14]:
# Build two parallel lists: for every H3 cell (resolution h3_res) that fills a block
# group, h3_id holds the cell id and cbg_id holds the owning censusblockgroupid.
cbg_id = []
h3_id = []

for cbg, geom in zip(cbg_gdf_atl["censusblockgroupid"], cbg_gdf_atl["geometry"]):
    # A plain Polygon has no .geoms; treat it as a geometry with a single part.
    parts = getattr(geom, "geoms", [geom])
    for poly in parts:
        cells = list(h3.polyfill_geojson(shapely.geometry.mapping(poly), res=h3_res))
        # extend() appends in place; `lst = lst + new` re-copies the whole list on
        # every iteration, which is quadratic in the number of cells.
        h3_id.extend(cells)
        cbg_id.extend([cbg] * len(cells))

In [15]:
# Assemble the parallel id lists into a two-column frame and display it.
censush3map_df = pd.DataFrame({"censusblockgroupid": cbg_id, "h3id": h3_id})
censush3map_df

Unnamed: 0,censusblockgroupid,h3id
0,130890212022,8944c130e43ffff
1,130890212022,8944c131d8fffff
2,130890212022,8944c130367ffff
3,130890212022,8944c130ecbffff
4,130890212022,8944c130e57ffff
...,...,...
29286,130890211023,8944c130083ffff
29287,130890211023,8944c130087ffff
29288,130890213082,8944c131567ffff
29289,130890213082,8944c13156fffff


In [16]:
# Store the block-group -> H3 mapping as table "censush3map", replacing any existing
# table and leaving the DataFrame index out.
censush3map_df.to_sql('censush3map', con=conn, if_exists='replace', index=False)

291

In [17]:
# Release the connection opened with db.connect().
conn.close()