This notebook adds two census tables (`censusgeo` and `censush3map`) to the Postgres database. `censusgeo` has two columns: `censusblockgroupid` and `geometry`. The `geometry` column stores the multipolygon associated with the `censusblockgroupid`. `censush3map` has two columns: `censusblockgroupid` and `h3id`. The `h3id` column stores an H3 cell id associated with a `censusblockgroupid`. Note: each `censusblockgroupid` maps to exactly one multipolygon but to multiple `h3id` values.

In [1]:
import os

import geoalchemy2
import geopandas as gpd
import h3
import numpy as np
import pandas as pd
import psycopg2
import shapely
import sqlalchemy

## connecting to database

In [2]:
# Read the connection URL from the environment so that database credentials are
# never stored in (or committed with) the notebook. Set it before starting the
# kernel, e.g.:
#   export CENSUS_DB_URL='postgresql://<user>:<password>@localhost/main'
# A non-default port can be given in the URL as `@localhost:<port>/main`.
conn_string = os.environ.get("CENSUS_DB_URL")
if not conn_string:
    raise RuntimeError(
        "Environment variable CENSUS_DB_URL is not set; expected a URL such as "
        "'postgresql://<user>:<password>@localhost/main'."
    )

# Engine + single connection shared by every read/write cell below;
# `metadata` is the registry used when reflecting existing tables.
db = sqlalchemy.create_engine(conn_string)
conn = db.connect()
metadata = sqlalchemy.MetaData()

In [3]:
# Reflect the existing `cities` table from the database instead of declaring
# its columns by hand.
cities_table = sqlalchemy.Table(
    'cities',
    metadata,
    autoload=True,
    autoload_with=db,
)

# Equivalent to 'SELECT * FROM cities'
query = sqlalchemy.select([cities_table])

# Materialise every row; the rows are iterated later to build the
# per-city bounding-box filter.
cities = conn.execute(query).fetchall()
cities

[(1, 'Atlanta', [-84.617195, 33.521004, -84.12611, 34.010136]),
 (2, 'New York', [-74.25576, 40.49584, -73.6996, 40.915165]),
 (3, 'Los Angeles', [-118.66444, 33.704742, -117.6829, 34.328]),
 (4, 'Chicago', [-87.9465, 41.64355, -87.52186, 42.0841]),
 (5, 'Dallas', [-97.09041, 32.557907, -96.55646, 33.125973])]

## censusgeo table

In [4]:
# Load the census block group features from the local GeoJSON file into a
# GeoDataFrame. The path is relative, so the file must be in the kernel's
# working directory.
cbg_gdf = gpd.read_file("cbg.geojson")

In [5]:
# Normalise the block group identifier to a plain digit string by casting to
# int64 first and then to str.
# NOTE(review): the int64 round-trip drops any leading zeros in the id —
# confirm this matches the id format used by the other database tables.
cbg_gdf["CensusBlockGroup"] = (
    cbg_gdf["CensusBlockGroup"].astype("int64").astype(str)
)

# Use the column name that the `censusgeo` / `censush3map` tables are written with.
cbg_gdf = cbg_gdf.rename(columns={"CensusBlockGroup": "censusblockgroupid"})

In [6]:
# Boolean mask over the rows of cbg_gdf: True where the block group geometry
# intersects the bounding box of at least one city.
# The mask is created on cbg_gdf's own index so that the element-wise OR below
# (which aligns on index labels) and the later row selection always line up,
# whatever index cbg_gdf carries.
all_cities_filter = pd.Series(False, index=cbg_gdf.index, dtype=bool)

# Each row of `cities` is (id, name, bounding box); the bounding box is the
# list of four coordinates passed straight to shapely's box().
for _, _, city_bb in cities:
    # box() already returns a Polygon, so no extra Polygon(...) wrapper is needed.
    city_box = shapely.geometry.box(*city_bb)
    city_filter = cbg_gdf["geometry"].intersects(city_box)
    all_cities_filter = all_cities_filter | city_filter


In [7]:
# Keep only the block groups flagged by the city mask, restricted to the two
# columns stored in the `censusgeo` table (row and column selection in one step).
censusgeo_df = cbg_gdf.loc[all_cities_filter, ["censusblockgroupid", "geometry"]]

# writing to database (an existing `censusgeo` table is replaced)
censusgeo_df.to_postgis(
    "censusgeo",
    con=conn,
    if_exists='replace',
    index=False,
)
censusgeo_df

Unnamed: 0,censusblockgroupid,geometry
10302,60373104003,"MULTIPOLYGON (((-118.35037 34.20667, -118.3499..."
10306,60590865023,"MULTIPOLYGON (((-117.91372 33.84724, -117.9135..."
10307,60376510024,"MULTIPOLYGON (((-118.32800 33.80984, -118.3277..."
10308,60376511011,"MULTIPOLYGON (((-118.34007 33.82307, -118.3400..."
10309,60590994023,"MULTIPOLYGON (((-118.00659 33.71548, -118.0052..."
...,...,...
198893,481130160014,"MULTIPOLYGON (((-97.00906 32.73005, -97.00906 ..."
198894,481130154032,"MULTIPOLYGON (((-97.02849 32.75343, -97.02845 ..."
198908,481130141261,"MULTIPOLYGON (((-97.00669 32.95643, -97.00668 ..."
198913,481130153051,"MULTIPOLYGON (((-96.98171 32.81563, -96.97645 ..."


## censush3map table

In [8]:
# H3 resolution used to fill every block group polygon.
h3_res = 9

# Parallel lists: cbg_id[i] is the block group that H3 cell h3_id[i] belongs to.
cbg_id = []
h3_id = []

for cbg, geom in zip(censusgeo_df["censusblockgroupid"], censusgeo_df["geometry"]):
    # Multi-part geometries expose their parts through `.geoms`; a plain
    # Polygon has no such attribute, so treat it as a single part instead of
    # failing on it.
    parts = getattr(geom, "geoms", [geom])
    for poly in parts:
        temp_id = list(h3.polyfill_geojson(shapely.geometry.mapping(poly), res=h3_res))
        # extend() appends in place; rebuilding the lists with `+` on every
        # polygon copies everything collected so far and is quadratic overall.
        h3_id.extend(temp_id)
        cbg_id.extend([cbg] * len(temp_id))

In [9]:
# cbg_id and h3_id are parallel lists, so they can be used directly as the two
# columns of the mapping table.
censush3map_df = pd.DataFrame({'censusblockgroupid': cbg_id, 'h3id': h3_id})

# writing to database (an existing `censush3map` table is replaced)
censush3map_df.to_sql(
    'censush3map',
    con=conn,
    if_exists='replace',
    index=False,
)
censush3map_df

Unnamed: 0,censusblockgroupid,h3id
0,60373104003,8929a1892cfffff
1,60373104003,8929a189257ffff
2,60373104003,8929a18920bffff
3,60373104003,8929a18921bffff
4,60373104003,8929a189247ffff
...,...,...
179852,481130153051,8926c86ea73ffff
179853,481130153051,8926c86cc9bffff
179854,481130143113,8926c86e57bffff
179855,481130143113,8926c86e563ffff


## closing the connection to database

In [10]:
# Release the database connection opened in the connection cell; cells that
# use `conn` cannot be re-run after this without reconnecting.
conn.close()