In [352]:
# import libraries
from h3 import h3
import geopandas as gpd
import pandas as pd
import numpy as np

In [374]:
from multiprocessing import Pool
from multiprocessing import cpu_count
# number of CPUs reported by multiprocessing.cpu_count(); CORES is derived from this value
nCORES = cpu_count()

In [375]:
import time

For the boundary data we use the GADM administrative boundaries from
https://gadm.org/

In [366]:
# path to the GADM shapefile, relative to the working directory
# (file name suggests GADM 3.6, administrative level 2 - verify against the download)
SHP_PATH = 'data/gadm36_levels_shp/gadm36_2.shp'
# read the whole shapefile; it is filtered by country in a later cell
gdf = gpd.read_file(SHP_PATH)

In [367]:
# country to get the hexagons for; compared against the GID_0 column of the boundaries
COUNTRY = 'GBR'
# default `aperture` of get_hexagons_fromJson, which passes it to h3.polyfill as its second argument
APERTURE_SIZE = 9
# names of the unique-id column and the geometry column; only these two columns are kept
COL_ID = 'GID_2'
COL_GEOM = 'geometry'
# number of worker processes (and of data chunks) to use;
# raise the divisor above 1 to use only a fraction of the available cores
CORES = int(nCORES/1)

In [368]:
# keep the rows of the chosen country and only the id / geometry columns
gdf1 = gdf.loc[gdf['GID_0'] == COUNTRY, [COL_ID, COL_GEOM]]

In [369]:
def get_hexagons_from_gdf(gdf):
    """Build the list of H3 hexagons covering each unique id of a GeoDataFrame.

    Multi-part geometries are first split into single-part rows with
    ``good_explode``. Every resulting geometry is filled with hexagons by
    ``get_hexagons_fromJson`` and the per-geometry lists are concatenated
    for each ``COL_ID`` value.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame holding at least the ``COL_ID`` and ``COL_GEOM`` columns.

    Returns
    -------
    pandas.DataFrame
        One row per ``COL_ID`` value with the columns ``COL_ID``,
        ``hex_list`` (the concatenated hexagon list) and
        ``hex_list_length`` (the number of entries in that list).
    """
    # convert the multipolygons to polygons by our custom explode function
    polygons = good_explode(gdf)
    polygons['hex_list_before'] = polygons[COL_GEOM].apply(
        lambda geom: get_hexagons_fromJson(geom.__geo_interface__)
    )
    # Concatenate the per-polygon lists of every unique id in a single pass.
    # Selecting the column before applying keeps the grouping key out of the
    # applied data, and the flat comprehension avoids the repeated list
    # copies that Series.sum() performs on list-valued cells.
    final_df = (
        polygons.groupby(COL_ID)['hex_list_before']
        .apply(lambda cell_lists: [cell for cells in cell_lists for cell in cells])
        .rename('hex_list')
        .reset_index()
    )
    # final number of hexagons for each unique id
    final_df['hex_list_length'] = final_df['hex_list'].apply(len)
    return final_df

In [370]:
# Split by row position instead of handing the frame itself to np.array_split
# (which goes through the deprecated DataFrame.swapaxes): chunk sizes are the
# same, every chunk is a plain .iloc slice of gdf1, and empty chunks
# (CORES larger than the number of rows) are not sent to the workers.
gdf1_split = [
    gdf1.iloc[rows]
    for rows in np.array_split(np.arange(len(gdf1)), CORES)
    if len(rows)
]

In [376]:
with Pool(processes=CORES) as pool1:
    # time only the parallel map + concat, not the pool start-up;
    # perf_counter is monotonic, so clock adjustments cannot skew the interval
    start = time.perf_counter()
    # every worker returns a frame with its own 0-based index, so rebuild a
    # single unique index instead of keeping the duplicated labels
    new_df = pd.concat(
        pool1.map(get_hexagons_from_gdf, gdf1_split),
        ignore_index=True,
    )
    # elapsed seconds, rounded to milliseconds
    end = round(time.perf_counter() - start, 3)

In [379]:
# `end` holds the elapsed seconds of the parallel run; show it in minutes
end/60

3.9294333333333333

In [355]:
# get_hexagons_from_gdf(gdf)

Step-by-step walkthrough of the `get_hexagons_from_gdf` function

In [324]:
# first 50 rows of the chosen country, reduced to the id / geometry columns
gdf1 = gdf.loc[gdf['GID_0'] == COUNTRY, [COL_ID, COL_GEOM]].head(50)

In [326]:
# convert the multipolygons to polygons (one row per single-part geometry)
# with our custom explode function
polygonOnly_gdf1 = good_explode(gdf1)

In [330]:
# get a hexagon list for each polygon with our get_hexagons_fromJson function
polygonOnly_gdf1['hex_list_before'] = polygonOnly_gdf1[COL_GEOM].apply(
    lambda geom: get_hexagons_fromJson(geom.__geo_interface__)
)

In [328]:
# check the number of hexagons found before the grouped dataframe
#polygonOnly_gdf1['hex_list_before_length'] = polygonOnly_gdf1['hex_list_before'].apply(lambda x: len(x))

In [329]:
# create a group object keyed by the unique id column (COL_ID)
polygonOnly_gdf1_group = polygonOnly_gdf1.groupby(COL_ID)

In [333]:
# Concatenate the per-polygon hexagon lists of each unique id in one pass.
# Selecting the column before applying keeps the grouping key out of the
# applied data, and the flat comprehension avoids the repeated list copies
# that Series.sum() performs on list-valued cells.
final_df = (
    polygonOnly_gdf1_group['hex_list_before']
    .apply(lambda cell_lists: [cell for cells in cell_lists for cell in cells])
    .rename('hex_list')
    .reset_index()
)
# final number of hexagons for each unique id
final_df['hex_list_length'] = final_df['hex_list'].apply(len)

In [343]:
# preview the first rows of the per-id hexagon table
final_df.head()

Unnamed: 0,GID_2,hex_list,hex_list_length
0,GBR.1.10_1,"[891942cdc13ffff, 891942cd043ffff, 891942c82c7...",3955
1,GBR.1.11_1,"[89194a72067ffff, 89194a09ba7ffff, 89194a08b6b...",760
2,GBR.1.12_1,"[89195876437ffff, 89195876527ffff, 8919587618b...",1157
3,GBR.1.13_1,"[89195d300abffff, 89195d1acbbffff, 89195d32eab...",16088
4,GBR.1.14_1,"[8919424d26fffff, 89194248b5bffff, 8919424d363...",1128


In [259]:
def good_explode(self):
    """
    Explode multi-part geometries into multiple single geometries.

    Each row containing a multi-part geometry is split into multiple rows
    with single geometries, thereby increasing the vertical size of the
    GeoDataFrame. The non-geometry columns of a row are repeated for every
    part of its geometry.

    The original index is discarded: the result carries a fresh zero-based
    integer index. The input frame is not modified.

    Parameters
    ----------
    self : GeoDataFrame
        Frame whose active geometry column is exploded.

    Returns
    -------
    GeoDataFrame
        Exploded geodataframe with each single geometry as a separate
        entry, the non-geometry columns first and the geometry column last.
    """
    geom_col = self.geometry.name

    # Work on a 0..n-1 index so that the first level of the exploded index is
    # the position of the source row. reset_index returns a new frame, so the
    # caller's data is left untouched.
    frame = self.reset_index(drop=True)
    parts = frame.geometry.explode()

    # get_level_values(0) yields the source row for both index layouts that
    # explode() can produce: a (row, part) MultiIndex or a flat index with
    # repeated row labels. No assumption about a 'level_1' column is needed.
    source_rows = parts.index.get_level_values(0).values

    # Repeat the attribute columns by position instead of aligning on a
    # duplicated index.
    attrs = frame.drop(columns=[geom_col]).iloc[source_rows].reset_index(drop=True)
    attrs[geom_col] = parts.values

    # Rebuild with the caller's own frame class so the geometry column and
    # CRS are registered again.
    return type(self)(attrs, geometry=geom_col, crs=self.crs)

In [97]:
# lol.geometry.__geo_interface__['features'][0]['geometry']

In [342]:
def get_hexagons_fromJson(boundary_geoJson, aperture=APERTURE_SIZE):
    """Return the hexagons that ``h3.polyfill`` finds for a boundary, as a list.

    Parameters
    ----------
    boundary_geoJson : dict
        GeoJSON-like geometry mapping handed to ``h3.polyfill`` (callers in
        this notebook pass a geometry's ``__geo_interface__``).
    aperture : int, optional
        Second positional argument of ``h3.polyfill``. Defaults to the value
        ``APERTURE_SIZE`` had when this function was defined.

    Returns
    -------
    list
        The items returned by ``h3.polyfill``, materialised as a list.
    """
    # API reference: https://github.com/uber/h3-py/blob/master/h3/h3.py
    filled_cells = h3.polyfill(
        boundary_geoJson,
        aperture,
        geo_json_conformant=True,
    )
    return list(filled_cells)