In [2]:
# Load the `watermark` IPython extension and print its environment summary.
# NOTE(review): the saved output of this cell is
# "ModuleNotFoundError: No module named 'watermark'", i.e. the extension was not
# installed in the kernel that last ran it — install it or drop this cell.
%load_ext watermark
%watermark

ModuleNotFoundError: No module named 'watermark'

In [1]:
import sqlite3
from os.path import join
import numpy as np
import rasterio
from math import floor, ceil
import numpy as np
import pandas as pd
from rtree import Index
import geopandas as gpd
from geopandas.tools import sjoin
import shapely.wkb
from shapely.geometry import Point, Polygon
from shapely import speedups
from aequilibrae import Project
#from folders import lyr_fldr, model_fldr, model_file, population_sqlite, districts_file, hexbins_file, provinces_file
from time import perf_counter
# shapely.speedups.disable()

from functions.wkb_to_gdf import wkb_to_gdf
from functions.hex_builder import hex_builder

In [2]:
# Folder of the AequilibraE project that is opened with project.open(model_fldr).
# NOTE(review): hard-coded absolute path. The folder is named "Andorra", while later cells
# use EPSG:3405 and the saved outputs reference Vietnam data (e.g. province "Dien Bien") —
# confirm this is the intended model.
model_fldr = r'/home/jovyan/workspace/road_analytics/Andorra'

In [3]:
# Open the AequilibraE project stored in model_fldr; `project` is later handed to wkb_to_gdf.
project = Project()
project.open(model_fldr)

## Let's get rid of all islands

In [5]:
# Build the coverage-area layer from the open project.
# NOTE(review): wkb_to_gdf is a project helper not shown in this notebook; presumably it
# returns a GeoDataFrame (the result is reprojected with .to_crs() right after) — confirm
# what it contains and where the island removal mentioned in the heading happens.
mainland = wkb_to_gdf(project)

In [6]:
# Work in EPSG:3405 — the same EPSG code that is passed to hex_builder when the grid is built.
mlnd = mainland.to_crs('epsg:3405')
# Bounding box of the whole layer as (minx, miny, maxx, maxy).
# total_bounds reads the box directly instead of first dissolving every geometry with
# unary_union (costly, and deprecated in recent GeoPandas); the resulting box is the same.
# .tolist() keeps the values as plain Python floats, as .bounds returned before.
minx, miny, maxx, maxy = mlnd.total_bounds.tolist()

# Create Hex Bins

In [8]:
%%time
# size_parameter was determined empirically to not allow any hex bin with more than 10k people and be sufficiently small to allow for clusterings per district
size_parameter = 200
hexb = hex_builder(maxx, minx, maxy, miny, size_parameter, epsg=3405, coverage_area=mlnd)
# hexb = hex_builder(maxx, minx, maxy, miny, size_parameter, epsg=4326)
hexb.to_crs('epsg:4326', inplace=True)
hexb.shape[0]

Expect 46,945 total hexbins for this bounding box
CPU times: user 8.95 s, sys: 52.6 ms, total: 9.01 s
Wall time: 9.13 s


28059

# Add geo-information to the hex bins

# Generalize to levels 1 & 2 of political subdivisions (GADM)

In [None]:
%%time
districts = gpd.read_file(districts_file, layer='districts', driver='gpkg')
districts.rename(columns = {'VARNAME_1':'province', 'VARNAME_2':'district'}, inplace = True)
districts = districts.set_crs('epsg:4326')
districts.head()

In [None]:
# Disabled: optional ward-level layer; `wards` is not used by any other cell in this notebook.
# NOTE(review): the last line has unbalanced brackets (`wards[]]`) and must be fixed
# before this cell can be re-enabled.
# wards = gpd.read_file(wards_file, layer='wards', driver='gpkg')
# wards = wards.set_crs('epsg:4326')
# wards = gpd.GeoDataFrame(wards[]], geometry=wards['geometry'])

In [None]:
%%time
# We get centroids to make the association with spatial divisions a little faster
centroids = gpd.GeoDataFrame(hexb[['hex_id']], geometry=gpd.points_from_xy( hexb['x'], hexb['y']), crs="EPSG:3405")
centroids.to_crs(4326, inplace=True)

In [16]:
%%time
# Disabled: optional snapshot of the raw hex bins and their centroids (feather or GeoPackage).
# With every statement commented out this cell currently does nothing.
# hexb.to_feather(join(lyr_fldr, 'hexbins_raw.feather'))
# centroids.to_feather(join(lyr_fldr, 'hexbins_centroids.feather'))

# hexb.to_file(join(lyr_fldr, 'hexbins_raw.gpkg'), driver='GPKG', layer='hexbins')
# centroids.to_file(join(lyr_fldr, 'hexbins_raw.gpkg'), driver='GPKG', layer='centroids')

Wall time: 0 ns


In [17]:
%%time
data = sjoin(centroids, districts, how="left", predicate="intersects")
data.drop_duplicates(subset=['hex_id'], inplace=True)
data = data[['hex_id','province', 'district', 'geometry']]
found_centroid = data[['hex_id', 'province', 'district']]
found_centroid = found_centroid.dropna()
found_centroid.head()

Wall time: 10min 28s


In [76]:
%%time
not_found = hexb[~hexb.hex_id.isin(found_centroid.hex_id)]
not_found_merged = sjoin(not_found, districts, how="left", predicate="intersects")
not_found_merged = not_found_merged[['hex_id','province', 'district']]
not_found_merged.dropna(inplace=True)
not_found_merged.head()

Wall time: 9.67 s


Unnamed: 0,hex_id,province,district
7666,1,Dien Bien,Muong Nhe
7668,3,Dien Bien,Muong Nhe
7669,4,Dien Bien,Muong Nhe
7671,6,Dien Bien,Muong Nhe
15868,7,Dien Bien,Muong Nhe


In [78]:
%%time
# Stack both assignment passes (polygon-overlap matches first, then centroid matches)
# into a single hex_id / province / district table.
with_data = pd.concat([not_found_merged, found_centroid])

Wall time: 86 ms


In [83]:
# Join the district assignments back onto the hex bins. The outer join keeps hexes that
# have no assignment yet; their province/district are NaN at this point.
data_complete = hexb.merge(with_data, on='hex_id', how='outer')

In [84]:
%%time
dindex = districts.sindex
empties = data_complete.province.isna()
for idx, record in data_complete[empties].iterrows():
    i -= 1
    geo = record.geometry
    dscrt = [x for x in dindex.nearest(geo.bounds, 10)]
    dist = [districts.loc[d, 'geometry'].distance(geo) for d in dscrt]
    m = dscrt[dist.index(min(dist))]
    data_complete.loc[idx, 'province'] = districts.loc[m, 'province']
    data_complete.loc[idx, 'district'] = districts.loc[m, 'district']    

Wall time: 1.13 s


In [87]:
%%time
# Keep only the attribute columns needed from here on and rebuild a GeoDataFrame around
# the hex geometry.
zones_with_location = gpd.GeoDataFrame(data_complete[['hex_id', 'province', 'district', 'x', 'y']], geometry=data_complete['geometry'])

Wall time: 239 ms


In [93]:
# Display the configured population database path.
# NOTE(review): population_sqlite is not assigned in any cell of this notebook — it only
# appears in the commented-out `from folders import ...` line — so this cell fails on a
# fresh kernel. The saved output also exposes a local absolute path.
population_sqlite

'D:/OuterLoop/OneDrive - Outer Loop Consulting/Server/Projects/2021/02-11 - DT4PAG-Vietnam (WorldBank)/CVTS/zoning\\pop.sqlite'

# Load the vectorized population

In [94]:
%%time
popsqlite = sqlite3.connect(join(lyr_fldr, f'data/pop.sqlite'))
popsqlite.enable_load_extension(True)
popsqlite.load_extension('mod_spatialite')

sql = "SELECT population, Hex(ST_AsBinary(GEOMETRY)) as geom FROM raw_population;"
pop_data = gpd.GeoDataFrame.from_postgis(sql, popsqlite, geom_col="geom")
pop_data.set_crs('epsg:4326', inplace=True)

Wall time: 23min 54s


Unnamed: 0,population,geom
0,1.011495,POINT (105.32167 23.39250)
1,0.996461,POINT (105.32250 23.39250)
2,0.980337,POINT (105.32333 23.39250)
3,0.996077,POINT (105.32417 23.39250)
4,0.993219,POINT (105.32500 23.39250)
...,...,...
39742433,0.752134,POINT (104.84417 8.56417)
39742434,0.747060,POINT (104.84500 8.56417)
39742435,0.827918,POINT (104.84583 8.56417)
39742436,0.920547,POINT (104.84667 8.56417)


# Compute the population for each HexBin

In [95]:
%%time
# Tag every population point with the hex bin that contains it. how="left" keeps points
# that fall in no hex bin; those rows have no hex_id.
pop_to_zone = sjoin(pop_data, zones_with_location, how="left", predicate="within")

Wall time: 54min 49s


In [96]:
%%time
pop_per_zone = pop_to_zone.groupby(['hex_id']).sum()[['population']].reset_index()
pop_per_zone.loc[:, 'hex_id'] = pop_per_zone.hex_id.astype(int)
pop_per_zone.sort_values(['hex_id'], inplace=True)

Wall time: 8.88 s


In [97]:
%%time
zones_with_pop = zones_with_location.merge(pop_per_zone, on='hex_id', how='left')
zones_with_pop.population.fillna(0, inplace=True)

Wall time: 3.8 s


In [98]:
%%time
zones_with_pop = zones_with_pop.drop_duplicates(subset=['geometry'])
zones_with_pop[['hex_id', 'x', 'y', 'population','province' ,'district','geometry']].to_file(join(lyr_fldr, 'hexbins_small.gpkg'), driver='GPKG')

Wall time: 26min 57s
