In [1]:
%load_ext autoreload
%autoreload 2
    
from pathlib import Path
import metapack as mp
import geopandas as gpd
import pandas as pd
from auto_tqdm import tqdm 
from shapely.wkt import loads
from shapely.geometry import Polygon
import libgeohash as gh 
from operator import mul
import numpy as np

doc = mp.jupyter.open_source_package()
doc.set_sys_path()
import pylib

ea_epsg = 2163 #US Equal Area projection

import logging
logging.basicConfig()

from pylib import lines_logger, points_logger
lines_logger.setLevel(logging.DEBUG)
points_logger.setLevel(logging.DEBUG)

pkg_root = Path(doc.path).parent
pkg = mp.open_package(pkg_root)
pkg

In [2]:
cache = pylib.open_cache(pkg)

In [3]:
pkg_h = mp.open_package('http://library.metatab.org/civicknowledge.com-geohash-us.csv')
hashes = pkg_h.resource('us_geohashes').geoframe().rename(columns={'geohash':'gh4'})


In [4]:
tags_df = cache.get_df('points/tags_df')
tags_df['gh4'] = tags_df.geohash.str.slice(0,4)

In [12]:
# Generate a reduced collection of points

bus_tags = ['restaurant','fast_food','cafe','convenience','bank','supermarket']
low_dens_tags = ['grave_yard','camp_site','camp_pitch','bench','view_point']

def collect_densities(tags):
    t = tags_df.set_index(['geohash'])[['amenity','shop','tourism']].stack().to_frame().reset_index()
    t.columns = ['geohash','group','type']
    t = t[t.type.isin(tags)]
    #t.to_csv('tags.csv')
    t[['gh4']] = t.geohash.str.slice(0,4)
    t = t.merge(tags_df[['geohash', 'geometry']])

    t['dummy'] = 1
    t = hashes[['gh4']].merge(t)# only in the continential US
    t = t.groupby(t.geohash.str.slice(0,6)).dummy.count().to_frame().reset_index()
    t['geometry'] = t.geohash.apply(lambda ghc: Polygon([ele[::-1] for ele in gh.bbox(ghc, coordinates = True)]))
    t = gpd.GeoDataFrame(t, crs=4326).to_crs(3857) # Web Mercator
    t = t[t.dummy >2]

    t['a'] = t.geohash.apply(lambda v: (mul(*gh.dimensions(v, True))))
    t['density'] = (t.dummy/t.a)*1e6
    t['dummy'] = t.dummy.clip(0,t.dummy.mean()*5)
    return t

bus_densities =  collect_densities(bus_tags)
ld_densities =  collect_densities(low_dens_tags)

In [18]:
bus_densities =  bus_densities[['geohash','dummy','density']].rename(columns={'dummy':'count'})

In [19]:
bus_densities.head()

Unnamed: 0,geohash,count,density
0,9mgvem,11.0,17.645323
1,9mgveq,17.0,27.271765
5,9mgzcv,4.0,6.445905
6,9mgzcx,8.0,12.893461
14,9mgzum,7.0,11.280334


In [14]:
# Aggregate the points to geohashes

if False:
    import contextily as ctx

    t = ld_densities.copy()
    t['geometry'] = t.buffer(10000)
    ax =t.plot(column='density', scheme='fisher_jenks_sampled',figsize=(10, 8))
    ctx.add_basemap(ax, source=ctx.providers.Stamen.Toner)
    display(t.dummy.describe())
   
if False:
    t = gpd.sjoin( hd_hashes[['geohash', 'geometry', 'density']], places.to_crs(3857))
    t[['stusab']] = t.statefp.apply(lambda v: stusab.get(int(v)))
    place_den = t[['namelsad', 'stusab', 'geohash','density', 'geometry']]
    place_den = place_den.groupby('geohash').first().sort_values('density', ascending=False)
    place_den.to_csv('densities.csv')

if False and not Path('places.csv').exists():
    import rowgenerators as rg
    from geoid.censusnames import stusab
    cstates =  [ st for st in stusab.values() if st not in ['HI', 'AK', 'PR', 'VI', 'MP', 'GU', 'AS'] ]
    frames = [rg.geoframe("censusgeo://2018/5/{}/place".format(st)) for st in tqdm(cstates)]
    places = pd.concat(frames)
    places.to_csv('places.csv')    