In [None]:
# UDF for GROUP BY in queries

import math

# Geohash alphabet: digits plus lowercase letters, without 'a', 'i', 'l', 'o'.
__base32 = '0123456789bcdefghjkmnpqrstuvwxyz'


def encode(lat, lon, precision):
    """(Double?, Double?, Uint32) -> String?

    Encode a latitude/longitude pair as a geohash string.

    The hash is built by repeatedly bisecting the longitude range
    [-180, 180] and the latitude range [-90, 90], alternating between the
    two and starting with longitude. Each bisection yields one bit (1 when
    the coordinate is strictly greater than the midpoint); every five bits
    become one character of the base-32 alphabet.

    Args:
        lat: Latitude in degrees, or None.
        lon: Longitude in degrees, or None.
        precision: Number of characters in the resulting hash.

    Returns:
        The geohash of length ``precision``; an empty string when
        ``precision`` is 0 or negative; None when either coordinate is
        None or NaN.

    Coordinates outside the valid ranges are not rejected: the bisection
    never leaves the initial interval, so they land in the nearest edge
    cell.

    Approximate error in km for each precision:
    1   ±2500
    2   ±630
    3   ±78
    4   ±20
    5   ±2.4
    6   ±0.61
    7   ±0.076
    8   ±0.019
    9   ±0.0024
    10  ±0.00060
    11  ±0.000074
    """

    if lat is None or lon is None:
        return None
    # NaN compares False against every midpoint, so without this guard it
    # would silently encode as the all-zero hash ('000...') and be grouped
    # together with real points from that corner cell.
    if math.isnan(lat) or math.isnan(lon):
        return None

    lat_lo, lat_hi = -90.0, 90.0
    lon_lo, lon_hi = -180.0, 180.0
    chars = []
    ch = 0           # bits of the character currently being assembled
    bit_count = 0    # how many bits of `ch` are filled so far (0..4)
    use_lon = True   # bits alternate lon, lat, lon, ... starting with lon
    while len(chars) < precision:
        if use_lon:
            mid = (lon_lo + lon_hi) / 2.0
            if lon > mid:
                ch = (ch << 1) | 1
                lon_lo = mid
            else:
                ch <<= 1
                lon_hi = mid
        else:
            mid = (lat_lo + lat_hi) / 2.0
            if lat > mid:
                ch = (ch << 1) | 1
                lat_lo = mid
            else:
                ch <<= 1
                lat_hi = mid
        use_lon = not use_lon
        bit_count += 1
        if bit_count == 5:
            # Five bits collected: emit one base-32 character and start over.
            chars.append(__base32[ch])
            ch = 0
            bit_count = 0
    return ''.join(chars)

In [1]:
import pandas as pd
import geohash
import geopandas as gpd
from shapely.geometry import Polygon

In [2]:
!head "./data.json"

{"geohash":null,"Count":7}
{"geohash":"66jc6j8","Count":1}
{"geohash":"66jcfrn","Count":6}
{"geohash":"6f6cw9x","Count":2}
{"geohash":"6fuywrj","Count":1}
{"geohash":"6gycf03","Count":1}
{"geohash":"6gycfmu","Count":1}
{"geohash":"6gyf5b5","Count":1}
{"geohash":"6mc5nkz","Count":3}
{"geohash":"6mc6b44","Count":3}


In [3]:
# Read the JSON Lines export and keep only the rows that carry a geohash.
df = pd.read_json("./data.json", lines=True).dropna(subset=["geohash"])
df.shape

(114676, 2)

In [4]:
def polygon_from_geohash(geo):
    """Return the bounding box of a geohash as a closed rectangular Polygon.

    The box comes from ``geohash.bbox(geo)``, which is read through its
    'w', 's', 'e' and 'n' entries. Vertices are emitted as
    (w, s) -> (w, n) -> (e, n) -> (e, s) and the ring is closed by
    repeating the first vertex.
    """
    cell = geohash.bbox(geo)
    west, south = cell['w'], cell['s']
    east, north = cell['e'], cell['n']

    ring = (
        (west, south),
        (west, north),
        (east, north),
        (east, south),
        (west, south),
    )
    return Polygon(ring)

In [5]:
# Build the geometry on a new frame instead of overwriting `df.geohash`:
# `df` keeps its geohash strings, so this cell can be re-run without
# feeding Polygons back into polygon_from_geohash.
gdf = gpd.GeoDataFrame(
    df.assign(geohash=df.geohash.apply(polygon_from_geohash)),
    geometry='geohash',
)
gdf.head()

Unnamed: 0,Count,geohash
1,1,"POLYGON ((-70.576171875 -33.50006103515625, -7..."
2,6,POLYGON ((-70.55694580078125 -33.4039306640625...
3,2,POLYGON ((-52.08755493164062 -32.0718383789062...
4,1,POLYGON ((-49.28878784179688 -28.3502197265625...
5,1,POLYGON ((-46.66854858398438 -23.5972595214843...


In [7]:
# Write the decoded cells to disk as GeoJSON.
gdf.to_file(
    'decoded_data.geojson',
    driver='GeoJSON',
    encoding="utf-8",
)

![Map](map.png)