# Discretization

Split the area covered by the data into discrete regions (hexagonal H3 cells).

In [11]:
%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import seaborn as sns
from h3 import h3

from pylab import rcParams
# Set the default matplotlib figure size as (width, height).
rcParams['figure.figsize'] = (10, 7)

import pandas as pd

# Use fully qualified option names. Bare names such as 'max_columns' or
# 'precision' are resolved by pattern matching and raise OptionError when
# they match more than one option (e.g. both display.* and styler.*).
pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', 20)
pd.set_option('display.precision', 3)


## Prepare data

In [195]:
%%time 

# Load the full dataset from disk.
data = pd.read_feather('../data/data.feather')

# Work on a random subset of 100,000 rows. The fixed seed makes the sample,
# and every output derived from it, reproducible across kernel restarts.
data = data.sample(100000, random_state=42)

CPU times: user 7.83 s, sys: 17.7 s, total: 25.6 s
Wall time: 53.3 s


In [196]:
def drop_columns(data: pd.DataFrame, rename_columns: dict) -> None:
    """Drop the unused trip columns from ``data`` and rename others, in place.

    Safe to call repeatedly: only the unwanted columns that are still present
    are dropped, so re-running the cell (or passing a frame that contains only
    some of them) does not raise.

    Args:
        data: Frame to modify in place.
        rename_columns: Mapping of existing column name to new column name,
            forwarded to ``DataFrame.rename``.

    Returns:
        None; ``data`` itself is mutated.
    """
    unwanted = [
        'dropoff_datetime',
        'passenger_count',
        'distance',
        'dropoff_lon',
        'dropoff_lat'
    ]

    # Drop only if columns are still in the dataset. Each column is checked
    # individually so a partially cleaned frame neither raises KeyError nor
    # keeps leftover unwanted columns.
    present = [column for column in unwanted if column in data.columns]
    if present:
        data.drop(columns=present, inplace=True)

    data.rename(columns=rename_columns, inplace=True)

# Strip the unused trip columns and shorten the pickup column names.
drop_columns(
    data,
    dict(pickup_datetime='datetime', pickup_lat='lat', pickup_lon='lon'),
)

data.head(3)

Unnamed: 0,datetime,lon,lat
50455157,2015-05-17 19:17:12,-73.968,40.802
7665736,2015-01-22 11:27:11,-74.002,40.725
39928247,2015-04-19 14:14:03,-73.959,40.768


## Create hexagons

In [205]:
# H3 grid resolution used for every cell lookup below.
resolution = 8

def to_h3_address(row, resolution):
    """Return the H3 cell address for the point stored in ``row``.

    Args:
        row: Record exposing ``'lat'`` and ``'lon'`` entries.
        resolution: H3 resolution forwarded to ``h3.geo_to_h3``.
    """
    return h3.geo_to_h3(row['lat'], row['lon'], resolution)

# Iterate over the two coordinate columns directly instead of using
# ``data.apply(..., axis=1)``: row-wise apply materialises a Series for every
# row, which is far slower for the same result. The index of ``data`` is kept
# so the keys stay aligned with their rows.
h3_keys = pd.Series(
    [
        h3.geo_to_h3(lat, lon, resolution)
        for lat, lon in zip(data['lat'], data['lon'])
    ],
    index=data.index,
)

## Create map

In [206]:
from shapely.geometry import Polygon
import geojson
from geojson import FeatureCollection

# Build one GeoJSON Feature per distinct H3 cell. A FeatureCollection must
# contain Feature objects, so each polygon is wrapped in a Feature instead of
# being added as a bare geometry.
hexagon_features = []
for h in h3_keys.unique():
    # geo_json=True requests the boundary in GeoJSON form.
    hexagon = Polygon(h3.h3_to_geo_boundary(h, geo_json=True))
    hexagon_features.append(geojson.Feature(geometry=hexagon))

hexagons = FeatureCollection(hexagon_features)
geojson_hexagons = geojson.dumps(hexagons)

In [207]:
import folium

# Draw the serialized hexagon collection on an interactive folium map.
m = folium.Map(location=[40.7536, -73.9811], zoom_start=12)
folium.GeoJson(geojson_hexagons).add_to(m)
m