In [None]:
%load_ext kamu

In [None]:
%import_dataset us.cityofnewyork.data.ems-incident-dispatch --alias dispatch

In [None]:
# Sanity-check the imported dataset: print its row count, then its schema.
# NOTE(review): `dispatch` is presumably the Spark DataFrame bound by the
# earlier import_dataset magic (alias `dispatch`) - confirm.
print(dispatch.count())
dispatch.printSchema()

In [None]:
%%sql
-- Peek at three rows of the dispatch dataset (no ordering is imposed).
SELECT *
FROM dispatch
LIMIT 3

In [None]:
%import_dataset us.cityofnewyork.data.zipcode-boundaries --alias zipcodes

In [None]:
# Sanity-check the imported dataset: print its row count, then its schema.
# NOTE(review): `zipcodes` is presumably the Spark DataFrame bound by the
# earlier import_dataset magic (alias `zipcodes`) - confirm.
print(zipcodes.count())
zipcodes.printSchema()

In [None]:
%%sql
-- Peek at three rows of the zipcodes dataset (no ordering is imposed).
SELECT *
FROM zipcodes
LIMIT 3

In [None]:
%%sql -o count_per_zipcode
-- Dispatch count per zipcode boundary, one output row per row of zipcodes.
-- Boundaries without any matching dispatch keep a count of 0 (left join).
-- The GeoJSON geometry is parsed, passed through ST_Transform with
-- epsg:2263 and epsg:4326 (presumably source and target CRS - confirm),
-- and emitted as WKT text.
WITH dispatches_per_zipcode AS (
    -- Dispatches with an unknown zipcode cannot be placed on the map.
    SELECT
        zipcode,
        COUNT(*) AS num
    FROM dispatch
    WHERE zipcode IS NOT NULL
    GROUP BY zipcode
)
SELECT
    z.MODZCTA AS zipcode,
    COALESCE(d.num, 0) AS num,
    ST_AsText(
        ST_Transform(ST_GeomFromGeoJSON(z.geometry), "epsg:2263", "epsg:4326")
    ) AS geometry
FROM zipcodes AS z
LEFT JOIN dispatches_per_zipcode AS d
    ON z.MODZCTA = d.zipcode

In [None]:
%%local
import shapely.wkt
import geojson

def df_to_geojson(df, geom='geometry', props=None):
    """Turn a frame of WKT geometries into a GeoJSON FeatureCollection.

    Every row of ``df`` becomes one ``geojson.Feature``: the WKT text in the
    ``geom`` column is parsed with ``shapely.wkt.loads`` and used as the
    feature geometry, while the selected columns are copied into the
    feature's ``properties`` mapping.

    Args:
        df: Frame to convert; it is walked with ``iterrows()`` and, when
            ``props`` is omitted, its ``columns`` are inspected.
        geom: Name of the column that holds the WKT geometry text.
        props: Names of the columns to expose as feature properties.
            ``None`` (the default) selects every column except ``geom``.

    Returns:
        A ``geojson.FeatureCollection`` with one feature per row, in row
        order.
    """
    property_names = props
    if property_names is None:
        # Default: everything but the geometry column is a property.
        property_names = [name for name in df.columns if name != geom]

    features = []
    for _, row in df.iterrows():
        shape = shapely.wkt.loads(row[geom])
        attributes = {name: row[name] for name in property_names}
        features.append(geojson.Feature(geometry=shape, properties=attributes))

    return geojson.FeatureCollection(features)

In [None]:
%%local
# Convert the per-zipcode query result into a GeoJSON FeatureCollection.
# NOTE(review): `count_per_zipcode` is presumably the local frame produced by
# the `-o count_per_zipcode` option of the SQL cell - confirm.
gj = df_to_geojson(count_per_zipcode)

In [None]:
%%local
import os
from mapboxgl.viz import *
from mapboxgl.utils import *

# Mapbox access token, taken from the environment rather than written into
# the notebook. It must be a public token (one that starts with `pk`).
token = os.environ.get('MAPBOX_ACCESS_TOKEN')

In [None]:
%%local

# Color breakpoints at 0, 1/4, 2/4 and 3/4 of the largest per-zipcode count.
num_max = count_per_zipcode['num'].max()
stops = [quarter * num_max / 4 for quarter in range(4)]

viz = ChoroplethViz(
    gj,
    access_token=token,
    # Base map and initial camera position.
    style='mapbox://styles/mapbox/dark-v10',
    center=(-74.068266, 40.7019673),
    zoom=9,
    # Fill: color each polygon by its `num` property, interpolating
    # between the YlOrRd colors assigned to the breakpoints above.
    color_property='num',
    color_function_type='interpolate',
    color_stops=create_color_stops(stops, colors='YlOrRd'),
    opacity=0.8,
    # Polygon outlines.
    line_stroke='solid',
    line_width=0.3,
    line_color='rgb(128,0,38)',
    # Legend appearance.
    legend_layout='horizontal',
    legend_key_shape='bar',
    legend_key_borders_on=False,
)

viz.show()

In [None]:
%%sql -o count_by_call_type
-- Number of dispatches per final call type. Call types with fewer than
-- 10000 dispatches are merged into a single bucket labelled other.
WITH call_type_counts AS (
    SELECT
        final_call_type,
        COUNT(*) AS num
    FROM dispatch
    GROUP BY final_call_type
),
bucketed_counts AS (
    SELECT
        CASE
            WHEN num < 10000 THEN "other"
            ELSE final_call_type
        END AS final_call_type,
        num
    FROM call_type_counts
)
SELECT
    final_call_type,
    SUM(num) AS number_of_dispatches
FROM bucketed_counts
GROUP BY final_call_type
ORDER BY final_call_type

In [None]:
%%local
import pandas_bokeh
# Send pandas_bokeh plots to inline notebook output.
pandas_bokeh.output_notebook()
# Pie chart of dispatch totals with one slice per final_call_type; the query
# that produced count_by_call_type already folded call types with fewer than
# 10000 dispatches into "other".
count_by_call_type.set_index("final_call_type").plot_bokeh.pie()