# Running dashboard 


### From notebook

If you run all the cells in this notebook, the final cell will display the dashboard inline.

### From JupyterLab

To run this from within JupyterLab, open a terminal and run:

```bash
panel serve /home/jovyan/project/examples/nyc-taxi-snowflake/dashboard.ipynb
```

The dashboard will be live behind the Jupyter proxy. You can copy the URL of this Jupyter window and replace `/lab/*` with `/proxy/5006/dashboard`. For example, your Jupyter URL might be:

```
https://main.demo.saturnenterprise.io/user/aaron/examples-cpu/lab/workspaces/examples-cpu
```

Then your dashboard URL would be: 

```
https://main.demo.saturnenterprise.io/user/aaron/examples-cpu/proxy/5006/dashboard
```

It will take a few seconds to load when first viewing the page, as all the cells in this notebook must be executed first.

### Deployment 

To run as part of a Deployment, use this for the Command (see readme for more details):

```bash
python -m panel serve /home/jovyan/project/examples/nyc-taxi-snowflake/dashboard.ipynb --port=8000 --address="0.0.0.0" --allow-websocket-origin="*"
```

# ML model predictions

The `MODEL_URL` environment variable must be set to be able to get predictions from a deployed model. Otherwise the widget on the "ML" tab will return -1.

In [None]:
import os
import datetime as dt
import numpy as np
import pandas as pd

import dask.dataframe as dd

import hvplot.dask, hvplot.pandas
import holoviews as hv
from holoviews.streams import Selection1D
from bokeh.models import HoverTool
import panel as pn

import warnings
import logging
# Keep the rendered dashboard free of warning noise: ignore all warnings and
# route anything still emitted by the `warnings` module through `logging`.
warnings.filterwarnings("ignore")
logging.captureWarnings(True)

In [None]:
# Base URL of the deployed model (see readme for more details). The MODEL_URL
# environment variable wins when set; otherwise fall back to a local address.
MODEL_URL = os.getenv('MODEL_URL', 'http://0.0.0.0:8000')

In [None]:
import s3fs

# Anonymous (unauthenticated) S3 filesystem handle; reused below to fetch the
# taxi zone shapefile and the logo.
fs = s3fs.S3FileSystem(anon=True)

## Read in data

We'll start by reading in the shape file for the taxi zones provided by [NYC TLC](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).

In [None]:
import zipfile
# Stream the zipped shapefile from S3 and unpack it under /tmp/taxi_zones
with fs.open('s3://nyc-tlc/misc/taxi_zones.zip') as remote_zip, zipfile.ZipFile(remote_zip) as archive:
    archive.extractall('/tmp/taxi_zones')

In [None]:
import geopandas as gpd
# Load the taxi zone shapes, then reproject them to EPSG:4326
zones = gpd.read_file('/tmp/taxi_zones/taxi_zones.shp')
zones = zones.to_crs('epsg:4326')

Uncomment and run the cell below to visualize the shape file.

In [None]:
# zones.hvplot(geo=True)

In [None]:
import geoviews as gv

# CartoLight background tiles.
# NOTE(review): the layouts visible in this notebook call
# gv.tile_sources.CartoLight() directly and never reference `basemap` - confirm
# whether this variable is still needed.
basemap = gv.tile_sources.CartoLight()

### Query Snowflake

Next we'll aggregate the data using Snowflake and pull the results into pandas.

In [None]:
import yaml
import snowflake.connector

# Load the Snowflake credentials from a YAML file in the user's home directory.
# NOTE(review): yaml.full_load can construct more than plain scalars/mappings;
# if this file only holds simple key/value credentials, yaml.safe_load would be
# the safer choice - confirm before switching.
with open('/home/jovyan/snowflake_creds.yml') as f:
    creds = yaml.full_load(f)

# Open the connection used by every query below. The keys in `creds` are passed
# through as additional connect() keyword arguments.
conn = snowflake.connector.connect(
    warehouse='COMPUTE_WH',
    database='NYC_TAXI',
    schema='PUBLIC',
    **creds,
)

In [None]:
# (Re)create the `taxi_tip` view that all aggregations below read from. It adds
# hour/weekday columns and the tip as a percentage of the fare, and keeps only
# rides with a positive fare and a tip below 10x the fare.
#
# The date filter is a half-open range: comparing a datetime column with
# BETWEEN ... AND '2019-12-31' stops at midnight and would drop nearly all of
# December 31st, 2019.
conn.cursor().execute("""
CREATE OR REPLACE VIEW taxi_tip AS
SELECT 
    *,
    HOUR(pickup_datetime) as pickup_hour,
    HOUR(dropoff_datetime) as dropoff_hour,
    DAYOFWEEKISO(pickup_datetime) - 1 as pickup_weekday,  -- start week (Monday) at 0 to match pandas
    DAYOFWEEKISO(dropoff_datetime) -1 as dropoff_weekday,
    tip_amount / fare_amount * 100 as percent_tip
FROM taxi_yellow
WHERE 
    pickup_datetime >= '2017-01-01'
    AND pickup_datetime < '2020-01-01'
    AND fare_amount > 0
    AND tip_amount / fare_amount < 10
""")

In [None]:
def snowflake_query(query):
    """Execute ``query`` through the module-level ``conn`` and return a DataFrame.

    The result set is fetched in full with ``fetch_pandas_all`` and its column
    names are lower-cased before it is returned.
    """
    cursor = conn.cursor()
    frame = cursor.execute(query).fetch_pandas_all()
    frame.columns = frame.columns.str.lower()
    return frame

In [None]:
# Ride aggregates per (pickup zone, hour of day, weekday); feeds the per-zone
# heatmaps on the Volume tab.
pickup_by_zone_and_time = snowflake_query("""
SELECT
    pickup_taxizone_id,
    pickup_hour,
    pickup_weekday,
    AVG(fare_amount) as average_fare,
    COUNT(fare_amount) as total_rides,
    SUM(fare_amount) as total_fare,
    AVG(trip_distance) as average_trip_distance,
    AVG(percent_tip) as average_percent_tip
FROM taxi_tip
GROUP BY 
    pickup_taxizone_id,
    pickup_hour,
    pickup_weekday
""")
# Display (rows, columns) as a quick sanity check
pickup_by_zone_and_time.shape

In [None]:
# Preview the first rows in a stable order (sorting here does not modify the frame)
pickup_by_zone_and_time.sort_values(['pickup_taxizone_id', 'pickup_hour', 'pickup_weekday']).head()

In [None]:
# Ride aggregates per pickup zone
pickup_by_zone = snowflake_query("""
SELECT
    pickup_taxizone_id,
    AVG(fare_amount) as average_fare,
    COUNT(fare_amount) as total_rides,
    SUM(fare_amount) as total_fare,
    AVG(trip_distance) as average_trip_distance,
    AVG(percent_tip) as average_percent_tip
FROM taxi_tip
GROUP BY pickup_taxizone_id
""")
# Index by zone id, ordered by that id (one row per id thanks to the GROUP BY)
pickup_by_zone = pickup_by_zone.set_index('pickup_taxizone_id').sort_index()
pickup_by_zone.shape

In [None]:
# Preview the first rows of the per-pickup-zone aggregates
pickup_by_zone.head()

In [None]:
# Ride aggregates per dropoff zone
dropoff_by_zone = snowflake_query("""
SELECT
    dropoff_taxizone_id,
    AVG(fare_amount) as average_fare,
    COUNT(fare_amount) as total_rides,
    SUM(fare_amount) as total_fare,
    AVG(trip_distance) as average_trip_distance,
    AVG(percent_tip) as average_percent_tip
FROM taxi_tip
GROUP BY dropoff_taxizone_id
""")
# Index by zone id, ordered by that id (one row per id thanks to the GROUP BY)
dropoff_by_zone = dropoff_by_zone.set_index('dropoff_taxizone_id').sort_index()
dropoff_by_zone.shape

In [None]:
# Preview the first rows of the per-dropoff-zone aggregates
dropoff_by_zone.head()

In [None]:
# Lookup table: taxi zone LocationID -> zone name
zones_dict = dict(zip(zones["LocationID"].tolist(), zones["zone"].tolist()))

# Re-label both per-zone tables by zone name instead of numeric id
pickup_by_zone.index = pickup_by_zone.index.map(zones_dict)
dropoff_by_zone.index = dropoff_by_zone.index.map(zones_dict)

pickup_by_zone.head()

In [None]:
# Ride aggregates per (hour of day, weekday) across all zones; feeds the
# summary heatmaps.
pickup_by_time = snowflake_query("""
SELECT
    pickup_hour,
    pickup_weekday,
    AVG(fare_amount) as average_fare,
    COUNT(fare_amount) as total_rides,
    SUM(fare_amount) as total_fare,
    AVG(trip_distance) as average_trip_distance,
    AVG(percent_tip) as average_percent_tip
FROM taxi_tip
GROUP BY pickup_hour, pickup_weekday
""")
# Display (rows, columns) as a quick sanity check
pickup_by_time.shape

In [None]:
# Preview the first rows in a stable order (sorting here does not modify the frame)
pickup_by_time.sort_values(['pickup_hour', 'pickup_weekday']).head()

### Timeseries data

Next we'll read in the hourly timeseries of the average tip percentage.

In [None]:
# Average tip percentage per pickup hour (timestamps truncated to the hour)
tip_timeseries = snowflake_query("""
SELECT
    DATE_TRUNC('HOUR', pickup_datetime) as pickup_datetime,
    AVG(percent_tip) as percent_tip
FROM taxi_tip
GROUP BY 1
""")
# Use the timestamp as a chronologically ordered index
tip_timeseries = tip_timeseries.set_index('pickup_datetime').sort_index()
tip_timeseries.shape

In [None]:
# Preview the earliest rows. The frame was already ordered by pickup_datetime
# when its index was set, so the sort here is only a safeguard.
tip_timeseries.sort_values('pickup_datetime').head()

In [None]:
# All aggregates are now held in pandas; release the Snowflake connection.
# snowflake_query() cannot be used after this point.
conn.close()

## Construct visualizations

In this dashboard we'll have three tabs. We'll start with one about the volume of rides and aggregate fares, then move on to one about tips, and finish with a tab that queries a deployed Machine Learning model trained to predict the tip percentage.

### Volume tab

In [None]:
# Overall totals across all pickup zones, shown in the KPI boxes
total_rides = pickup_by_zone["total_rides"].sum()
total_fare = pickup_by_zone["total_fare"].sum()

In [None]:
# Markdown header placed in the top row of the Volume tab
volume_intro = """
# Taxi Volume

Ridership by region and average fares for 2017-01-01 to 2020-01-01. 
"""

In [None]:
# Download the logo from S3 to a local file and wrap it in an SVG pane; the
# same pane is placed in the header row of every tab.
logo_file = '/tmp/logo.svg'
fs.get("s3://saturn-public-data/nyc-taxi/data/dashboard/saturn_logo.svg", logo_file)
logo = pn.pane.SVG(logo_file, style={"float": "right"})

In [None]:
def kpi_box(title, color, value, unit=""):
    """Build a styled Markdown pane showing a single headline number.

    The number is abbreviated to thousands ("K"), millions ("M") or billions
    ("B") according to its magnitude and rendered with two decimals.

    Parameters
    ----------
    title : str
        Caption shown above the number.
    color : str
        CSS color applied to the pane's text.
    value : int or float
        Number to display.
    unit : str, optional
        Prefix written directly before the number (e.g. ``"$"``).

    Returns
    -------
    pn.pane.Markdown
        The boxed, formatted KPI.
    """
    # Try the largest scale first. Thresholds are inclusive and compared
    # against the magnitude, so exactly 1000 renders as "1.00 K" and large
    # negative numbers are abbreviated too.
    for threshold, increment in ((1e9, "B"), (1e6, "M"), (1e3, "K")):
        if abs(value) >= threshold:
            value /= threshold
            break
    else:
        increment = ""

    return pn.pane.Markdown(
        f"""
        ### {title}
        # {unit}{value :.02f} {increment}
        """,
        style={'background-color': '#F6F6F6', 'border': '2px solid black',
                'border-radius': '5px', 'padding': '10px', 'color': color},
    )

In [None]:
# Headline KPI boxes for the Volume tab
fares = kpi_box("Total Fares", "#10874a", total_fare, unit="$")
rides = kpi_box("Total Rides", "#7a41ba", total_rides)
average = kpi_box("Average Fare", "coral", total_fare / total_rides, unit="$")

In [None]:
# Attach per-zone ride counts and average fares to the zone shapes (joined on
# the zone name), plus a ride count in millions for the color scale.
data = zones.join(pickup_by_zone[["total_rides", "average_fare"]], on="zone")
data["million_rides"] = data["total_rides"] / 1e6

# Custom hover tooltip with formatted numbers
tooltips = [
    ('Total Rides', '@total_rides{(0,0.00 a)}'),
    ('Average Fare', '@{average_fare}{($0.00 a)}'),
    ('Zone', '@zone'),
    ('Borough', '@borough'),
]
hover = HoverTool(tooltips=tooltips)

# Choropleth of rides per pickup zone; the "tap" tool drives the zone
# selection consumed by the heatmap callbacks.
pickup_map = data.hvplot(
    x="longitude",
    y="latitude",
    c="million_rides",
    geo=True,
    max_width=600,
    max_height=600,
    alpha=0.6,
    cmap="viridis",
    clim=(0, np.ceil(pickup_by_zone.total_rides.max() / 1e6)),
    hover_cols=["zone", "borough", "average_fare", "total_rides"],
    title="Rides by pickup location (in Millions)",
    responsive=True,
    colorbar=True,
    xaxis=None,
    yaxis=None,
    selection_alpha=1,
).opts(tools=["tap", hover], toolbar="above")

In [None]:
# Switches the top/bottom zone table between pickup and dropoff statistics
toggle = pn.widgets.RadioButtonGroup(options=["Pickup", "Dropoff"], value="Pickup")

In [None]:
@pn.depends(value=toggle)
def volume_table(value):
    """Render the five busiest and five quietest zones by total rides.

    Re-evaluated whenever ``toggle`` changes. ``value`` is the toggle's current
    selection: "Pickup" picks the pickup table, anything else the dropoff table.
    """
    source = dropoff_by_zone if value != "Pickup" else pickup_by_zone
    ranked = source.total_rides.sort_values(ascending=False)
    # Keep only entries whose zone label is not missing
    labelled = ranked.index.dropna()
    ranked = ranked.loc[labelled]

    busiest = pn.pane.DataFrame(ranked.head(5), index_names=False)
    quietest = pn.pane.DataFrame(ranked.tail(5), index_names=False, header=False)

    return pn.Column(
        f"### Top/Bottom 5 {value} Zones",
        busiest,
        pn.Spacer(height=10),
        quietest,
        width_policy="fit",
    )

In [None]:
# Per-zone heatmap data. The tap callbacks look zones up by *name*, so the
# grouping column `pickup_taxizone_id` must hold zone names. The frame comes
# straight from the query with a plain row-number index, so the ids live in the
# column (not the index) and that is what has to be translated.
data = pickup_by_zone_and_time.copy()
data["pickup_taxizone_id"] = data["pickup_taxizone_id"].map(zones_dict)
# Ids without a matching zone shape cannot be selected on the map; drop them so
# the grouping key contains only real zone names.
data = data.dropna(subset=["pickup_taxizone_id"])

def heatmap(C, data=data, **kwargs):
    """Plot column ``C`` as a weekday-by-hour heatmap.

    ``data`` defaults to the module-level ``data`` frame as it exists when this
    function is defined. Extra keyword arguments (e.g. ``groupby``, ``cmap``,
    ``title``) are forwarded to ``hvplot.heatmap``. The hover shows
    ``total_rides`` when plotting ``average_fare`` and ``average_fare`` otherwise.
    """
    weekday_ticks = [
        (0, 'Mon'), (1, 'Tues'), (2, 'Wed'), (3, 'Thur'),
        (4, 'Fri'), (5, 'Sat'), (6, 'Sun'),
    ]
    if C == "average_fare":
        extra_hover = ["total_rides"]
    else:
        extra_hover = ["average_fare"]

    plot = data.hvplot.heatmap(
        x="pickup_weekday",
        y="pickup_hour",
        C=C,
        hover_cols=extra_hover,
        xticks=weekday_ticks,
        responsive=True,
        min_height=500,
        colorbar=False,
        **kwargs,
    )
    return plot.opts(toolbar=None, xrotation=90, padding=0)

# Per-zone heatmaps, grouped by the pickup zone column of the default `data`
rides_dmap = heatmap(C="total_rides", groupby="pickup_taxizone_id", cmap="reds")
fare_dmap = heatmap(C="average_fare", groupby="pickup_taxizone_id", cmap="blues")
# City-wide heatmaps built from the hour/weekday aggregates
rides_summary = heatmap(data=pickup_by_time, C="total_rides", cmap="reds", title="Total Rides")
fare_summary = heatmap(data=pickup_by_time, C="average_fare", cmap="blues", title="Average Fare")

# Pane whose `.object` is swapped by the callbacks below; starts on the
# city-wide rides heatmap.
volume_heatmap = pn.pane.HoloViews(rides_summary)

In [None]:
def ride_or_fares_plot(zone, value):
    """Pick the heatmap matching the selected zone and the Rides/Fares buttons.

    ``zone`` is a key into the per-zone heatmaps, or ``None`` for the city-wide
    summaries. ``value`` is the button selection: exactly ``["Rides"]`` or
    ``["Fares"]`` yields that single heatmap; anything else yields the fares
    heatmap overlaid with a half-transparent rides heatmap.
    """
    per_zone = zone is not None

    if value == ["Rides"]:
        # alpha is set back to 1 here because the overlay branch below sets it
        # to 0.5 on the same rides plots.
        if per_zone:
            return rides_dmap[zone].opts(title=f"{zone} Rides").opts(alpha=1)
        return rides_summary.opts(alpha=1)

    if value == ["Fares"]:
        if per_zone:
            return fare_dmap[zone].opts(title=f"{zone} Fares")
        return fare_summary

    # Both (or neither) selected: fares underneath, faded rides on top
    if per_zone:
        fares_layer = fare_dmap[zone]
        rides_layer = rides_dmap[zone].opts(alpha=0.5, padding=0)
        return (fares_layer * rides_layer).opts(title=f"{zone}")
    return (fare_summary * rides_summary.opts(alpha=0.5, padding=0)).opts(title="Total Rides/Fares")

def on_pickup_tap(index):
    """Show the tapped zone's heatmap; ignore empty selections."""
    if not index:
        return
    zone = zones.loc[index, "zone"].item()
    volume_heatmap.object = ride_or_fares_plot(zone, rides_or_fares.value)


# Call on_pickup_tap whenever the selection on the pickup map changes
volume_stream = Selection1D(source=pickup_map)
volume_stream.param.watch_values(on_pickup_tap, ['index']);

In [None]:
# Multi-select buttons: rides, fares, or both; the current value is a list
rides_or_fares = pn.widgets.CheckButtonGroup(options=["Rides", "Fares"], value=["Rides"])

In [None]:
def on_rides_or_fares(target, event):
    """Redraw the selected zone's heatmap when the Rides/Fares buttons change.

    Does nothing when no zone is selected on the map or when no button is
    active.
    """
    selected = volume_stream.index
    choice = event.new
    if not (selected and choice):
        return
    zone = zones.loc[selected, "zone"].item()
    volume_heatmap.object = ride_or_fares_plot(zone, choice)


rides_or_fares.link(volume_heatmap, callbacks={"value": on_rides_or_fares})

def on_reset_heatmap(*args):
    """Switch the heatmap back to the city-wide view for the current buttons."""
    volume_heatmap.object = ride_or_fares_plot(None, rides_or_fares.value)


reset_heatmap = pn.widgets.Button(name="Reset")
reset_heatmap.on_click(on_reset_heatmap)

In [None]:
# Grid layout of the Volume tab. Indices are [rows, columns].
volume = pn.GridSpec(name="Volume", sizing_mode='stretch_both', min_width=800, min_height=600, max_height=800)
# Row 0: intro text and logo
volume[0, :6] = volume_intro
volume[0, 6] = logo
# Row 1: the three KPI boxes
volume[1, 0] = fares
volume[1, 1] = rides
volume[1, 2] = average
# Right-hand side: pickup/dropoff toggle with the top/bottom zone table
volume[1:4, 4:6] = pn.Column(toggle, volume_table)
# Middle column: heatmap controls and the heatmap itself
volume[1:8, 3] = pn.Column(
    pn.pane.Markdown("*Choose rides, fares, or both and select a zone on the map.*", margin=(0, 10)), 
    rides_or_fares, reset_heatmap, volume_heatmap)
# Left: pickup map overlaid with the CartoLight tiles
volume[2:8, 0:3] = pickup_map * gv.tile_sources.CartoLight()

### Tip tab

In [None]:
# Markdown header placed in the top row of the Tips tab
tip_intro = """
# Analysis of Tips

Tips vary based on time of day, location and many other factors. 
"""

In [None]:
# City-wide average tip percentage by weekday and hour; color scale fixed to 12-18
tip_heatmap = heatmap(data=pickup_by_time, C="average_percent_tip", cmap="coolwarm", clim=(12, 18), title="Average Tip %")

In [None]:
# Date window shown on the tip timeseries; starts at the full extent of the data
date_range_slider = pn.widgets.DateRangeSlider(
    name='Show between',
    start=tip_timeseries.index[0], end=tip_timeseries.index[-1],
    value=(tip_timeseries.index.min(), tip_timeseries.index.max())
)
# Rolling-window sizes for smoothing. Each option is passed unchanged to
# `DataFrame.rolling`, which only accepts fixed-length offsets on a datetime
# index. A calendar month ('1M') is not fixed-length and raises a ValueError,
# so the longest window is expressed as 30 days.
discrete_slider = pn.widgets.DiscreteSlider(
    name='Rolling window',
    options=['1H', '2H', '4H', '6H', '12H', '1D', '2D', '7D', '14D', '30D'],
    value='1D',
)

def tip_plot(xlim, window):
    """Plot the rolling mean of the tip percentage, limited to ``xlim`` on x."""
    smoothed = tip_timeseries.rolling(window).mean()
    curve = smoothed.hvplot(y="percent_tip", xlim=xlim, ylim=(10, 18), responsive=True, min_height=200)
    return curve.opts(toolbar="above")


# Pane holding the timeseries; the callbacks below replace its `.object`
tip_timeseries_plot = pn.pane.HoloViews(tip_plot(date_range_slider.value, discrete_slider.value))


def trim(target, event):
    """Date range changed: redraw with the new range and the current window."""
    target.object = tip_plot(event.new, discrete_slider.value)


def roll(target, event):
    """Rolling window changed: redraw with the new window and the current range."""
    target.object = tip_plot(date_range_slider.value, event.new)


discrete_slider.link(tip_timeseries_plot, callbacks={"value": roll})
date_range_slider.link(tip_timeseries_plot, callbacks={"value": trim})

In [None]:
# Zone shapes joined with every per-pickup-zone aggregate (matched on zone name)
joined = zones.join(pickup_by_zone, on="zone")

# Choropleth of the average tip percentage per pickup zone
tip_map = joined.hvplot(
    c="average_percent_tip",
    geo=True,
    alpha=0.6,
    cmap="coolwarm",
    hover_cols=["zone", "borough"],
    title="Average Tip %",
    clim=(0, 20),
    responsive=True,
    colorbar=False,
    xaxis=None,
    yaxis=None,
).opts(toolbar="above")

In [None]:
# Zones ranked by average tip percentage, best first, without unlabelled zones
tip_table = pickup_by_zone["average_percent_tip"].sort_values(ascending=False)
tip_table = tip_table.loc[tip_table.index.dropna()]

top_tippers = pn.pane.DataFrame(tip_table.head(5), header=False, index_names=False)
bottom_tippers = pn.pane.DataFrame(tip_table.tail(5), header=False, index_names=False)
tip_pane = pn.Column(
    "### Top/Bottom 5 Tip Zones",
    top_tippers,
    pn.Spacer(height=10),
    bottom_tippers,
)

In [None]:
# Grid layout of the Tips tab. Indices are [rows, columns].
tips = pn.GridSpec(name="Tips", sizing_mode='stretch_both', min_width=800, min_height=600, max_height=800)

# Row 0: intro text and logo
tips[0, :6] = tip_intro
tips[0, 6] = logo
# Rows 1-4: map, top/bottom table and weekday/hour heatmap side by side
tips[1:5, 0:2] = tip_map * gv.tile_sources.CartoLight()
tips[1:5, 2:4] = tip_pane
tips[1:5, 4:6] = tip_heatmap

# Rows 5-7: timeseries controls (with a short usage hint) next to the plot
tips[5:8, 0:2] = pn.Column(
    date_range_slider,
    discrete_slider,
    "*Use the widgets to control the rolling-window average on the timeseries plot and to restrict it to a range of dates*",
)
tips[5:8, 2:6] = tip_timeseries_plot

### ML tab

In [None]:
# Markdown header placed in the top row of the ML tab
ml_intro = """
# Machine Learning

Predict percent tip by consuming a deployed model. Must set MODEL_URL environment variable in Project or Deployment, otherwise the prediction will be -1 (see readme for more details).
"""

In [None]:
import requests

def tip_prediction(pickup_taxizone_id, dropoff_taxizone_id, datetime, passenger_count):
    """Ask the deployed model for a tip prediction.

    Posts the trip description as JSON to ``{MODEL_URL}/api/predict``,
    authenticating with the ``SATURN_TOKEN`` environment variable.

    Parameters
    ----------
    pickup_taxizone_id, dropoff_taxizone_id
        Taxi zone ids; converted with ``int()`` before sending.
    datetime
        Pickup time; sent as ``str(datetime)``.
    passenger_count
        Number of passengers, sent unchanged.

    Returns
    -------
    float or int
        The model's ``"prediction"`` multiplied by 100, or ``-1`` when the
        prediction could not be obtained for any reason (missing token,
        unreachable model, HTTP error, unexpected response). Failures are
        logged with a traceback rather than discarded silently.
    """
    try:
        scoring_endpoint = f"{MODEL_URL}/api/predict"
        saturn_token = os.environ["SATURN_TOKEN"]

        result = requests.post(
            url=scoring_endpoint,
            json={
                "passenger_count": passenger_count,
                "tpep_pickup_datetime": str(datetime),
                "pickup_taxizone_id": int(pickup_taxizone_id),
                "dropoff_taxizone_id": int(dropoff_taxizone_id),
            },
            headers={
                "Content-Type": "application/json",
                "Authorization": f"token {saturn_token}",
            },
            # Without a timeout a stalled model would hang the dashboard callback
            timeout=30,
        )
        # Treat HTTP error statuses as failures instead of parsing an error body
        result.raise_for_status()

        return float(result.json()["prediction"]) * 100
    except Exception:
        # Best-effort by design: the UI shows -1 instead of crashing. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).exception("Tip prediction request failed")
        return -1

In [None]:
# Select options: zone name -> index label of that zone in `zones`, with -1 as
# the "nothing chosen yet" sentinel.
options = {
    "Choose from map": -1,
    **{name: label for label, name in zones.zone.to_dict().items()},
}
pickup = pn.widgets.Select(name="Pickup", options=options)
dropoff = pn.widgets.Select(name="Dropoff", options=options)
passengers = pn.widgets.IntSlider(name='Passengers', start=0, end=10, step=1, value=2)

# Clickable zone map used to choose pickup and dropoff
plot = zones.hvplot(
    geo=True,
    c='zone',
    legend=False,
    width=500,
    height=500,
    xaxis=None,
    yaxis=None,
    alpha=.2,
    selection_alpha=1,
).opts(tools=['tap', 'hover'])

def on_map_select(index):
    """Fill the first still-unset selector (pickup, then dropoff) from a map tap."""
    if not index:
        return
    if pickup.value == -1:
        pickup.value = index[0]
    elif dropoff.value == -1:
        dropoff.value = index[0]


# Call on_map_select whenever the selection on the zone map changes
stream = Selection1D(source=plot)
stream.param.watch_values(on_map_select, ['index'])

# Pane shown next to the form; its `.object` is replaced after a prediction
overlay = pn.pane.HoloViews(plot * gv.tile_sources.CartoLight())

def on_reset(*args):
    """Restore the prediction form and map to their initial state.

    Any arguments passed by the button click are ignored.
    """
    # -1 is the "Choose from map" sentinel of both selectors
    pickup.value = -1
    dropoff.value = -1
    passengers.value = 2
    date.value = dt.datetime.now().date()
    hour.value = 0
    # Clear the highlighted prediction text
    text.background = "#ffffff"
    text.object = None
    # Drop the map selection and remove any drawn trip from the overlay
    stream.update(index=[])
    overlay.object = plot * gv.tile_sources.CartoLight()

reset = pn.widgets.Button(name="Reset", width=80)
reset.on_click(on_reset)

# Trip date, defaulting to today
date = pn.widgets.DatePicker(name="Date", value=dt.date.today())
# Hour of day: 12-hour clock labels ("12am", "1am", ..., "11pm") mapped to 0-23
hour = pn.widgets.DiscreteSlider(
    name="Hour",
    options={
        f"{h % 12 or 12}{'am' if h < 12 else 'pm'}": h
        for h in range(24)
    },
)

# Button that triggers the prediction
submit = pn.widgets.Button(name="Predict my tip", button_type='primary', width=200)
# Shows the prediction result
text = pn.pane.Markdown(width=200, height=45, style={"padding-left": "10pt"})
# Shows validation hints (e.g. when a zone has not been selected)
helper = pn.pane.Markdown(width=300)

def b(event):
    """Handle a click on "Predict my tip": validate, predict and redraw the map.

    If either zone is still unset, the button turns red and a hint is shown.
    Otherwise the model is queried, the two zones are highlighted and connected
    by a line on the map, and the prediction is displayed.
    """
    if pickup.value == -1 or dropoff.value == -1:
        submit.button_type = "danger"
        helper.object = "*You must select pickup and dropoff zone*"
        return

    submit.button_type = "primary"
    helper.object = None
    datetime = dt.datetime.combine(date.value, dt.time(hour=hour.value))

    # The selectors (and map taps) hold row positions in `zones`, not taxi
    # zone ids, so look the rows up first and send their LocationID to the
    # model - the same id the zone-name lookup table is keyed on.
    subset = zones.iloc[[pickup.value, dropoff.value]]
    pickup_id, dropoff_id = subset.LocationID.tolist()

    prediction = tip_prediction(pickup_id, dropoff_id, datetime, passengers.value)

    # Straight line between the two zone centroids
    trip = gv.Path((subset.geometry.centroid.x, subset.geometry.centroid.y)).opts(color="black", line_width=2)

    obj = plot * gv.tile_sources.CartoLight() * subset.hvplot(geo=True) * trip
    zone_names = subset.zone.tolist()
    obj.label = f"{zone_names[0]} to {zone_names[1]}"

    overlay.object = obj

    text.background = "yellow"
    text.object = f"## Prediction: {prediction: .2f}%"


submit.on_click(b)

# Prediction form (left) next to the zone map (right)
predict = pn.Row(
    pn.Column(
        "## Predict my Tip",
        pickup,
        dropoff, 
        passengers,
        date,
        hour,
        pn.Row(submit, reset),
        helper,
        text,
    ),
    overlay
)

In [None]:
# Grid layout of the ML tab. Indices are [rows, columns].
ml = pn.GridSpec(name="ML", sizing_mode='stretch_both', min_width=800, min_height=600, max_height=800)

# Row 0: intro text and logo; rows 2-7: the prediction form and map
ml[0, :6] = ml_intro
ml[0, 6] = logo
ml[2:8, :6] = predict

## Final Dashboard

In [None]:
# Combine the three tabs (tab strip on the left) and mark the result servable
# so `panel serve` picks it up; in a notebook this also displays it inline.
pn.Tabs(volume, tips, ml, tabs_location="left").servable(title="Saturn Taxi")