In [None]:
import datetime as dt
import numpy as np
import pandas as pd

import dask.dataframe as dd

import hvplot.dask, hvplot.pandas
import holoviews as hv
from holoviews.streams import Selection1D
from bokeh.models import HoverTool
import panel as pn

import warnings
warnings.simplefilter("ignore")

## Read in data

We'll start by reading in the taxi zone geometries from the shapefile at `./data/taxi_zones.shp`. (TODO: add a link to the download source.)

In [None]:
# NOTE(review): this import lives outside the notebook's first import cell;
# consider moving it there so all dependencies are visible in one place.
import geopandas as gpd
# Read the taxi-zone shapefile and reproject it to EPSG:4326.
zones = gpd.read_file('./data/taxi_zones.shp').to_crs('epsg:4326')

# Quick visual check of the zone geometries.
zones.hvplot(geo=True)

In [None]:
# NOTE(review): import placed outside the first import cell; consider moving it there.
import geoviews as gv

# Tile layer meant as the map background.
# NOTE(review): the layout cells below call gv.tile_sources.CartoLight()
# directly and never reference `basemap`.
basemap = gv.tile_sources.CartoLight()

### Grouped data

Next we'll read in the grouped data that we generated in the exploratory notebooks

In [None]:
# Each CSV is indexed by its taxi-zone id column so the id can later be mapped to a zone name.
pickup_data = pd.read_csv("./data/pickup_grouped.csv", index_col="pickup_taxizone_id")
dropoff_data = pd.read_csv("./data/dropoff_grouped.csv", index_col="dropoff_taxizone_id")
# Per-zone tip data; its "total" column is used for the tip map and tip table further down.
pickup_tip = pd.read_csv("./data/pickup_tip.csv", index_col="pickup_taxizone_id")
dropoff_data.head()

In [None]:
def _aggregate_by_zone(grouped, id_column):
    """Collapse a grouped ride table to one row per taxi zone name.

    Parameters
    ----------
    grouped : pandas.DataFrame
        Table indexed by ``id_column``.
        Assumes it carries ``total_fare``, ``total_rides``,
        ``total_percent_tip`` and ``total_tips`` columns -- TODO confirm for
        the pickup CSV (a missing column raises ``KeyError`` here).
    id_column : str
        Name of the index level holding the taxi-zone id.

    Returns
    -------
    pandas.DataFrame
        Indexed by zone name, with columns ``total_fare``, ``total_rides``,
        ``average_fare`` (rounded to 2 decimals) and ``average_percent_tip``.
    """
    sum_columns = ["total_fare", "total_rides", "total_percent_tip", "total_tips"]
    totals = grouped.groupby(id_column)[sum_columns].sum()

    totals["average_fare"] = (totals.total_fare / totals.total_rides).round(2)
    totals["average_percent_tip"] = totals.total_percent_tip / totals.total_tips

    # Replace zone ids with zone names; ids missing from `zones` become NaN.
    id_to_zone = dict(zip(zones.LocationID.tolist(), zones.zone.tolist()))
    totals.index = totals.index.map(id_to_zone)

    # Drop unmapped ids with a boolean mask. Selecting with
    # .loc[index.dropna()] would repeat rows whenever a zone name occurs
    # more than once in the index.
    named = totals[totals.index.notna()]
    return named[["total_fare", "total_rides", "average_fare", "average_percent_tip"]]


pickup_aggregated = _aggregate_by_zone(pickup_data, "pickup_taxizone_id")
dropoff_aggregated = _aggregate_by_zone(dropoff_data, "dropoff_taxizone_id")

pickup_aggregated.head()

### Timeseries data

Next we'll read in the hourly timeseries data for the various fields

In [None]:
# Both series are indexed by pickup_datetime, parsed to datetimes so the
# time-based rolling windows and .dt accessors used below work.
mean_fare_timeseries = pd.read_csv("./data/fare_timeseries.csv", index_col="pickup_datetime", parse_dates=True)
tip_timeseries = pd.read_csv("./data/tip_timeseries.csv", index_col="pickup_datetime", parse_dates=True)

tip_timeseries.head()

## Construct visualizations

In this dashboard we'll have three tabs. We'll start with one about volume of rides and aggregate fare, then move on to one about tips and finish with a tab that digests the outputs of the Machine Learning algorithms that we've trained to predict fare.

### Volume tab

In [None]:
# Headline totals summed over every row of the grouped pickup data;
# these feed the KPI boxes on the Volume tab.
total_rides = pickup_data["total_rides"].sum()
total_fare = pickup_data["total_fare"].sum()

In [None]:
# Markdown shown in the header row of the Volume tab.
volume_intro = """
# Taxi Volume

Ridership by region and total fare for period of record. 
"""

In [None]:
# Logo pane placed in the top-right cell of each tab's grid.
logo = pn.pane.SVG("./data/saturn_logo.svg", style={"float": "right"})

In [None]:
def kpi_box(title, color, value, unit=""):
    """Build a styled Markdown pane showing one headline number.

    The number is scaled to thousands, millions or billions (whichever is
    the largest unit it exceeds) and printed with two decimals, followed by
    the unit word.

    Parameters
    ----------
    title : str
        Heading shown above the number.
    color : str
        CSS color applied to the pane's text.
    value : int or float
        The quantity to display.
    unit : str, optional
        Prefix printed directly before the number (e.g. ``"$"``).

    Returns
    -------
    pn.pane.Markdown
    """
    if value > 1e9:
        value /= 1e9
        increment = "Billion"
    elif value > 1e6:
        value /= 1e6
        increment = "Million"
    elif value > 1e3:
        value /= 1e3
        increment = "Thousand"
    else:
        increment = ""

    # Render the scaled `value` passed in. The previous version formatted the
    # global total_fare / 1e9 here, so every box showed the same number.
    return pn.pane.Markdown(
        f"""
        ### {title}
        # {unit}{value:.2f} {increment}
        """,
        style={'background-color': '#F6F6F6', 'border': '2px solid black',
                'border-radius': '5px', 'padding': '10px', 'color': color},
    )

In [None]:
# The three KPI panes shown in the second row of the Volume tab.
fares = kpi_box("Total Fares", "#10874a", total_fare, "$")
rides = kpi_box("Total Rides", "#7a41ba", total_rides)
average = kpi_box("Average Fare", "coral", (total_fare / total_rides), "$")

In [None]:
# Attach per-zone ride totals and average fare to the zone geometries,
# matching on the zone name.
# NOTE(review): `data` is rebound to a different table in a later cell.
data = zones.join(pickup_aggregated["total_rides"], on="zone")
data = data.join(pickup_aggregated["average_fare"], on="zone")
# Color scale is expressed in millions of rides (see clim below).
data["million_rides"] = data.total_rides/1e6

# Custom hover tooltips with formatted numbers.
tooltips = [
    ('Total Rides', '@total_rides{(0,0.00 a)}'),
    ('Average Fare', '@{average_fare}{($0.00 a)}'),
    ('Zone', '@zone'),
    ('Borough', '@borough'),
]
hover = HoverTool(tooltips=tooltips)

# Choropleth of rides per pickup zone; the "tap" tool drives the
# Selection1D stream attached to this plot further down.
pickup_map = data.hvplot(
    x="longitude", y="latitude", c="million_rides",
    geo=True, max_width=600, max_height=600,
    alpha=0.6, cmap="viridis", clim=(0, 12), 
    hover_cols=["zone", "borough", "average_fare", "total_rides"], 
    title=f"Rides by pickup location (in Millions)", 
    responsive=True, colorbar=True,
    xaxis=None, yaxis=None, selection_alpha=1).opts(tools=["tap", hover], toolbar="above")

In [None]:
# Switches the top/bottom-5 table between pickup and dropoff zones.
toggle = pn.widgets.RadioButtonGroup(options=["Pickup", "Dropoff"], value="Pickup")

In [None]:
@pn.depends(value=toggle)
def volume_table(value):
    """Render the five busiest and five quietest zones by total rides.

    Re-evaluated whenever the `toggle` widget changes; `value` is the
    selected option ("Pickup" selects the pickup table, anything else the
    dropoff table).
    """
    if value == "Pickup":
        source = pickup_aggregated
    else:
        source = dropoff_aggregated

    # Descending order, so head() is the busiest and tail() the quietest.
    ranked = source.total_rides.sort_values(ascending=False)

    heading = f"### Top/Bottom 5 {value} Zones"
    busiest = pn.pane.DataFrame(ranked.head(5), index_names=False)
    quietest = pn.pane.DataFrame(ranked.tail(5), index_names=False, header=False)

    return pn.Column(heading, busiest, pn.Spacer(height=10), quietest, width_policy="fit")

In [None]:
# Work on a copy so pickup_data keeps its numeric zone-id index.
# NOTE(review): this rebinds `data`, which an earlier cell used for the zone/ride join.
data = pickup_data.copy()
# Re-key the rows by zone name so the grouped heatmaps can be looked up by name.
data.index = data.index.map(dict(zip(zones.LocationID.tolist(), zones.zone.tolist())))

def heatmap(C, **kwargs):
    """Build a weekday-by-hour heatmap of column `C` from the global `data`.

    Parameters
    ----------
    C : str
        Column used for the cell color.
    **kwargs
        Passed through to ``data.hvplot.heatmap`` (e.g. ``groupby``, ``cmap``, ``title``).

    Returns
    -------
    The heatmap object with the toolbar hidden and x tick labels rotated 90 degrees.
    """
    weekday_ticks = [(0, 'Mon'), (1, 'Tues'), (2, 'Wed'), (3, 'Thur'), (4, 'Fri'), (5, 'Sat'), (6, 'Sun')]

    # Hover shows total_rides when coloring by average_fare, and average_fare otherwise.
    if C == "average_fare":
        extra_hover = ["total_rides"]
    else:
        extra_hover = ["average_fare"]

    plot = data.hvplot.heatmap(
        x="pickup_weekday",
        y="pickup_hour",
        C=C,
        hover_cols=extra_hover,
        xticks=weekday_ticks,
        responsive=True,
        min_height=500,
        colorbar=False,
        **kwargs,
    )
    return plot.opts(toolbar=None, xrotation=90)

# One heatmap per zone (grouped on the index, which now holds zone names),
# looked up by zone name when a zone is tapped on the map.
rides_dmap = heatmap(C="total_rides", groupby="pickup_taxizone_id", cmap="reds")
fares_dmap = heatmap(C="average_fare", groupby="pickup_taxizone_id", cmap="blues")
# Summary across all zones, summing rides per weekday/hour cell.
rides_summary = heatmap(C="total_rides", cmap="reds", title="Total Rides").aggregate(function=np.sum)

# Pane whose .object is swapped by the tap and checkbox callbacks below.
volume_heatmap = pn.pane.HoloViews(rides_summary)

In [None]:
def ride_or_fares_plot(zone, value):
    """Return the weekday/hour heatmap(s) for one zone.

    Parameters
    ----------
    zone : str
        Key used to index `rides_dmap` / `fares_dmap`.
    value : list of str
        Current selection of the rides/fares checkbox group.
        ``["Rides"]`` gives the rides heatmap and ``["Fares"]`` the fares
        heatmap. Any other value gives the fares heatmap overlaid with a
        half-transparent rides heatmap.

    Returns
    -------
    The selected HoloViews object with its title set.
    """
    if value == ["Rides"]:
        return rides_dmap[zone].opts(title=f"{zone} Rides").opts(alpha=1)
    # The notebook defines `fares_dmap`; the previous `fare_dmap` spelling
    # raised NameError on both branches below.
    if value == ["Fares"]:
        return fares_dmap[zone].opts(title=f"{zone} Fares")
    return (fares_dmap[zone] * rides_dmap[zone].opts(alpha=0.5)).opts(title=f"{zone}")

def on_pickup_tap(index):
    """Swap the volume heatmap to the zone tapped on the pickup map.

    Parameters
    ----------
    index : list of int
        Selected indices reported by the Selection1D stream; an empty
        selection leaves the heatmap untouched.
    """
    if not index:
        return
    # Use the first selected entry so a multi-selection does not raise
    # (Series.item() only accepts exactly one element).
    # NOTE(review): assumes the stream's indices line up with the labels of `zones` -- confirm.
    zone = zones.loc[index[0], "zone"]
    value = rides_or_fares.value
    volume_heatmap.object = ride_or_fares_plot(zone, value)
       
# Report tap selections on the pickup map to on_pickup_tap.
volume_stream = Selection1D(source=pickup_map)
# Trailing semicolon suppresses the cell's output.
volume_stream.param.watch_values(on_pickup_tap, ['index']);

In [None]:
# Chooses which heatmap(s) to show for the tapped zone; read by on_pickup_tap.
rides_or_fares = pn.widgets.CheckButtonGroup(options=["Rides", "Fares"], value=["Rides"])

In [None]:
def on_rides_or_fares(target, event):
    """Redraw the zone heatmap when the rides/fares selection changes.

    Does nothing when no zone is currently selected on the map or when the
    new checkbox selection is empty.

    Parameters
    ----------
    target
        The linked pane (unused; the global `volume_heatmap` is updated).
    event
        Change event whose ``new`` attribute is the new checkbox selection.
    """
    index = volume_stream.index
    value = event.new
    if index and value:
        # Use the first selected entry so a multi-selection does not raise
        # (Series.item() only accepts exactly one element).
        zone = zones.loc[index[0], "zone"]
        volume_heatmap.object = ride_or_fares_plot(zone, value)
    
# Run on_rides_or_fares whenever the checkbox group's value changes.
rides_or_fares.link(volume_heatmap, callbacks={"value": on_rides_or_fares});

In [None]:
# Grid layout of the Volume tab, addressed as volume[rows, columns].
volume = pn.GridSpec(name="Volume", sizing_mode='stretch_both', min_width=800, min_height=600, max_height=800)
# Row 0: intro text and logo.
volume[0, :6] = volume_intro
volume[0, 6] = logo
# Row 1: KPI boxes.
volume[1, 0] = fares
volume[1, 1] = rides
volume[1, 2] = average
# Right-hand columns: pickup/dropoff toggle with its top/bottom-5 table.
volume[1:4, 4:6] = pn.Column(toggle, volume_table)
# Column 3: instructions and rides/fares selector above the zone heatmap.
volume[1, 3] = pn.Column(
    pn.pane.Markdown("*Choose rides, fares, or both and select a zone on the map.*", margin=(0, 10)), 
    rides_or_fares, 
    height_policy="fit")
volume[2:8, 3] = volume_heatmap
# Map drawn over a CartoLight tile layer.
volume[2:8, 0:3] = pickup_map * gv.tile_sources.CartoLight()
# NOTE(review): this path is not under ./data like the other assets -- confirm the file location.
volume[4:8, 4:6] = "./pie_chart.svg"

In [None]:
# Display the assembled Volume tab inline.
volume

### Tips tab

In [None]:
# Markdown shown in the header row of the Tips tab.
tip_intro = """
# Analysis of Tips

Tips vary based on time of day, location and many other factors. 
"""

In [None]:
# Mean tip % per weekday/hour cell, derived from the datetime index.
# NOTE(review): this rebinds the module-level name `heatmap`, which earlier
# referred to the heatmap() helper function defined for the Volume tab.
heatmap = tip_timeseries.hvplot.heatmap(
    x="index.dt.weekday", 
    y="index.dt.hour", 
    C="tip%",
    title="Average Tip %",
    xlabel="Day",
    ylabel="Hour",
    xticks=[(0, 'Mon'), (1, 'Tues'), (2, 'Wed'), (3, 'Thur'), (4, 'Fri'), (5, 'Sat'), (6, 'Sun')],
    responsive=True, min_height=500,
    colorbar=False, cmap="coolwarm", clim=(8, 12)
).aggregate(function=np.mean).opts(toolbar=None, xrotation=90)

In [None]:
# Date window for the tip timeseries plot, bounded by the first and last index entries.
date_range_slider = pn.widgets.DateRangeSlider(
    name='Show between',
    start=tip_timeseries.index[0], end=tip_timeseries.index[-1],
    value=(pd.Timestamp("2018-01"), pd.Timestamp("2019-02"))
)
# Window sizes handed to DataFrame.rolling(), which accepts only
# fixed-frequency offsets on a datetime index. A calendar month ('1M') is
# not fixed and raises ValueError, so the longest window is 30 days.
discrete_slider = pn.widgets.DiscreteSlider(
    name='Rolling window',
    options=['1H', '2H', '4H', '6H', '12H', '1D', '2D', '7D', '14D', '30D'],
    value='1D',
)

def tip_plot(xlim, window):
    """Plot the rolling-mean tip % over a date window.

    Parameters
    ----------
    xlim : tuple
        (start, end) limits for the x axis.
    window
        Rolling window passed to ``tip_timeseries.rolling``.

    Returns
    -------
    Line plot of the "tip%" column with the toolbar placed above.
    """
    smoothed = tip_timeseries.rolling(window).mean()
    line = smoothed.hvplot(
        y="tip%",
        xlim=xlim,
        ylim=(8, 12),
        responsive=True,
        min_height=200,
    )
    return line.opts(toolbar="above")

# Pane holding the current timeseries plot; the callbacks below replace its .object.
tip_timeseries_plot = pn.pane.HoloViews(tip_plot(date_range_slider.value, discrete_slider.value))
    
def trim(target, event):
    """Redraw with the new date range, keeping the current rolling window."""
    target.object = tip_plot(event.new, discrete_slider.value)

def roll(target, event):
    """Redraw with the new rolling window, keeping the current date range."""
    target.object = tip_plot(date_range_slider.value, event.new)

# Each slider redraws the plot when its value changes.
discrete_slider.link(tip_timeseries_plot, callbacks={"value": roll})
date_range_slider.link(tip_timeseries_plot, callbacks={"value": trim})

In [None]:
# Attach the per-zone tip data to the zone geometries via LocationID.
joined = zones.join(pickup_tip, on="LocationID")

# Choropleth colored by the "total" column.
# presumably "total" holds the average tip % per zone (per the plot title); verify against the CSV.
tip_map = joined.hvplot(c="total", geo=True, alpha=0.6, cmap="coolwarm",
                        hover_cols=["zone", "borough"], title="Average Tip %", 
                        clim=(5, 15),responsive=True, colorbar=False,
                        xaxis=None, yaxis=None).opts(toolbar="above")

In [None]:
# Zones ranked by "total", highest first.
tip_table = joined[["zone", "total"]].set_index("zone").sort_values(by="total", ascending=False)
tip_table["tip %"] = tip_table.total.round(2)
# NOTE(review): drop_duplicates() compares column values, not the index, so
# zones sharing the same rounded tip % collapse to a single row -- confirm this is intended.
tip_table = tip_table.drop("total", axis=1).drop_duplicates()
tip_pane = pn.Column(
    "### Top/Bottom 5 Tip Zones", 
    pn.pane.DataFrame(tip_table.head(5), header=False, index_names=False),
    pn.Spacer(height=10),
    pn.pane.DataFrame(tip_table.tail(5), header=False, index_names=False),
)

In [None]:
# Grid layout of the Tips tab, addressed as tips[rows, columns].
tips = pn.GridSpec(name="Tips", sizing_mode='stretch_both', min_width=800, min_height=600, max_height=800)

# Row 0: intro text and logo.
tips[0, :6] = tip_intro
tips[0, 6] = logo
# Rows 1-4: tip map over a CartoLight tile layer, top/bottom table, weekday/hour heatmap.
tips[1:5, 0:2] = tip_map * gv.tile_sources.CartoLight()
tips[1:5, 2:4] = tip_pane
tips[1:5, 4:6] = heatmap

# Rows 5-7: slider controls next to the timeseries they drive.
# The help text previously read "or and to restrict to between certain dates".
tips[5:8, 0:2] = pn.Column(
    date_range_slider,
    discrete_slider,
    "*Use the widgets to control the rolling-window average on the timeseries plot and to restrict it to a date range*",
)
tips[5:8, 2:6] = tip_timeseries_plot

In [None]:
# Display the assembled Tips tab inline.
tips

### ML tab

In [None]:
# Markdown shown in the header row of the ML tab.
ml_intro = """
# Machine Learning

Exploring the historical accuracy of various models. Predict fare by consuming deployed models.
"""

In [None]:
def mock_fare_prediction(pickup_taxizone_id, dropoff_taxizone_id, datetime):
    """Stand-in for a model call: return a random whole-dollar fare.

    All three arguments are ignored. The result is drawn uniformly from
    the integers 10 through 99.
    """
    lowest, upper_bound = 10, 100  # upper bound is exclusive
    return np.random.randint(lowest, upper_bound)

# Mock "predictions": the observed fare series plus Gaussian noise (std 10).
# NOTE(review): no random seed is set, so these values change on every run.
actual = mean_fare_timeseries.fare_amount
predicted_fare = actual + np.random.randn(len(actual)) * 10

# Actual and predicted fares side by side, for the comparison plot on the ML tab.
table = pd.DataFrame({
    "actual": actual, 
    "predicted": predicted_fare
})
table.index.name = "time"

In [None]:
# Map zone name -> row label of `zones`, plus a -1 sentinel meaning "nothing chosen yet".
# NOTE(review): if zone names repeat, later rows overwrite earlier ones in this dict -- confirm.
options = {"Choose from map": -1, **{v: k for k, v in zones.zone.to_dict().items()}}
pickup = pn.widgets.Select(name="Pickup", options=options)
dropoff = pn.widgets.Select(name="Dropoff", options=options)
# Faint zone map; tapping a zone fills the pickup/dropoff selectors via the stream below.
plot = zones.hvplot(geo=True, c='zone', legend=False, width=500, height=500, xaxis=None, yaxis=None, alpha=.2, selection_alpha=1).opts(tools=['tap', 'hover'])

def on_map_select(index):
    """Fill the first still-unset selector from a tap on the map.

    The first selected index goes to `pickup` if it is still -1, otherwise
    to `dropoff` if that is still -1. Empty selections, and taps made once
    both selectors are set, are ignored.
    """
    if not index:
        return
    tapped = index[0]
    if pickup.value == -1:
        pickup.value = tapped
    elif dropoff.value == -1:
        dropoff.value = tapped

# Report tap selections on the zone map to on_map_select.
stream = Selection1D(source=plot)
stream.param.watch_values(on_map_select, ['index'])

# Pane whose .object is replaced with the trip overlay on submit and restored on reset.
overlay = pn.pane.HoloViews(plot * gv.tile_sources.CartoLight())

def on_reset(*args):
    """Return the prediction form and map to their initial state.

    Relies on the `date`, `hour` and `text` widgets, which are created
    further down in this cell.
    """
    # -1 is the "Choose from map" sentinel in the selectors' options.
    pickup.value = -1
    dropoff.value = -1
    date.value = dt.datetime.now().date()
    hour.value = 0
    # Clear the prediction text and its highlight.
    text.background = "#ffffff"
    text.object = None
    # Clear the map selection and drop any trip overlay.
    stream.update(index=[])
    overlay.object = plot * gv.tile_sources.CartoLight()

# Reset button, wired to the handler that clears the form.
reset = pn.widgets.Button(name="Reset", width=80)
reset.on_click(on_reset)

# Trip date, defaulting to today.
date = pn.widgets.DatePicker(name="Date", value=dt.datetime.now().date())

# Hour-of-day slider: labels "12am", "1am", ..., "11am", "12pm", ..., "11pm"
# mapped in order to the integers 0..23.
hour = pn.widgets.DiscreteSlider(
    name="Hour",
    options={
        f"{(h % 12) or 12}{'am' if h < 12 else 'pm'}": h
        for h in range(24)
    },
)

# Submit button plus the two Markdown panes it writes to.
submit = pn.widgets.Button(name="Predict my fare", button_type='primary', width=200)
text = pn.pane.Markdown(width=200, height=45, style={"padding-left": "10pt"})
helper = pn.pane.Markdown(width=300)

def b(event):
    """Handle a click on the "Predict my fare" button.

    If either selector still holds the -1 sentinel, turn the button red and
    show a hint. Otherwise request a prediction, draw the trip between the
    two zones on the map, and display the predicted fare.
    """
    origin, destination = pickup.value, dropoff.value
    if origin == -1 or destination == -1:
        submit.button_type = "danger"
        helper.object = "*You must select pickup and dropoff zone*"
        return

    # Valid input: clear any earlier warning state.
    submit.button_type = "primary"
    helper.object = None

    when = dt.datetime.combine(date.value, dt.time(hour=hour.value))
    prediction = mock_fare_prediction(origin, destination, when)

    # The two endpoint zones, pickup first.
    endpoints = zones.iloc[[origin, destination]]
    centroids = endpoints.geometry.centroid

    # Straight line between the two zone centroids.
    trip = gv.Path((centroids.x, centroids.y)).opts(color="black", line_width=2)

    start_name, end_name = endpoints.zone.tolist()
    obj = plot * gv.tile_sources.CartoLight() * endpoints.hvplot(geo=True) * trip
    obj.label = f"{start_name} to {end_name}"
    overlay.object = obj

    text.background = "yellow"
    text.object = f"## Prediction: ${prediction}.00"
    
# Run the prediction handler when the submit button is clicked.
submit.on_click(b)

# Prediction form (left column) next to the map overlay.
predict = pn.Row(
    pn.Column(
        "## Predict my Fare",
        pn.Row(pickup),
        pn.Row(dropoff), 
        date,
        hour,
        pn.Row(submit, reset),
        helper,
        text,
    ),
    overlay
)

In [None]:
# Actual vs. mock-predicted fare; the initial view covers one week in January 2018.
timeseries = table.hvplot.line(
    y=["actual", "predicted"], ylim=(0, 50),
    xlim=(pd.Timestamp("2018-01-05"), pd.Timestamp("2018-01-12")), legend="bottom_left"
).opts(toolbar="above")

In [None]:
# NOTE(review): hard-coded metrics; presumably placeholders until real model results are wired in.
metrics = pd.DataFrame({"tool": ["scikit", "xgboost"], "model": ["elastic", "elastic"], "rmse": [0.45, 0.65]})
# Markdown shown above the metrics table.
metrics_text = """
### Summary of Models

There are a variety of models displayed above. Here we show their errors.
"""

In [None]:
# Grid layout of the ML tab, addressed as ml[rows, columns].
ml = pn.GridSpec(name="ML", sizing_mode='stretch_both', min_width=800, min_height=600, max_height=800)

# Row 0: intro text and logo.
ml[0, :6] = ml_intro
ml[0, 6] = logo
# Rows 1-2: actual vs. predicted timeseries.
ml[1:3, :6] = pn.pane.HoloViews(timeseries)
# Rows 3-7: prediction form on the left, model metrics on the right.
ml[3:8, 4:6] =pn.Column(metrics_text, pn.pane.DataFrame(metrics, index=False))
ml[3:8, :4] = predict

## Final Dashboard

In [None]:
# Combine the three grids into one tabbed layout and mark it servable under the title "Saturn Taxi".
pn.Tabs(volume, tips, ml, tabs_location="left").servable(title="Saturn Taxi")