In [None]:
import numpy as np
import dask.dataframe as dd

import hvplot.dask, hvplot.pandas
import holoviews as hv
from holoviews.streams import Selection1D
import datetime as dt
import panel as pn
import pandas as pd

import warnings
warnings.simplefilter("ignore")

## Read in data

We'll start by reading in the taxi zone shapefile (`./data/taxi_zones.shp`). **TODO:** add a link to the download source.

In [None]:
import geopandas as gpd

# Read the taxi zone shapefile, then reproject it to EPSG:4326 before plotting.
zones = gpd.read_file('./data/taxi_zones.shp')
zones = zones.to_crs('epsg:4326')

# Quick visual check of the zone geometries.
zones.hvplot(geo=True)

In [None]:
import geoviews as gv

# CartoLight tile layer, kept under the name `basemap`.
basemap = gv.tile_sources.CartoLight()

### Volume data

Next we'll read in the aggregated data that we generated in the exploratory notebooks

In [None]:
# Aggregated ride volume, one table per trip end, indexed by taxi zone id.
pickup_volume = pd.read_csv("./data/pickup_volume.csv", index_col="pickup_taxizone_id")
dropoff_volume = pd.read_csv("./data/dropoff_volume.csv", index_col="dropoff_taxizone_id")

# Preview only the first rows, as the other data-loading cells do.
dropoff_volume.head()

### Fare data

In [None]:
# Fare aggregates per taxi zone (one table per trip end).
pickup_fare = pd.read_csv(
    "./data/pickup_fare.csv",
    index_col="pickup_taxizone_id",
)
dropoff_fare = pd.read_csv(
    "./data/dropoff_fare.csv",
    index_col="dropoff_taxizone_id",
)

# Fare timeseries (mean and standard deviation), indexed by parsed pickup datetime.
actual_mean_fare = pd.read_csv(
    "./data/fare_timeseries.csv",
    index_col="pickup_datetime",
    parse_dates=True,
)
actual_std_fare = pd.read_csv(
    "./data/fare_std_timeseries.csv",
    index_col="pickup_datetime",
    parse_dates=True,
)

actual_std_fare.head()

### Tip data

In [None]:
# Tip aggregates per taxi zone (one table per trip end).
pickup_tip = pd.read_csv(
    "./data/pickup_tip.csv",
    index_col="pickup_taxizone_id",
)
dropoff_tip = pd.read_csv(
    "./data/dropoff_tip.csv",
    index_col="dropoff_taxizone_id",
)

# Tip percentage grouped by pickup weekday, and as a datetime-indexed series.
tip_grouped = pd.read_csv(
    "./data/tip_percent.csv",
    index_col="pickup_weekday",
)
tip_timeseries = pd.read_csv(
    "./data/tip_timeseries.csv",
    index_col="pickup_datetime",
    parse_dates=True,
)
tip_timeseries.head()

## Construct visualizations

In this dashboard we'll have three tabs. We'll start with one about volume of rides and aggregate fare, then move on to one about tips and finish with a tab that digests the outputs of the Machine Learning algorithms that we've trained to predict fare.

### Volume tab

In [None]:
# Headline numbers for the volume tab: sum the `total` column of each pickup table.
total_rides = pickup_volume["total"].sum()
total_fare = pickup_fare["total"].sum()

In [None]:
# Markdown header text placed in the top row of the volume tab.
volume_intro = """
# Taxi Volume

Ridership by region and total fare for period of record. 
"""

In [None]:
# Logo pane, floated right; it is placed in the top row of each tab's grid.
logo = pn.pane.SVG(
    "./data/saturn_logo.svg",
    style={"float": "right"},
)

In [None]:
def _stat_card(title, headline, color):
    """Build one of the boxed headline-number cards shown on the volume tab.

    Parameters
    ----------
    title : str
        Small heading rendered above the number (e.g. "Total Fares").
    headline : str
        Already-formatted value rendered as the large line.
    color : str
        CSS colour used for the card text.

    Returns
    -------
    pn.pane.Markdown
        Markdown pane with the shared box styling applied.
    """
    return pn.pane.Markdown(
        f"""
    ### {title}
    # {headline}
    """,
        style={'background-color': '#F6F6F6', 'border': '2px solid black',
                'border-radius': '5px', 'padding': '10px', 'color': color},
    )


# The three cards differ only in title, formatted value and text colour.
fares = _stat_card("Total Fares", f"${(total_fare / 1e9) :.02f} Billion", "#10874a")
rides = _stat_card("Total Rides", f"{(total_rides / 1e6) :.02f} Million", "#7a41ba")
average = _stat_card("Average Fare", f"${(total_fare / total_rides) :.02f}", "coral")

In [None]:
# Switch between pickup-based and dropoff-based views; starts on "Pickup".
toggle = pn.widgets.RadioButtonGroup(
    options=["Pickup", "Dropoff"],
    value="Pickup",
)

In [None]:
@pn.depends(value=toggle)
def volume_map(value):
    """Return a zone map coloured by ride volume for the chosen trip end.

    ``value`` is the current state of ``toggle``: "Pickup" selects the pickup
    table, anything else selects the dropoff table. Volumes are shown in
    millions of rides.
    """
    if value == "Pickup":
        volume = pickup_volume
    else:
        volume = dropoff_volume

    rides_in_millions = volume["total"] / 1e6
    data = zones.join(rides_in_millions, on="LocationID")

    choropleth = data.hvplot(
        x="longitude",
        y="latitude",
        c="total",
        geo=True,
        tiles="CartoLight",
        alpha=0.6,
        cmap="viridis",
        clim=(0, 12),
        hover_cols=["zone", "borough"],
        title=f"Ride volume by {value} location (in Millions)",
        responsive=True,
        colorbar=False,
        xaxis=None,
        yaxis=None,
        selection_alpha=1,
    )
    return choropleth.opts(toolbar="above")

In [None]:
# Static pickup map. It is built once, without tiles, because it is the source
# of the tap-selection stream; the tile layer is overlaid when it is placed in the grid.
value = "Pickup"
volume = pickup_volume if value == "Pickup" else dropoff_volume
data = zones.join(volume["total"] / 1e6, on="LocationID")

pickup_map = data.hvplot(
    x="longitude",
    y="latitude",
    c="total",
    geo=True,
    alpha=0.6,
    cmap="viridis",
    clim=(0, 12),
    hover_cols=["zone", "borough"],
    title=f"Ride volume by {value} location (in Millions)",
    responsive=True,
    colorbar=True,
    xaxis=None,
    yaxis=None,
    selection_alpha=1,
)
# Enable tapping on zones and move the toolbar above the plot.
pickup_map = pickup_map.opts(tools=["tap"], toolbar="above")

In [None]:
@pn.depends(value=toggle)
def volume_table(value):
    """Return a table of the five zones with the highest ride totals.

    ``value`` is the current state of ``toggle``: "Pickup" uses the pickup
    table, anything else uses the dropoff table.
    """
    if value == "Pickup":
        source = pickup_volume
    else:
        source = dropoff_volume

    busiest = source.total.sort_values(ascending=False).head(5)
    # Right join keeps only the five selected zone ids.
    matched = zones.join(busiest, on="LocationID", how="right")
    subset = matched[['zone', 'total']]

    table = hv.Table(subset)
    return table.opts(title=f"Top {value} Zones", width=400)

In [None]:
# Reshape the pickup volume table into long form: one row per (Zone, Hour) pair.
t = pickup_volume.copy()
# Replace the zone-id index with zone names via a LocationID -> zone lookup.
t["Zone"] = t.index.map(dict(zip(zones.LocationID.tolist(), zones.zone.tolist())))
# Drop the pre-computed total so only the remaining columns are melted.
t = t.set_index("Zone").drop("total", axis=1)
t.columns.name = "Hour"
t = t.melt(ignore_index=False, value_name="Rides")
# Preview only the first rows, as the data-loading cells do.
t.head()

In [None]:
# Horizontal bars of rides per hour, keyed by zone so a single zone can be
# looked up with `dmap[zone]`.
dmap = t.hvplot.barh(
    x="Hour",
    y="Rides",
    groupby="Zone",
    responsive=True,
    xaxis=False,
)
dmap = dmap.opts(toolbar=None)

In [None]:
# Rides per hour summed over all zones; shown until a zone is tapped.
# NOTE(review): if the hour labels are strings taken from the CSV header, the
# groupby orders them lexicographically ("0", "1", "10", ...) - confirm.
summary = t.groupby("Hour").sum().hvplot.barh(
    x="Hour",
    y="Rides",
    responsive=True,
    xaxis=False,
    title="Total Rides",
    min_height=300,
)
summary = summary.opts(toolbar=None)

In [None]:
# Pane holding the bar chart; its `object` is swapped by `on_pickup_tap`.
bars = pn.pane.HoloViews(summary)

In [None]:
def on_pickup_tap(index):
    """Show the hourly ride bars for the zone tapped on the pickup map.

    Parameters
    ----------
    index : list of int
        Positions of the selected zones as reported by the selection stream.
        An empty or missing selection leaves the current chart untouched.
    """
    if not index:
        return
    # Several zones can be selected at once; `.item()` on a multi-row
    # selection raises ValueError, so only the first entry is used (the same
    # convention as `on_map_select`). Selection indices are positional,
    # hence `iloc`.
    zone = zones["zone"].iloc[index[0]]
    bars.object = dmap[zone].opts(title=f"{zone} Rides")
    return
       
# Forward tap selections on the pickup map to `on_pickup_tap`.
volume_stream = Selection1D(source=pickup_map)
# Trailing semicolon suppresses the cell output.
volume_stream.param.watch_values(on_pickup_tap, ['index']);

In [None]:
# Grid layout for the "Volume" tab. Note this rebinds the name `volume`,
# which earlier cells used for a DataFrame.
volume = pn.GridSpec(name="Volume", sizing_mode='stretch_both', min_width=1000, min_height=600, max_height=800)
# Row 0: intro text on the left, logo in the last column.
volume[0, :4] = volume_intro
volume[0, 5] = logo
# Row 1: the three headline-number cards.
volume[1, 0] = fares
volume[1, 1] = rides
volume[1, 2] = average
# Right-hand side: pickup/dropoff toggle with the top-zones table beneath it.
volume[1:4, 4:6] = pn.Column(toggle, volume_table)
# Middle column: bar chart that reacts to taps on the map.
volume[1:8, 3] = bars
# Left block: pickup map overlaid with a CartoLight tile layer.
volume[2:8, 0:3] = pickup_map * gv.tile_sources.CartoLight()
# NOTE(review): plain string path; presumably expected to render as an image - confirm the file exists.
volume[4:8, 4:6] = "./pie_chart.svg"

### Tip tab

In [None]:
# Markdown header text placed in the top row of the tips tab.
intro = """
# Analysis of Tips

Tips vary based on time of day, location and many other factors. 
"""

In [None]:
# Weekday-by-hour heatmap of tip percentage, derived from the datetime index.
heatmap = tip_timeseries.hvplot.heatmap(
    x="index.dt.weekday",
    y="index.dt.hour",
    C="tip%",
    title="Average Tip %",
    xlabel="Day",
    ylabel="Hour",
    xticks=[
        (0, 'Mon'), (1, 'Tues'), (2, 'Wed'), (3, 'Thur'),
        (4, 'Fri'), (5, 'Sat'), (6, 'Sun'),
    ],
    responsive=True,
    min_height=500,
    colorbar=False,
    cmap="coolwarm",
    clim=(8, 12),
)
# Average all observations that fall into the same (weekday, hour) cell.
heatmap = heatmap.aggregate(function=np.mean)
heatmap = heatmap.opts(toolbar=None, xrotation=90)

In [None]:
# Date window for the tip timeseries; bounds come from the first and last
# timestamps in the data.
date_range_slider = pn.widgets.DateRangeSlider(
    name='Show between',
    start=tip_timeseries.index[0], end=tip_timeseries.index[-1],
    value=(pd.Timestamp("2018-01"), pd.Timestamp("2019-02"))
)
# Rolling-window sizes passed straight to `DataFrame.rolling`. Time-based
# windows must be fixed frequencies, so the month-long option is spelled
# '30D' ('1M' is a calendar-month offset and makes `rolling` raise ValueError).
discrete_slider = pn.widgets.DiscreteSlider(
    name='Rolling window',
    options=['1H', '2H', '4H', '6H', '12H', '1D', '2D', '7D', '14D', '30D'],
    value='1D',
)

def tip_plot(xlim, window):
    """Plot the rolling-mean tip percentage, restricted to ``xlim``.

    ``window`` is handed to ``rolling`` unchanged; ``xlim`` becomes the
    x-axis limits of the plot.
    """
    smoothed = tip_timeseries.rolling(window).mean()
    curve = smoothed.hvplot(
        y="tip%",
        xlim=xlim,
        ylim=(8, 12),
        responsive=True,
        min_height=200,
    )
    return curve.opts(toolbar="above")

def trim(target, event):
    """Redraw for a new date range, keeping the current rolling window."""
    new_range = event.new
    target.object = tip_plot(new_range, discrete_slider.value)


def roll(target, event):
    """Redraw for a new rolling window, keeping the current date range."""
    new_window = event.new
    target.object = tip_plot(date_range_slider.value, new_window)


# Initial plot built from the widgets' starting values.
tip_timeseries_plot = pn.pane.HoloViews(
    tip_plot(date_range_slider.value, discrete_slider.value)
)

# Re-render the pane whenever either widget's value changes.
discrete_slider.link(tip_timeseries_plot, callbacks={"value": roll})
date_range_slider.link(tip_timeseries_plot, callbacks={"value": trim})

In [None]:
# Attach the pickup tip aggregates to the zone geometries.
joined = zones.join(pickup_tip, on="LocationID")

# Zone map coloured by the `total` column from the tip table.
tip_map = joined.hvplot(
    c="total",
    geo=True,
    alpha=0.6,
    cmap="coolwarm",
    hover_cols=["zone", "borough"],
    title="Average Tip %",
    clim=(5, 15),
    responsive=True,
    colorbar=False,
    xaxis=None,
    yaxis=None,
)
tip_map = tip_map.opts(toolbar="above")

In [None]:
# Ranked table of zones by tip percentage (highest first), rounded to 2 decimals.
# De-duplication happens while `zone` is still a column, so only rows that
# repeat the same (zone, tip %) pair are removed. Doing it after `zone`
# becomes the index compares the tip value alone and silently drops distinct
# zones that happen to share a rounded tip percentage.
tip_table = (
    joined[["zone", "total"]]
    .sort_values(by="total", ascending=False)
    .assign(**{"tip %": lambda frame: frame["total"].round(2)})
    .drop("total", axis=1)
    .drop_duplicates()
    .set_index("zone")
)
# Show the top 20 rows without header or index names.
tip_pane = pn.pane.DataFrame(tip_table.head(20), header=False, index_names=False)

In [None]:
# Grid layout for the "Tips" tab.
tips = pn.GridSpec(name="Tips", sizing_mode='stretch_both', min_width=1000, min_height=600, max_height=800)

# Row 0: intro text on the left, logo in the last column.
tips[0, :5] = intro
tips[0, 5] = logo
# Rows 1-4: tip map, top-20 table and weekday/hour heatmap side by side.
tips[1:5, 0:2] = tip_map * gv.tile_sources.CartoLight()
tips[1:5, 2:4] = pn.Column("### Top 20 Tip Zones", tip_pane)
tips[1:5, 4] = heatmap
# Rows 5-7: timeseries controls (with help text) next to the timeseries plot.
# The help text previously read "or and".
tips[5:8, 0] = pn.Column(date_range_slider, discrete_slider, "*Use widgets to control rolling window average on the timeseries plot and to restrict to between certain dates*")
tips[5:8, 1:5] = tip_timeseries_plot

### ML tab

In [None]:
# Markdown header text for the ML tab. This rebinds `intro`, which the tips
# tab cell has already consumed by this point.
intro = """
# Machine Learning

Exploring the historical accuracy of various models. Predict fare by consuming deployed models.
"""

In [None]:
def mock_fare_prediction(pickup_taxizone_id, dropoff_taxizone_id, datetime):
    """Stand-in for a fare model: return a random whole-dollar fare.

    All three arguments are accepted but ignored. The result is drawn
    uniformly from the integers 10 to 99 inclusive.
    """
    lowest_fare, fare_ceiling = 10, 100  # the upper bound is exclusive
    return np.random.randint(lowest_fare, fare_ceiling)

# Fake "predicted" series: the actual mean fare plus Gaussian noise scaled by 10.
predicted_fare = (
    actual_mean_fare["fare_amount"]
    + np.random.randn(len(actual_mean_fare["fare_amount"])) * 10
)

# One frame with the mean +/- one standard deviation band, the mean itself and
# the mock prediction, indexed by time.
table = pd.DataFrame(
    {
        "top": actual_mean_fare["fare_amount"] + actual_std_fare["fare_amount"],
        "bottom": actual_mean_fare["fare_amount"] - actual_std_fare["fare_amount"],
        "mean": actual_mean_fare["fare_amount"],
        "predicted": predicted_fare,
    }
).rename_axis("time")

In [None]:
# Map zone name -> row label of `zones`, used as the option values of both selectors.
options = {zone_name: row_label for row_label, zone_name in zones.zone.to_dict().items()}
pickup = pn.widgets.Select(name="Pickup", options=options)
dropoff = pn.widgets.Select(name="Dropoff", options=options)
# Chooses which selector a tap on the map fills in.
# NOTE(review): this rebinds the module-level name `toggle` created for the
# volume tab (options "Pickup"/"Dropoff"); re-running the volume cells after
# this one would bind them to this widget instead.
toggle = pn.widgets.RadioButtonGroup(
    options=["pickup", "dropoff"],
    value="pickup",
)

# Faint zone map; tapped zones are highlighted at full opacity.
plot = zones.hvplot(
    geo=True,
    c='zone',
    legend=False,
    width=400,
    height=400,
    xaxis=None,
    yaxis=None,
    alpha=.2,
    selection_alpha=1,
)
plot = plot.opts(tools=['tap', 'hover'])

def on_map_select(index):
    """Copy the first tapped zone into the selector chosen by ``toggle``.

    An empty selection, or a toggle value other than "pickup"/"dropoff",
    changes nothing.
    """
    if not index:
        return
    target = toggle.value
    if target == "pickup":
        pickup.value = index[0]
    elif target == "dropoff":
        dropoff.value = index[0]
    return
       
# Forward tap selections on the zone map to `on_map_select`.
stream = Selection1D(source=plot)
stream.param.watch_values(on_map_select, ['index'])

# Zone map overlaid with a CartoLight tile layer.
overlay = pn.pane.HoloViews(plot * gv.tile_sources.CartoLight())

# Date and time inputs; picking a date pushes its value into the datetime input.
date = pn.widgets.DatePicker(
    name="Date",
    value=dt.datetime.now().date(),
)
datetime = pn.widgets.DatetimeInput(
    name="Time",
    value=dt.datetime.now(),
)
date.link(datetime, value='value')

# Trigger button plus the (initially empty) panes that display the result.
submit = pn.widgets.Button(
    name="Predict my fare",
    button_type='primary',
)
text = pn.pane.Markdown(width=300)
output = pn.pane.HoloViews()

def b(event):
    """Handle a click on the submit button.

    Gets a (mock) fare for the selected pickup/dropoff zones and time, draws
    the two zones with a line between their centroids, writes the summary
    text, and switches the tab view to the output tab.
    """
    origin, destination = pickup.value, dropoff.value
    prediction = mock_fare_prediction(origin, destination, datetime.value)

    # Selector values are row positions into `zones`.
    subset = zones.iloc[[origin, destination]]
    centroids = subset.geometry.centroid

    trip = gv.Path((centroids.x, centroids.y)).opts(color="black", line_width=2)
    zone_shapes = subset.hvplot(geo=True, width=400, height=400, xaxis=None, yaxis=None)
    obj = zone_shapes * trip * gv.tile_sources.CartoLight()

    start_zone, end_zone = subset.zone.tolist()
    obj.label = f"{start_zone} to {end_zone}"
    output.object = obj

    text.object = f"""
    ## Prediction: ${prediction}.00
    
    Date: {datetime.value}
    """
    # Index 1 is the output tab (`tabs` is defined further down in this cell).
    tabs.active = 1


submit.on_click(b)

# Input tab: the form widgets in a column on the left, the tappable map on the right.
input_tab = pn.Row(
    pn.Column(
        pn.Row(pickup),
        pn.Row(dropoff), 
        pn.Row("Choose from Map", margin=5),
        pn.Row(toggle),
        date,
        datetime,
        submit,
    ),
    overlay,
    name="input",
)
# Output tab: prediction text next to the trip map; filled in by the button callback.
output_tab = pn.Row(
    text,
    output,
    height=300,
    name="output"
)
# Index 0 is the input tab, index 1 the output tab.
tabs = pn.Tabs(input_tab, output_tab)

# Complete "predict my fare" widget: a heading above the two tabs.
predict = pn.Column(
    pn.Row("## Predict my Fare"),
    tabs
)

In [None]:
# Shaded band between `bottom` and `top`, overlaid with the mean and
# predicted fare lines, initially zoomed to one week in January 2018.
timeseries = (
    table.hvplot.area(
        y="bottom",
        y2="top",
        alpha=0.2,
        width=800,
        ylabel="fare",
        hover=False,
    )
    * table.hvplot.line(
        y=["mean", "predicted"],
        ylim=(0, 50),
        xlim=(pd.Timestamp("2018-01-05"), pd.Timestamp("2018-01-12")),
        legend="bottom_left",
    )
)
timeseries = timeseries.opts(toolbar="above")

In [None]:
# Grid layout for the "ML" tab.
ml = pn.GridSpec(name="ML", sizing_mode='stretch_both', min_width=1000, min_height=600, max_height=800)

# Row 0: intro text on the left, logo in the last column.
ml[0, :5] = intro
ml[0, 5] = logo
# Rows 1-2: actual vs. predicted fare timeseries.
ml[1:3, :5] = pn.pane.HoloViews(timeseries)
# Rows 3-7: the interactive fare prediction widget.
ml[3:8, :5] = predict

## Final Dashboard

In [None]:
# Assemble the three tabs (tab bar on the left) and mark the result as servable.
# `active=1` makes the second tab (tips) the one selected on load.
pn.Tabs(volume, tips, ml, active=1, tabs_location="left").servable(title="Saturn Taxi")