In [None]:
import numpy as np
import dask.dataframe as dd
from datetime import datetime as dt

import hvplot.dask, hvplot.pandas
from holoviews.streams import Selection1D
import panel as pn
import pandas as pd
import geoviews as gv

import warnings
warnings.simplefilter("ignore")

In [None]:
from dask.distributed import Client, wait

# Create a Dask distributed client with default arguments (no scheduler
# address is passed). The bare `client` expression makes the notebook
# render it as this cell's output.
client = Client()
client

In [None]:
# Open the taxi parquet dataset stored under the S3 prefix below as a Dask
# DataFrame, using the pyarrow engine.
# NOTE(review): `assume_missing` is a documented option of `dd.read_csv`;
# confirm that `dd.read_parquet` actually honors it rather than ignoring it.
ddf = dd.read_parquet(
    "s3://saturn-titan/data/nyc-taxi/taxi_2017_2019/", 
    assume_missing=True, 
    engine="pyarrow"
)
# Bare expression: display the DataFrame summary as the cell output.
ddf

In [None]:
def mock_fare_prediction(pickup_taxizone_id, dropoff_taxizone_id, datetime):
    """Return a placeholder fare: a random integer drawn from [10, 100).

    All three arguments are accepted but not used; the result depends only
    on NumPy's global random state.
    """
    lowest_fare, fare_ceiling = 10, 100  # the ceiling is exclusive
    return np.random.randint(lowest_fare, fare_ceiling)

In [None]:
import geopandas as gpd
# Read the taxi zone shapefile from the local data directory and reproject
# it to EPSG:4326.
zones = gpd.read_file('./data/taxi_zones.shp').to_crs('epsg:4326')

In [None]:
# Invert the {row label: zone name} mapping so the Select widgets display zone
# names while their values are the corresponding row labels of `zones`.
# NOTE(review): zone names become dict keys, so if a name occurs on more than
# one row only the last row label is kept for it.
options = {zone_name: row for row, zone_name in zones.zone.to_dict().items()}
pickup = pn.widgets.Select(options=options, name="Pickup")
dropoff = pn.widgets.Select(options=options, name="Dropoff")
# Decides which of the two Selects a tap on the map should update.
toggle = pn.widgets.RadioButtonGroup(value="pickup", options=["pickup", "dropoff"])

# Zone polygons coloured by zone name, with tap and hover tools enabled.
plot = zones.hvplot(
    geo=True,
    c='zone',
    legend=False,
    alpha=.2,
    selection_alpha=1,
).opts(tools=['tap', 'hover'])

def on_map_select(index):
    """Copy the first selected map index into the Select chosen by `toggle`.

    Does nothing when `index` is empty, or when the toggle value is neither
    "pickup" nor "dropoff".
    """
    if not index:
        return
    target = {"pickup": pickup, "dropoff": dropoff}.get(toggle.value)
    if target is not None:
        target.value = index[0]
       
# Watch selections made on the zone plot and forward the selected `index`
# to on_map_select.
stream = Selection1D(source=plot)
stream.param.watch_values(on_map_select, ['index'])

# Zone polygons overlaid with the CartoLight tile source.
overlay = pn.pane.HoloViews(plot * gv.tile_sources.CartoLight())
# Both widgets start at the current date / time.
date = pn.widgets.DatePicker(name="Date", value=dt.now().date())
datetime = pn.widgets.DatetimeInput(name="Time", value=dt.now())
# Link the date picker's value to the datetime input's value.
# NOTE(review): the picker is initialised with a date and the input with a
# datetime; confirm the linked value keeps the intended time of day.
date.link(datetime, value='value')

submit = pn.widgets.Button(name="Predict my fare", button_type='primary')
# Placeholders filled in by the submit handler: prediction text and trip map.
text = pn.pane.Markdown(width=300)
output = pn.pane.HoloViews()

def b(event):
    """Handle a click on the submit button.

    Gets a fare from `mock_fare_prediction` for the current pickup, dropoff
    and datetime widget values, draws the two selected zones joined by a
    line between their centroids into `output`, and writes the prediction
    summary into `text`. The `event` argument is not used.
    """
    prediction = mock_fare_prediction(pickup.value, dropoff.value, datetime.value)

    # Rows of the two chosen zones: pickup first, dropoff second.
    endpoints = [pickup.value, dropoff.value]
    subset = zones.iloc[endpoints]

    # Line joining the centroids of the two zones.
    centroids = subset.geometry.centroid
    trip = gv.Path((centroids.x, centroids.y)).opts(color="black", line_width=2)

    zone_map = subset.hvplot(geo=True, width=400, height=400, tiles="CartoLight")
    obj = zone_map * trip
    obj.label = f"{subset.zone.tolist()[0]} to {subset.zone.tolist()[1]}"
    output.object = obj

    text.object = f"""
    ## Prediction: ${prediction}.00
    
    Date: {datetime.value}
    """
    
# Run the handler `b` each time the submit button is clicked.
submit.on_click(b)

# Dashboard layout, displayed as the cell output:
#   row 1: title
#   row 2: input controls on the left, zone map on the right
#   row 3: prediction text next to the trip map
pn.Column(
    pn.Row("# Predict my Fare"),
    pn.Row(
        pn.Column(
            pn.Row(pickup),
            pn.Row(dropoff), 
            pn.Row("Choose from Map", margin=5),
            pn.Row(toggle),
            date,
            datetime,
            submit,
        ),
        overlay,
    ),
    pn.Row(
        text,
        output
    )
)

In [None]:
# Hourly mean of fare_amount, indexed by pickup_datetime and computed into memory.
fare = (
    ddf[["pickup_datetime", "fare_amount"]]
    .set_index("pickup_datetime")
    .resample('1H')
    .mean()
    .compute()
)

# Keep only the hourly bins between the pickup times of the first and last
# rows of `ddf` (as returned by head/tail).
# NOTE(review): presumably this drops bins created by stray out-of-range
# timestamps; it relies on the dataset being stored in pickup-time order.
start = ddf.head(1)["pickup_datetime"].values[0]
end = ddf.tail(1)["pickup_datetime"].values[0]
trimmed = fare.loc[start:end]

trimmed.to_csv("./data/fare_timeseries.csv")

In [None]:
# Hourly standard deviation of fare_amount, indexed by pickup_datetime and
# computed into memory.
fare = (
    ddf[["pickup_datetime", "fare_amount"]]
    .set_index("pickup_datetime")
    .resample('1H')
    .std()
    .compute()
)

# Keep only the hourly bins between the pickup times of the first and last
# rows of `ddf` (as returned by head/tail).
# NOTE(review): presumably this drops bins created by stray out-of-range
# timestamps; it relies on the dataset being stored in pickup-time order.
start = ddf.head(1)["pickup_datetime"].values[0]
end = ddf.tail(1)["pickup_datetime"].values[0]
trimmed = fare.loc[start:end]

trimmed.to_csv("./data/fare_std_timeseries.csv")