In [None]:
import numpy as np
import dask.dataframe as dd

import hvplot.dask, hvplot.pandas
import panel as pn
import pandas as pd

import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation and chained-assignment warnings
# raised by later cells, so problems there will not surface in the output.
warnings.simplefilter("ignore")

In [None]:
from dask.distributed import Client, wait

# Connect this notebook to the Dask scheduler listening at the given address.
# NOTE(review): the address (including the port) is hard-coded.
scheduler_address = "tcp://127.0.0.1:39945"
client = Client(scheduler_address)
client

In [None]:
# Open the taxi_2017_2019 Parquet dataset on S3 as a Dask DataFrame.
taxi_parquet_path = "s3://saturn-titan/data/nyc-taxi/taxi_2017_2019/"
ddf = dd.read_parquet(taxi_parquet_path, engine="pyarrow", assume_missing=True)
ddf

In [None]:
# Add an hour-of-day column for each end of the trip, derived from the
# corresponding datetime column.
for endpoint in ("pickup", "dropoff"):
    ddf[f"{endpoint}_hour"] = ddf[f"{endpoint}_datetime"].dt.hour

In [None]:
# For each trip endpoint, build (taxi zone x hour) tables of ride counts and
# fare totals, append a per-zone "total" column, and write them to CSV under
# data/.
for value in ["pickup", "dropoff"]:
    zone_col, hour_col = f"{value}_taxizone_id", f"{value}_hour"
    gb = ddf[[zone_col, hour_col, "fare_amount"]].groupby([zone_col, hour_col])

    # Ask for both statistics in a single aggregation so the dataset is
    # scanned once per endpoint rather than once per statistic.
    stats = gb["fare_amount"].agg(["count", "sum"]).compute()

    # Rows are taxi zones, columns are hours; "total" sums across the hours.
    volume = stats["count"].unstack()
    volume["total"] = volume.sum(axis=1)
    volume.to_csv(f"data/{value}_volume.csv")

    fare = stats["sum"].unstack()
    fare["total"] = fare.sum(axis=1)
    fare.to_csv(f"data/{value}_fare.csv")

In [None]:
import geopandas as gpd
# Taxi zone polygons, reprojected to lon/lat (EPSG:4326) for plotting.
zones = gpd.read_file('./data/taxi_zones.shp').to_crs('epsg:4326')

value = "pickup"

# `pickup_zone` was not defined anywhere in the notebook, so this cell failed
# with NameError on a fresh kernel. Load the per-zone ride counts from the CSV
# written by the aggregation loop (data/pickup_volume.csv); its first column
# is the taxi zone id, which becomes the index used for the join.
# NOTE(review): assumes the volume table (not the fare table) is intended,
# matching the plot title — confirm.
pickup_zone = pd.read_csv(f"data/{value}_volume.csv", index_col=0)

# Attach each zone's total ride count to its polygon via LocationID.
joined = zones.join(pickup_zone["total"], on="LocationID")

# TODO: put the dropoff/pickup maps on a widget with the same colour limits

pickup_map = joined.hvplot(x="longitude", y="latitude", c="total", logz=True,
                           geo=True, 
                           alpha=0.5, cmap="reds", hover_cols=["zone", "borough"], 
                           title=f"Ride volume by {value} location", height=600, width=800)
pickup_map

In [None]:
value = "dropoff"

# `dropoff_zone` was not defined anywhere in the notebook, so this cell failed
# with NameError on a fresh kernel. Load the per-zone ride counts from the CSV
# written by the aggregation loop (data/dropoff_volume.csv); its first column
# is the taxi zone id, which becomes the index used for the join.
# NOTE(review): assumes the volume table (not the fare table) is intended,
# matching the plot title — confirm.
dropoff_zone = pd.read_csv(f"data/{value}_volume.csv", index_col=0)

# Attach each zone's total ride count to its polygon via LocationID.
joined = zones.join(dropoff_zone["total"], on="LocationID")

# TODO: put the dropoff/pickup maps on a widget with the same colour limits

dropoff_map = joined.hvplot(x="longitude", y="latitude", c="total", logz=True,
                           geo=True, 
                           alpha=0.5, cmap="blues", hover_cols=["zone", "borough"], 
                           title=f"Ride volume by {value} location", height=600, width=800)
dropoff_map

## Payment Type Pie Chart

In [None]:
# Count rides per payment-type code and materialise the result locally.
payment_type = ddf["payment_type"].value_counts().compute()

In [None]:
# Display label for each payment-type code.
payment_labels = {
    "1": "Credit card",
    "2": "Cash",
    "3": "No charge",
    "4": "Dispute",
    "5": "Unknown",
    "6": "Voided trip",
}

# Replace the codes in the index with their labels, held as a categorical index.
new_index = payment_type.index.map(payment_labels).astype("category")
payment_type.index = new_index

In [None]:
# Name the series and its index so that reset_index() later yields the
# columns "payment_type" and "value".
payment_type = payment_type.rename("value").rename_axis("payment_type")

In [None]:
from math import pi

import pandas as pd

from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.transform import cumsum

output_notebook()

# One row per payment type: its label, count ("value"), wedge angle and the
# text shown next to the wedge.
data = payment_type.reset_index()
# Wedge angle is the type's share of ALL counted rides, in radians.
data['angle'] = data['value']/data['value'].sum() * 2*pi
data["label"] = data["value"].apply(lambda x: f"{x/1e6: .0f} M")
data["frac"] = data["angle"].apply(lambda x: f"{x / (2*pi): .0%}")

# Only the first two rows are drawn (presumably the two most frequent types,
# given value_counts' default ordering — confirm). Shares were computed before
# this cut, so the two wedges need not close the full circle.
# Take an explicit copy: assigning a column to a bare slice triggers pandas'
# SettingWithCopyWarning.
data = data.head(2).copy()
data['color'] = ["thistle", "lightblue"]


p = figure(plot_height=350, plot_width=350, toolbar_location=None,
           x_range=(-.5, .5), y_range=(0, 2), title="Payment Type")

# Each wedge starts where the previous one ended (cumulative angle).
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', source=data)

# Hand-placed annotations: one "<type>: <count> / <share>" label per wedge.
p.text(x=[-0.2, 0.07], y=[1.4, 0.7], text=data["payment_type"].astype(str) + ":\n  " + data["label"] + "\n  " + data["frac"],
       text_align="left", text_baseline="top", text_font_size="15px")


# Strip axes, grid and outline so only the pie and its title remain.
p.title.text_font_size = "20px"
p.axis.axis_label=None
p.axis.visible=False
p.grid.grid_line_color = None
p.outline_line_width = 0

show(p)

In [None]:
from bokeh.io import export_svgs

# Switch the pie-chart figure to the SVG backend and write it to disk.
svg_filename = "pie_chart.svg"
p.output_backend = "svg"
export_svgs(p, filename=svg_filename)