In [None]:
# theme: flatly, lumen, sandstone, yeti
import polars as pl
from great_tables import GT, _data_color, md, nanoplot_options
from IPython.display import Markdown

In [None]:
import pandas as pd

# Scratch check against the per-hour arrivals table, restricted at read time
# to the "04 - Oakland" district; exact duplicate rows are dropped.
df = pd.read_parquet(
    "../data/arrivals_per_hour.parquet",
    filters=[[("caltrans_district", "==", "04 - Oakland")]],
).drop_duplicates()

In [None]:
# NOTE(review): ad-hoc ratio (~0.116); the meaning of these two counts is not
# recorded in this notebook -- TODO label the quantities or remove this cell.
12020 / 103495

In [None]:
# Weekday rows whose operator name contains "BART".
df.loc[df["name"].str.contains("BART") & df["day_type"].eq("weekday")]

In [None]:
# Polars frame that feeds the district tables below, ordered by district and
# then by ascending n_trips.
# NOTE: this rebinds `df`, which an earlier cell used for a pandas frame.
df = pl.read_parquet(
    "../data/arrivals_polars.parquet"
).sort("caltrans_district", "n_trips", descending=[False, False])

In [None]:
# Sorted list of the distinct caltrans_district values present in df.
all_districts = sorted(df["caltrans_district"].unique().to_list())

In [None]:
def get_hex(color_name: str) -> str:
    """
    Return the hex code that great_tables associates with `color_name`.

    Some color names are not turned into a hex code when passed along
    directly, so the lookup is done explicitly against the library's
    name-to-hex table.
    """
    hex_by_name = _data_color.constants.COLOR_NAME_TO_HEX
    return hex_by_name[color_name]

# Shared nanoplot styling, passed to every fmt_nanoplot call in make_plot.
nano_options = nanoplot_options(
    # Line and area: thick curved line with a fill in the same color.
    data_line_type="curved",
    data_line_stroke_color=get_hex("steelblue4"),
    data_line_stroke_width=8,
    data_area_fill_color=get_hex("steelblue4"),
    # Individual data points: zero radius, no stroke.
    data_point_radius=0,
    data_point_stroke_color="none",
    data_point_stroke_width=0,
    # Guides and interactivity; the reference line is switched off.
    show_y_axis_guide=True,
    show_reference_line=False,
    interactive_data_values=True,
)

In [None]:
# https://quarto.org/docs/output-formats/html-themes.html
# https://github.com/posit-dev/great-tables/blob/main/great_tables/_data_color/constants.py
def make_plot(df):
    """
    Build the styled great_tables table for a single district.

    `df` holds one district's rows (the caller filters on
    `caltrans_district`); the label used in the title is read from the
    first row. Returns the GT object so the caller decides how to show it.
    """
    # Polars' DataFrame.item(row, column) plays the role of pandas' df.iloc.
    district_name = df.item(0, "caltrans_district")
    arrival_columns = ["weekday_arrivals", "weekend_arrivals"]

    # Weekly trips: integer formatting plus a white-to-green color scale.
    table = GT(df).fmt_integer(columns=["n_trips"])
    table = table.data_color(
        columns=["n_trips"],
        palette=["white", get_hex("mediumseagreen")],
        na_color="lightgray",
    )

    # Both arrival columns become line nanoplots with identical settings.
    for arrival_column in arrival_columns:
        table = table.fmt_nanoplot(
            columns=arrival_column,
            plot_type="line",
            reference_line="mean",
            options=nano_options,
        )

    table = table.cols_label(
        name="Operator",
        n_trips="Weekly Trips",
        weekday_arrivals="Weekday Arrivals",
        weekend_arrivals="Weekend Arrivals",
    )
    # Missing arrival data renders as an empty cell.
    table = table.sub_missing(columns=arrival_columns, missing_text="")
    table = table.tab_header(
        title=f"District {district_name}: Stop Arrivals by Hour",
        subtitle="weekday vs weekend totals",
    )

    # No source notes are attached to the table.
    # Plain white backgrounds everywhere and no body grid lines.
    table = table.tab_options(
        container_width="100%",
        table_background_color="white",
        table_body_hlines_style="none",
        table_body_vlines_style="none",
        heading_background_color="white",
        column_labels_background_color="white",
        row_group_background_color="white",
        stub_background_color="white",
        source_notes_background_color="white",
    )

    # The district already appears in the title, so its column is hidden.
    return table.cols_hide("caltrans_district")

How does transit service break down by the hour?

While transit operators typically operate at higher frequencies during peak 
commuting hours, there might be differences across operators. 

We can use GTFS scheduled stop times to count the number of arrivals 
for each hour of the day and look at how weekday and weekend service differ by operator. Within each Caltrans District, operators are sorted by weekly trip volume, from least to most.

Here are a couple of interesting service patterns:

## Commuter Service
* **Bay Area 511 Mission Bay and Bay Area 511 Commute.org (District 4)** both serve commuter riders primarily, with pronounced AM and PM peak service, no scheduled midday arrivals during weekdays, and no weekend service.

## Primarily Midday Service
* **Cerritos on Wheels (District 7)** operates strictly during business hours (9 AM - 5 PM).

## Service for Travelers
* **LAX FlyAway (District 7)** operates round-the-clock, dipping only between 2 AM - 3 AM.
* **Anaheim Resort (District 12)** continuously provides shuttle service to Disneyland visitors, except from 2 AM - 4 AM.
    
### References
* [General Transit Feed Specification](https://gtfs.org/)<br>
* [GTFS stop_times table](https://gtfs.org/schedule/reference/#stop_timestxt)

In [None]:
# Render one table per district, in sorted district order.
for district in all_districts:
    district_rows = df.filter(pl.col("caltrans_district") == district)
    display(make_plot(district_rows))