In [1]:
import altair as alt
import pandas as pd
import numpy as np


In [2]:
# Read the commuter-rail CSV (path is relative to the notebook's working
# directory) and leave the frame as the last expression so it is displayed.
mbta_data = pd.read_csv(filepath_or_buffer="MBTA_Commuter_Rail.csv")
mbta_data

Unnamed: 0,service_date,gtfs_route_id,gtfs_route_long_name,peak_offpeak_ind,otp_numerator,otp_denominator,cancelled_numerator,ObjectId,otp_ratio,service_time
0,2024-12-31,CR-Providence,Providence/Stoughton Line,OFF_PEAK,30,31,0,7230,0.97,05:00:00
1,2024-12-31,CR-Lowell,Lowell Line,PEAK,11,11,0,7386,1.00,05:00:00
2,2024-12-31,CR-Middleborough,Middleborough/Lakeville Line,PEAK,5,6,0,7387,0.83,05:00:00
3,2024-12-31,CR-Needham,Needham Line,PEAK,8,8,0,7506,1.00,05:00:00
4,2024-12-31,CR-Newburyport,Newburyport/Rockport Line,PEAK,6,6,0,7508,1.00,05:00:00
...,...,...,...,...,...,...,...,...,...,...
9834,2024-01-02,CR-Fitchburg,Fitchburg Line,PEAK,11,11,0,97527,1.00,05:00:00
9835,2024-01-02,CR-Worcester,Framingham/Worcester Line,PEAK,19,19,0,97528,1.00,05:00:00
9836,2024-01-02,CR-Franklin,Franklin Line,PEAK,7,9,0,97529,0.78,05:00:00
9837,2024-01-02,CR-Greenbush,Greenbush Line,PEAK,6,7,0,97530,0.86,05:00:00


In [38]:

# Weekly on-time-performance (OTP) aggregation.
# NOTE: helper columns are added to `mbta_data` in place. Each assignment is
# recomputed from `service_date`, so re-running this cell gives the same result.

# Convert to datetime
mbta_data['service_date'] = pd.to_datetime(mbta_data['service_date'])

# Monday that opens the week of each service date (weekday: Monday=0 ... Sunday=6).
mbta_data['week_start'] = mbta_data['service_date'] - pd.to_timedelta(mbta_data['service_date'].dt.weekday, unit='d')
# ISO-8601 week number of that Monday. Kept as a grouping key for reference only:
# ISO numbering restarts around New Year (e.g. Monday 2024-12-30 is ISO week 1),
# so it must not be used to order weeks inside a month.
mbta_data['week_number'] = mbta_data['week_start'].dt.isocalendar().week
# A week is attributed to the month that contains its Monday.
mbta_data['month'] = mbta_data['week_start'].dt.strftime('%Y-%m')

# Group by month and week to calculate avg OTP ratio
# (unweighted mean of the per-row `otp_ratio` values).
reliability_grouped = (
    mbta_data.groupby(['month', 'week_start', 'week_number'])['otp_ratio']
    .mean()
    .reset_index()
)

# Position of each week within its month (1 = earliest week).
# Rank on the actual week start date rather than the ISO week number so the
# ordering is chronological even when the ISO number wraps at year end.
reliability_grouped['week_in_month'] = (
    reliability_grouped.groupby('month')['week_start']
    .rank(method='dense')
    .astype(int)
)

# Dropdown
# Months present in the aggregated data, as sorted 'YYYY-MM' strings. The same
# list feeds the dropdown options and the colour-scale domain.
month_list = sorted(reliability_grouped["month"].unique())
# `None` is the "All" entry; the chart treats an empty selection as "show every month".
month_options = [None] + month_list

# Build the human-readable labels from the month values themselves so they can
# never drift out of step with `month_options` (missing month, extra year, ...).
month_starts = pd.to_datetime(month_list, format="%Y-%m")
spans_multiple_years = len({ts.year for ts in month_starts}) > 1
month_labels = ["All"] + [
    # Add the year only when it is needed to tell two same-named months apart.
    f"{ts.month_name()} {ts.year}" if spans_multiple_years else ts.month_name()
    for ts in month_starts
]
month_dropdown = alt.binding_select(options=month_options, labels=month_labels, name="Select Month: ")
month_selection = alt.param(name="month_selection", bind=month_dropdown)

# One colour per month, matched positionally to `month_list` in the colour scale.
month_colors = [
    "#1f77b4", "#ff7f0e", "#d62728", "#2ca02c", "#bcbd22", "#17becf",
    "#9467bd", "#e377c2", "#8c564b", "#7f7f7f", "#aec7e8", "#ffbb78"
]

# Line chart
# One line per month, x = week position inside the month, y = mean OTP ratio.
# The dropdown parameter `month_selection` highlights a single month; every
# other month is drawn in translucent light gray.
line_chart = (
    alt.Chart(reliability_grouped)
    .transform_calculate(
        # True when nothing is selected ("All", or the parameter has not been
        # set yet) or when the row belongs to the selected month. `!month_selection`
        # covers both null and an unset parameter, which `=== null` alone does not.
        is_selected="!month_selection || datum.month === month_selection"
    )
    .mark_line(point=True)
    .encode(
        x=alt.X("week_in_month:O", title="Week in Month"),
        y=alt.Y("otp_ratio:Q", title="Average OTP Ratio", scale=alt.Scale(domain=[0.8, 1.0])),
        # Colour and opacity share the single `is_selected` flag computed above
        # so the two encodings cannot disagree.
        color=alt.condition(
            "datum.is_selected",
            alt.Color("month:N", title="Month", scale=alt.Scale(domain=month_list, range=month_colors)),
            alt.value("lightgray")
        ),
        opacity=alt.condition(
            "datum.is_selected",
            alt.value(1),
            alt.value(0.3)
        ),
        # Group the line mark by month explicitly: the colour field lives inside
        # a condition, so do not rely on it alone to split the data into lines.
        detail="month:N",
        tooltip=["month", "week_in_month", "otp_ratio"]
    )
    .add_params(month_selection)
    .properties(title="Average OTP Ratio per Week", width=700, height=400)
)
line_chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [40]:
# Save the chart to "avg_otp_month.html" (path relative to the working directory).
line_chart.save("avg_otp_month.html")


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
