In [20]:
import pandas as pd
import altair as alt

In [21]:
df =  pd.read_csv("MBTA_Line_and_Stop.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7920 entries, 0 to 7919
Data columns (total 18 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   FID                  7920 non-null   int64 
 1   mode                 7920 non-null   int64 
 2   season               7920 non-null   object
 3   route_id             7920 non-null   object
 4   route_name           7920 non-null   object
 5   direction_id         7920 non-null   int64 
 6   day_type_id          7920 non-null   object
 7   day_type_name        7920 non-null   object
 8   time_period_id       7920 non-null   object
 9   time_period_name     7920 non-null   object
 10  stop_name            7920 non-null   object
 11  stop_id              7920 non-null   object
 12  total_ons            7920 non-null   int64 
 13  total_offs           7920 non-null   int64 
 14  number_service_days  7920 non-null   int64 
 15  average_ons          7920 non-null   int64 
 16  averag

In [22]:
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [29]:
domain = ['Green', 'Blue', 'Red', 'Orange']
range_ = ['green', 'blue', 'red', 'orange']

df_no_off_peak = df[df["time_period_name"] != "OFF_PEAK"]
df_no_off_peak["Stop name"] = df_no_off_peak["stop_name"].apply(lambda x: x.title())
df_no_off_peak["Time period"] = df_no_off_peak["time_period_name"].apply(lambda x: x.replace("_", " ").title())

axis_labels = ("datum.label == 'time_period_01' ? 'Very Early Morning': \
               datum.label == 'time_period_02' ? 'Early AM': \
               datum.label == 'time_period_03' ? 'AM Peak': \
               datum.label == 'time_period_04' ? 'Mid-day Base': \
               datum.label == 'time_period_05' ? 'Mid-day Peak': \
               datum.label == 'time_period_06' ? 'PM Peak': \
               datum.label == 'time_period_07' ? 'Evening': \
               datum.label == 'time_period_08' ? 'Late Evening': \
               datum.label == 'time_period_09' ? 'Night': 'Frequent'")

legend = alt.selection_point(fields=['route_id'], bind='legend')

line = alt.Chart(df_no_off_peak).mark_line().encode(
    x = alt.X("time_period_id",axis=alt.Axis(labelExpr=axis_labels, title="Time Period", labelAngle=-45)),
    y = alt.Y("average(average_flow)", axis=alt.Axis(title="Average of avg. flow")),
    color = alt.Color('route_id', scale=alt.Scale(domain=domain, range=range_), legend=alt.Legend(title="Route")),
    opacity = alt.condition(legend, alt.value(1), alt.value(0))
).properties(
    height = 500,
    width = 350,
    title={"text": "Average of Avg. Flows Over Time Periods"}
).add_params(
    legend
)

scatter = alt.Chart(df_no_off_peak).mark_point().encode(
    x = alt.X("average_ons", axis=alt.Axis(title="Average Ons")),
    y = alt.Y("average_offs", axis=alt.Axis(title="Average Offs")),
    color= alt.Color("route_id", scale=alt.Scale(domain=domain, range=range_)),
    tooltip = ["Stop name", "Time period"],
    size= alt.value(20)
).properties(
    width = 350,
    height = 500,
    title={"text": "Average Ons vs Average Offs",
          "subtitle": "Black outlined dots are husky stations"}
)

husky = alt.Chart(df_no_off_peak).mark_point().encode(
    x = "average_ons",
    y = "average_offs",
    color = alt.value("black"),
    opacity = alt.condition(alt.FieldOneOfPredicate(field = "stop_name", oneOf = ["Northeastern University", "Ruggles"]), 
                            alt.value(1), alt.value(0)),
    size = alt.value(50)
)

points = (scatter+husky).transform_filter(legend)

line | points

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_off_peak["Stop name"] = df_no_off_peak["stop_name"].apply(lambda x: x.title())
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_no_off_peak["Time period"] = df_no_off_peak["time_period_name"].apply(lambda x: x.replace("_", " ").title())
