In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

In [23]:
df = pd.read_csv('export.csv')

In [24]:
df

Unnamed: 0,date,stop_id,am_rush,pm_rush,full_day
0,2014-01-01,place-alfcl,418,850,2672
1,2014-01-01,place-andrw,496,643,2049
2,2014-01-01,place-aport,1082,1298,4291
3,2014-01-01,place-aqucl,155,1299,2435
4,2014-01-01,place-armnl,242,1105,2402
...,...,...,...,...,...
226018,2023-12-31,place-welln,299,713,1539
226019,2023-12-31,place-wimnl,284,260,813
226020,2023-12-31,place-wlsta,284,375,989
226021,2023-12-31,place-wondl,627,991,2589


In [25]:
key = pd.read_csv('key.csv')

In [26]:
key

Unnamed: 0,stop_id,route_or_line,station_name
0,place-welln,Orange Line,Wellington
1,place-tumnl,Orange Line,Tufts Medical Center
2,place-sdmnl,Blue Line,Suffolk Downs
3,place-asmnl,Red Line,Ashmont
4,place-dwnxg,Orange Line,Downtown Crossing
...,...,...,...
62,place-qamnl,Red Line,Quincy Adams
63,place-rbmnl,Blue Line,Revere Beach
64,place-symcl,Green Line,Symphony
65,place-gover,Green Line,Government Center


In [27]:
# Merge the DataFrames on 'station_id'
merged_df = pd.merge(df, key, on='stop_id', how='left')

# Duplicate rows based on multiple route_or_line values
duplicated_rows = merged_df[merged_df['route_or_line'].notnull() & merged_df.duplicated(subset=['stop_id'])]

# Concatenate the original and duplicated rows
final_df = pd.concat([merged_df, duplicated_rows], ignore_index=True).dropna()

In [29]:
final_df.head()

Unnamed: 0,date,stop_id,am_rush,pm_rush,full_day,route_or_line,station_name
0,2014-01-01,place-alfcl,418,850,2672,Red Line,Alewife
1,2014-01-01,place-andrw,496,643,2049,Red Line,Andrew
2,2014-01-01,place-aport,1082,1298,4291,Blue Line,Airport
3,2014-01-01,place-aqucl,155,1299,2435,Blue Line,Aquarium
4,2014-01-01,place-armnl,242,1105,2402,Green Line,Arlington


In [33]:
final_df['year'] = final_df['date'].str[:4]

In [39]:
final_df = final_df.groupby(['year', 'route_or_line', 'station_name', 'stop_id'])[['am_rush', 'pm_rush', 'full_day']].mean().reset_index()

In [63]:
df_melted = final_df.melt(id_vars=['year', 'route_or_line', 'station_name'], value_vars=['am_rush', 'pm_rush', 'full_day'], var_name='period', value_name='value')

In [64]:
df_melted

Unnamed: 0,year,route_or_line,station_name,period,value
0,2014,Blue Line,Airport,am_rush,2248.537723
1,2014,Blue Line,Aquarium,am_rush,279.277092
2,2014,Blue Line,Beachmont,am_rush,1285.786207
3,2014,Blue Line,Bowdoin,am_rush,150.460905
4,2014,Blue Line,Government Center,am_rush,730.075000
...,...,...,...,...,...
1993,2023,Red Line,Quincy Center,full_day,2327.704420
1994,2023,Red Line,Savin Hill,full_day,733.050562
1995,2023,Red Line,Shawmut,full_day,911.865169
1996,2023,Red Line,South Station,full_day,8974.181564


In [83]:
options = ['am_rush', 'pm_rush', 'full_day']
labels = ['am_rush', 'pm_rush', 'full_day']

input_radio = alt.binding_radio(
    options=options,
    labels=labels,
    name='Time: '
)

input_dropdown = alt.binding_select(
    options=list(df_melted['route_or_line'].unique()),
    labels=list(df_melted['route_or_line'].unique()),
    name="Line: ",
)

selection = alt.selection_point(
    value='full_day',
    fields=['period'],
    bind=input_radio,
)

selection2 = alt.selection_point(
    value='Blue Line',
    fields=['route_or_line'],
    bind=input_dropdown,
)

### CHATGPT CODE THAT DOESN'T WORK

# Create first dropdown for selecting train line
line_dropdown = alt.binding_select(options=final_df['route_or_line'].unique().tolist(), name='Select Line: ')
line_select = alt.selection_single(fields=['line'], bind=line_dropdown, value='Red Line')

# Create second dropdown for selecting stations based on the selected line
station_dropdown = alt.binding_select(options=[], name='Select Station: ')
station_select = alt.selection_single(fields=['station_name'], bind=station_dropdown, value=None)

# Update options of the station dropdown based on the selected line
def update_station_dropdown(line):
    stations = final_df.loc[final_df['route_or_line'] == line, 'station_name'].unique().tolist()
    station_dropdown.options = stations


alt.Chart(df_melted).mark_line().encode(
    x='year:Q',
    y='value:Q',
    color='period:N'
).add_params(
    line_select, station_select
).transform_filter(
    line_select
).transform_filter(
    station_select
)

