In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt

In [52]:
# Speed-restriction records; this CSV is written by speed_restrictions_preprocessing.ipynb.
speed_restr = pd.read_csv('speed_restrictions.csv')

# Gated-station-entry (GSE) aggregates written by data_processing.ipynb, one file
# per year. Each row holds the AM-rush, PM-rush and full-day entry counts for one
# station on one date.
gse2018, gse2020, gse2023 = map(
    pd.read_csv,
    ('2018_entries_agg.csv', '2020_entries_agg.csv', '2023_entries_agg.csv'),
)
gse2020

Unnamed: 0.1,Unnamed: 0,date,stop_id,am_rush,pm_rush,full_day
0,0,2020-01-01,place-alfcl,444,722,2449
1,1,2020-01-01,place-andrw,354,576,1668
2,2,2020-01-01,place-aport,920,1200,3883
3,3,2020-01-01,place-aqucl,104,1007,2015
4,4,2020-01-01,place-armnl,99,807,1592
...,...,...,...,...,...,...
22529,22529,2020-12-31,place-welln,259,239,816
22530,22530,2020-12-31,place-wimnl,90,64,260
22531,22531,2020-12-31,place-wlsta,300,59,576
22532,22532,2020-12-31,place-wondl,378,166,935


In [53]:
# Parse every date-like column from its CSV string form into pandas datetimes.
# Restrictions carry a start and an end date; the entry frames carry a single date.
for date_col in ('start_date', 'end_date'):
    speed_restr[date_col] = pd.to_datetime(speed_restr[date_col])

for entries in (gse2018, gse2020, gse2023):
    entries['date'] = pd.to_datetime(entries['date'])

In [37]:
# COVID-period subset of the 2020 entries: 2020-03-15 through 2020-07-01, both ends included.
covid_start = pd.to_datetime('2020-03-15')
covid_end = pd.to_datetime('2020-07-01')

gse_COVID = (
    gse2020[gse2020.date.between(covid_start, covid_end)]
    .reset_index(drop=True)           # fresh 0..n-1 index, old row labels discarded
    .drop(columns=['Unnamed: 0'])     # leftover index column from the CSV round-trip
)
gse_COVID

Unnamed: 0,date,stop_id,am_rush,pm_rush,full_day
0,2020-03-15,place-alfcl,204,291,961
1,2020-03-15,place-andrw,325,470,1359
2,2020-03-15,place-aport,815,852,3075
3,2020-03-15,place-aqucl,82,483,1109
4,2020-03-15,place-armnl,78,380,818
...,...,...,...,...,...
6751,2020-07-01,place-welln,523,340,1330
6752,2020-07-01,place-wimnl,269,176,707
6753,2020-07-01,place-wlsta,384,130,840
6754,2020-07-01,place-wondl,905,409,2079


In [54]:
# Tag rows with their weekday number (pandas convention: Monday=0 ... Sunday=6).
# Restrictions get one weekday column per endpoint; entry frames get a single one.
for source_col, weekday_col in (
    ('start_date', 'start_day_of_week'),
    ('end_date', 'end_day_of_week'),
):
    speed_restr[weekday_col] = speed_restr[source_col].dt.weekday

# Note: the COVID subset is tagged here, not the full 2020 frame.
for entries in (gse2018, gse_COVID, gse2023):
    entries['day_of_week'] = entries['date'].dt.weekday

gse_COVID

Unnamed: 0,date,stop_id,am_rush,pm_rush,full_day,day_of_week
0,2020-03-15,place-alfcl,204,291,961,6
1,2020-03-15,place-andrw,325,470,1359,6
2,2020-03-15,place-aport,815,852,3075,6
3,2020-03-15,place-aqucl,82,483,1109,6
4,2020-03-15,place-armnl,78,380,818,6
...,...,...,...,...,...,...
6751,2020-07-01,place-welln,523,340,1330,2
6752,2020-07-01,place-wimnl,269,176,707,2
6753,2020-07-01,place-wlsta,384,130,840,2
6754,2020-07-01,place-wondl,905,409,2079,2


In [49]:
# Mean of each numeric column per (weekday, station) pair over the COVID window.
gse_COVID_avg_per_wd = (
    gse_COVID
    .groupby(['day_of_week', 'stop_id'])
    .mean(numeric_only=True)
)

# Flat version: value columns marked with an "_avg" suffix, group keys back as columns.
gse_COVID_avg = (
    gse_COVID_avg_per_wd
    .rename(columns=lambda col: f'{col}_avg')
    .reset_index()
)
gse_COVID_avg

Unnamed: 0,day_of_week,stop_id,am_rush_avg,pm_rush_avg,full_day_avg
0,0,place-alfcl,307.125000,248.125000,862.437500
1,0,place-andrw,358.437500,364.250000,1148.750000
2,0,place-aport,467.866667,385.533333,1427.133333
3,0,place-aqucl,61.785714,256.285714,600.142857
4,0,place-armnl,71.250000,197.562500,441.875000
...,...,...,...,...,...
443,6,place-welln,104.937500,141.875000,448.687500
444,6,place-wimnl,70.937500,56.125000,226.562500
445,6,place-wlsta,82.625000,63.125000,249.875000
446,6,place-wondl,224.285714,186.071429,748.500000


In [55]:
# Function to return average ridership per day of week and station between two dates
# (inclusive) for any GSE DataFrame (2018, 2020/COVID, 2023, ...)
def avg_per_day_between(start, end, gse_df):
    """Average the numeric entry columns per (weekday, station) within a date window.

    Parameters
    ----------
    start, end : datetime-like or str
        Window bounds, both inclusive. Anything comparable with the datetime
        ``date`` column works (e.g. ``'2023-05-14'`` or a ``pd.Timestamp``).
    gse_df : pandas.DataFrame
        Entries frame with a datetime ``date`` column plus ``day_of_week`` and
        ``stop_id`` columns. It is not modified.

    Returns
    -------
    pandas.DataFrame
        One row per (``day_of_week``, ``stop_id``) pair present in the window.
        Every numeric column is averaged and renamed with an ``_avg`` suffix.
        Empty when no row falls inside the window.
    """
    in_window = (gse_df.date >= start) & (gse_df.date <= end)

    # 'Unnamed: 0' / 'index' are leftover index columns from CSV round-trips. They
    # must not be averaged, but a frame that lacks them should not raise either.
    timeframe = gse_df.loc[in_window].drop(columns=['Unnamed: 0', 'index'], errors='ignore')

    return (
        timeframe
        .groupby(['day_of_week', 'stop_id'])
        .mean(numeric_only=True)
        .add_suffix('_avg')
        .reset_index()
    )

# Example: 2023 weekday/station averages for 2023-05-14 through 2023-09-21 (inclusive).
avg_per_day_between(start='2023-05-14', end='2023-09-21', gse_df=gse2023)

Unnamed: 0,day_of_week,stop_id,am_rush_avg,pm_rush_avg,full_day_avg
0,0,place-alfcl,1908.263158,974.736842,3736.368421
1,0,place-andrw,1067.052632,832.105263,2821.210526
2,0,place-aport,2278.473684,2217.526316,7544.157895
3,0,place-aqucl,270.000000,1907.894737,3814.578947
4,0,place-armnl,374.631579,1487.736842,2920.473684
...,...,...,...,...,...
443,6,place-welln,367.631579,445.526316,1494.000000
444,6,place-wimnl,326.368421,305.263158,1037.052632
445,6,place-wlsta,231.411765,115.588235,579.470588
446,6,place-wondl,873.736842,809.157895,3028.105263


In [56]:
# Inspect the restrictions frame, which now carries parsed start/end dates and
# the derived start/end weekday columns.
speed_restr

Unnamed: 0.1,Unnamed: 0,ID,Loc_GTFS_Stop_ID,Restriction_Status,Restriction_Reason,Track_Direction,Line,Branch,Track_Name,Location_Type,...,Restriction_Distance_Feet,Line_Restricted_Track_Pct,Systemwide_Restricted_Track_Pct,SR_Restriction_Distance_Span,Restriction_Path,start_date,end_date,Restriction_Length_Days,start_day_of_week,end_day_of_week
0,0,20,place-aport,Active Restriction,,WB,Blue Line,Blue Line,BL WB,Station,...,101.0,0.001533,0.000140,Multi-Segment,End,2023-10-17,2023-10-19,2 days,1,3
1,1,20,place-wimnl | place-aport,Active Restriction,,WB,Blue Line,Blue Line,BL WB,Between Stations,...,699.0,0.010608,0.000970,Multi-Segment,Start,2023-10-17,2023-10-19,2 days,1,3
2,2,21,place-mvbcl | place-aport,Active Restriction,Track,EB,Blue Line,Blue Line,BL EB,Between Stations,...,400.0,0.006070,0.000555,Single Segment,Start|End,2023-12-14,2023-12-31,17 days,3,6
3,3,43,place-astao | place-welln,Active Restriction,Track,NB,Orange Line,Orange Line,OL NB,Between Stations,...,1000.0,0.008403,0.001388,Single Segment,Start|End,2023-10-03,2023-10-12,9 days,1,3
4,4,44,place-rugg,Active Restriction,Track,NB,Orange Line,Orange Line,OL NB,Station,...,400.0,0.003361,0.000555,Multi-Segment,Start,2023-10-17,2023-12-21,65 days,1,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
724,724,577753,place-mdftf | place-balsq,Active Restriction,Track,WB,Green Line,Green Line Trunk,GL Kenmore-College Ave WB,Between Stations,...,400.0,0.001403,0.000555,Single Segment,Start|End,2023-09-20,2023-10-10,20 days,2,1
725,725,577754,place-balsq | place-mgngl,Active Restriction,Track,WB,Green Line,Green Line Trunk,GL Kenmore-College Ave WB,Between Stations,...,500.0,0.001754,0.000694,Single Segment,Start|End,2023-09-20,2023-10-10,20 days,2,1
726,726,577755,place-gilmn | place-esomr,Active Restriction,Track,WB,Green Line,Green Line Trunk,GL Kenmore-College Ave WB,Between Stations,...,400.0,0.001403,0.000555,Single Segment,Start|End,2023-09-20,2023-10-10,20 days,2,1
727,727,577756,place-gilmn | place-esomr,Active Restriction,Track,WB,Green Line,Green Line Trunk,GL Kenmore-College Ave WB,Between Stations,...,400.0,0.001403,0.000555,Single Segment,Start|End,2023-09-20,2023-10-10,20 days,2,1


In [None]:
# Function to create visualization for a speed restriction.
# Plots the average GSE for 2018 and 2023 per weekday during the same timeframe of the speed restriction,
# along with the COVID average per weekday

def speed_restr_vis(restriction_id):
    """Plot average full-day entries per weekday at the stops of one speed restriction.

    Three series are drawn on the current matplotlib figure (which is cleared first):

    * 2018: the restriction's calendar window shifted back to 2018,
    * 2023: the restriction's actual window,
    * COVID: the precomputed ``gse_COVID_avg`` weekday averages, limited to the
      weekdays that occur in the 2023 window (a two-day restriction therefore
      yields at most two points per series).

    Each point is the per-stop ``full_day_avg`` summed over the stops named in the
    restriction's ``Loc_GTFS_Stop_ID`` value(s). A series with no data in its
    window is drawn empty.

    Parameters
    ----------
    restriction_id : int or str
        Matched against ``speed_restr.ID`` after ``int()`` conversion.

    Raises
    ------
    IndexError
        If no row of ``speed_restr`` has this ID.
    ValueError
        If the restriction has no start or end date.
    """
    # NOTE(review): assumes gse2018 only contains dates from 2018 — confirm.
    peak_year = 2018

    matches = speed_restr[speed_restr.ID == int(restriction_id)]
    if matches.empty:
        raise IndexError(f'No speed restriction with ID {restriction_id!r}')

    # One ID may span several rows (e.g. a station row plus a between-stations
    # row); the dates are taken from the first row.
    restr_start = matches.start_date.iloc[0]
    restr_end = matches.end_date.iloc[0]
    if pd.isna(restr_start) or pd.isna(restr_end):
        raise ValueError(f'Speed restriction {restriction_id!r} has no start/end date')

    # Loc_GTFS_Stop_ID is either a single stop or "stop_a | stop_b" for a
    # between-stations segment; collect every stop across all rows of this ID.
    stop_ids = {
        stop.strip()
        for location in matches.Loc_GTFS_Stop_ID.dropna()
        for stop in str(location).split('|')
    }

    # Filtering the 2018 frame with the restriction's own dates would select
    # nothing, so move the same calendar window back to the baseline year.
    year_shift = pd.DateOffset(years=restr_start.year - peak_year)
    peak_data = avg_per_day_between(restr_start - year_shift, restr_end - year_shift, gse2018)
    speed_restr_data = avg_per_day_between(restr_start, restr_end, gse2023)

    # Keep only the weekdays the restriction actually covers. An isin() test also
    # handles windows that wrap around the week (e.g. start Wed=2, end Tue=1),
    # which a simple >= / <= range on weekday numbers would not.
    covid_data = gse_COVID_avg[gse_COVID_avg.day_of_week.isin(speed_restr_data.day_of_week)]

    def _weekday_profile(avg_df):
        # Total average full-day entries over the affected stops, per weekday.
        at_stops = avg_df[avg_df.stop_id.isin(stop_ids)]
        return at_stops.groupby('day_of_week').full_day_avg.sum()

    # Plot the three datasets
    plt.clf()
    for label, data in (
        ('2018 (same window)', peak_data),
        ('2023 (restriction window)', speed_restr_data),
        ('COVID (2020-03-15 to 2020-07-01)', covid_data),
    ):
        profile = _weekday_profile(data)
        # Markers keep one- or two-day restrictions visible (a lone point has no line).
        plt.plot(profile.index, profile.values, marker='o', label=label)

    plt.xticks(range(7), ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])
    plt.xlabel('Day of week')
    plt.ylabel('Average full-day entries (sum over affected stops)')
    plt.title(f'Speed restriction {restriction_id}: {restr_start:%Y-%m-%d} to {restr_end:%Y-%m-%d}')
    plt.legend()