In [2]:
import pandas as pd

# Gated Station Entries Data Processing

In [2]:
filename = 'GSE_2018.csv'

In [3]:
df = pd.read_csv(filename)

In [4]:
# clean time and convert to datetime
df['time_period'] = df['time_period'].str[1:-1]
df['datetime'] = df['service_date'] + " " + df['time_period']
df['datetime'] = pd.to_datetime(df['datetime'])

In [5]:
# get entry total for each station for each date by hour
df = df.groupby(['datetime', 'stop_id'])['gated_entries'].sum().reset_index()
df['gated_entries'] = df['gated_entries'].astype(int)
df['date'] = df['datetime'].dt.date
df['hour'] = df['datetime'].dt.hour
df = df.groupby(['date', 'hour', 'stop_id'])['gated_entries'].sum().reset_index()

In [6]:
# define am rush hour as 6-10, define pm rush hour as 3-7
df_am = df.loc[(df['hour'] >= 6) & (df['hour'] <= 10), :]
df_am = df_am.groupby(['date', 'stop_id'])['gated_entries'].sum().reset_index()
df_am['am_rush'] = df_am['gated_entries']
df_am = df_am.loc[:, ['date', 'stop_id', 'am_rush']]

df_pm = df.loc[(df['hour'] >= 15) & (df['hour'] <= 19), :]
df_pm = df_pm.groupby(['date', 'stop_id'])['gated_entries'].sum().reset_index()
df_pm['pm_rush'] = df_pm['gated_entries']
df_pm = df_pm.loc[:, ['date', 'stop_id', 'pm_rush']]

In [7]:
# get entry total for each station by date
df_day = df.groupby(['date', 'stop_id'])['gated_entries'].sum().reset_index()
df_day['full_day'] = df_day['gated_entries']
df_day = df_day.loc[:, ['date', 'stop_id', 'full_day']]

In [8]:
# combine everything into one dataframe
df_full_day = df_am.merge(df_pm, left_on=['date', 'stop_id'], right_on=['date', 'stop_id'])
df_full_day = df_full_day.merge(df_day, left_on=['date', 'stop_id'], right_on=['date', 'stop_id'])

In [9]:
df_full_day

Unnamed: 0,date,stop_id,am_rush,pm_rush,full_day
0,2015-01-01,place-alfcl,477,847,2775
1,2015-01-01,place-andrw,426,631,1765
2,2015-01-01,place-aport,725,1121,3598
3,2015-01-01,place-aqucl,164,1270,2411
4,2015-01-01,place-armnl,188,1002,2117
...,...,...,...,...,...
22311,2015-12-31,place-welln,2281,1849,5648
22312,2015-12-31,place-wimnl,577,386,1434
22313,2015-12-31,place-wlsta,1758,755,3442
22314,2015-12-31,place-wondl,2344,1377,5111


# Speed Restrictions by Day Data Processing

### Columns to drop:
1. Location_Description
2. Direction_Sort
3. Restriction_Distance_Miles
4. Line_Total_Track_Miles
5. Systemwide_Total_Track_Miles

In [18]:
dataframes = []
for i in range(1,13):
    speed_restrictions_filename_template:str = f'data/2023-{i:0>2}_Speed_Restrictions_By_Day.csv'
    month_df = pd.read_csv(speed_restrictions_filename_template)

    # Drop specified columns
    month_df.drop(labels=['Location_Description', 'Direction_Sort', 'Restriction_Distance_Miles', 'Line_Total_Track_Miles', 'Systemwide_Total_Track_Miles'], axis=1, inplace=True)

    # add dataframe to list
    dataframes.append(month_df)

speed_restrictions_2023_df:pd.DataFrame = pd.concat(dataframes)
speed_restrictions_2023_df.drop('SRV_MAIN_UNIQUE_ID', axis=1, inplace=True)
speed_restrictions_2023_df
    

Unnamed: 0,Calendar_Date,ID,Track_Direction,Line,Branch,Track_Name,Loc_GTFS_Stop_ID,Location_Type,Restriction_Status,Date_Restriction_Reported,...,SR_Restriction_Distance_Span,Restriction_Path,Restriction_Days_Active_On_Calendar_Day,Restriction_Days_to_Clear,Daily_Restriction_Count_Start,Month_Restriction_Count_Start,Restriction_Count_New,Restriction_Count_Cleared,Month_Restriction_Count_End,Daily_Restriction_Count_End
0,2023-01-01,329396,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,place-armnl | place-boyls,Between Stations,Active Restriction,2021-09-22,...,Single Segment,Start|End,466,,1,1,0,0,0,1
1,2023-01-01,334870,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,place-north | place-spmnl,Between Stations,Active Restriction,2021-10-05,...,Single Segment,Start|End,453,,1,1,0,0,0,1
2,2023-01-01,443512,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,place-north | place-spmnl,Between Stations,Active Restriction,2022-07-12,...,Single Segment,Start|End,173,,1,1,0,0,0,1
3,2023-01-01,358277,EB,Green Line,Green Line Trunk,GL Kenmore-College Ave EB,place-spmnl | place-lech,Between Stations,Active Restriction,2021-12-17,...,Single Segment,Start|End,380,,1,1,0,0,0,1
4,2023-01-01,358285,WB,Green Line,Green Line Trunk,GL Kenmore-College Ave WB,place-lech | place-spmnl,Between Stations,Active Restriction,2021-12-17,...,Single Segment,Start|End,380,,1,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6739,2023-12-05,570802,NB,Orange Line,Orange Line,OL NB,place-forhl | place-grnst,Between Stations,Active Restriction,2023-08-22,...,Single Segment,Start|End,105,,1,0,0,0,0,1
6740,2023-12-04,570802,NB,Orange Line,Orange Line,OL NB,place-forhl | place-grnst,Between Stations,Active Restriction,2023-08-22,...,Single Segment,Start|End,104,,1,0,0,0,0,1
6741,2023-12-03,570802,NB,Orange Line,Orange Line,OL NB,place-forhl | place-grnst,Between Stations,Active Restriction,2023-08-22,...,Single Segment,Start|End,103,,1,0,0,0,0,1
6742,2023-12-02,570802,NB,Orange Line,Orange Line,OL NB,place-forhl | place-grnst,Between Stations,Active Restriction,2023-08-22,...,Single Segment,Start|End,102,,1,0,0,0,0,1
