In [2]:
import pandas as pd
pd.set_option("display.max_columns", 100)
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
wego = pd.read_csv('../data/headway_data_clean.csv')

In [4]:
# Parse the three timestamp columns, which share one format, into datetime64 values.
timestamp_format = '%Y-%m-%d %H:%M:%S'
for timestamp_column in ('SCHEDULED_TIME', 'ACTUAL_ARRIVAL_TIME', 'ACTUAL_DEPARTURE_TIME'):
    wego[timestamp_column] = pd.to_datetime(wego[timestamp_column], format = timestamp_format)
# DATE is parsed separately because it uses a slash-separated format.
wego['DATE'] = pd.to_datetime(wego['DATE'], format = "%Y/%m/%d")


In [5]:
# Add a column holding each row's day of the week as an integer
# (pandas `dt.weekday` numbering: Monday = 0 ... Sunday = 6).
wego['DAYS_OF_THE_WEEK'] = wego['DATE'].dt.weekday


**Question 1: How much impact does being late or too spaced out at the first stop have downstream?**



*Does being late to start a trip, or having too large a gap between buses, have an effect on the remainder of the trip?*

In [6]:
# Keep only the rows that have a recorded ADHERENCE value.
# NOTE(review): rows are removed here before the stops of each trip are numbered further down,
# so a trip that loses a row ends up with fewer, renumbered stops - confirm that is intended.
wego = wego[wego['ADHERENCE'].notna()]

In [None]:
# Restrict the data to route 3 rows whose direction is 'TO DOWNTOWN'.
on_route_3 = wego['ROUTE_ABBR'] == 3
heading_downtown = wego['ROUTE_DIRECTION_NAME'] == 'TO DOWNTOWN'
route_3_to_downtown = wego.loc[on_route_3 & heading_downtown]

In [None]:
# Work on an independent copy of the filtered rows so the columns added in the
# following cells are written to this frame and not to a slice of `wego`.
route_3_to_downtown = route_3_to_downtown.copy()

In [None]:
# TRIP_ID and CALENDAR_ID are cast to strings so the two can be concatenated into one key.
for id_column in ('TRIP_ID', 'CALENDAR_ID'):
    route_3_to_downtown[id_column] = route_3_to_downtown[id_column].astype(str)

In [None]:
# Build one identifier per trip by appending CALENDAR_ID to TRIP_ID (both are strings here).
route_3_to_downtown = route_3_to_downtown.assign(
    TRIP_IDS = lambda trips: trips['TRIP_ID'] + trips['CALENDAR_ID']
)

In [None]:
# Number the rows of every trip 1, 2, 3, ... in their current order; `stops` is that running count.
# NOTE(review): this is a row counter, not the route's real stop sequence. Rows with a missing
# ADHERENCE were dropped earlier, so a trip's first remaining row is labelled stop 1 even when
# its true first stop is gone - consider ROUTE_STOP_SEQUENCE / TRIP_EDGE instead; confirm.
route_3_to_downtown['row_num'] = 1
route_3_to_downtown['stops'] = route_3_to_downtown.groupby('TRIP_IDS').cumcount() + 1

In [None]:
# Reshape to one row per trip (TRIP_IDS) and one column per stop number, holding ADHERENCE.
# ADHERENCE is in minutes; in the rows displayed later in this notebook an arrival ahead of
# schedule is positive and a late arrival is negative.
# pivot_table's default aggregation (the mean) applies if a trip/stop pair occurs more than once.
route_3_to_downtown_pivot = pd.pivot_table(
    route_3_to_downtown,
    index = 'TRIP_IDS',
    columns = 'stops',
    values = 'ADHERENCE',
)

In [None]:
#.reset_index(drop = True)

In [None]:
route_3_to_downtown_pivot

In [None]:
# Keep the first five stop columns, then only the trips that have a value in every one of them.
first_five_stops = route_3_to_downtown_pivot.iloc[:, :5]
route_3_new = first_five_stops.dropna()

In [None]:
route_3_new

In [None]:
route_3_new[1].mean().round(2)

In [None]:
route_3_new[5].mean().round(2)

In [None]:
route_3_new[1].corr(route_3_new[5])

In [None]:
route_3_to_downtown_pivot[1].corr(route_3_to_downtown_pivot[5])

In [None]:
# Give the numeric stop columns word names so they can be referenced in a regression formula.
stop_names = dict(zip(range(1, 6), ['one', 'two', 'three', 'four', 'five']))
route_3_new = route_3_new.rename(columns = stop_names)

In [None]:
# Simple linear regression of adherence at stop five on adherence at stop one.
model = smf.ols(formula = "five ~ one", data = route_3_new)
lm = model.fit()
lm.summary()

**On route 3 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.7530 minutes (about 45 seconds) in adherence at the fifth stop.**

In [None]:
# Predicted adherence at stop five for a trip whose adherence at stop one is 2 minutes:
# y = slope * x + intercept (the intercept is added).
# Both coefficients are read from the fitted model `lm` rather than retyped from the summary table.
first_stop_adherence = 2
lm.params['one'] * first_stop_adherence + lm.params['Intercept']

**Where y = mx + b, with y being the adherence on arrival at the last stop analysed and x the adherence at stop 1: for the bus to arrive no more than 1 minute late (y = -1), it needs to leave stop 1 with an adherence of at least x minutes.**

**-1=(coeff one)x + intercept**

In [None]:
#-1 = (0.7530)(X)+(-0.5153)

In [None]:
# First-stop adherence x at which the model predicts an adherence of -1 at stop five:
# solve y = slope * x + intercept for x, i.e. x = (y - intercept) / slope.
# The intercept is subtracted from the target; the coefficients come from the fitted model `lm`.
target_adherence = -1
(target_adherence - lm.params['Intercept']) / lm.params['one']

In [None]:
route_number = 3
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:5].dropna()

In [None]:
route_df_new[5].mean().round(2)

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[5])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:"five"})

In [None]:
lm = smf.ols("five ~ one", data=route_df_new).fit()
lm.summary()

In [None]:
# Predicted adherence at stop five for a trip whose adherence at stop one is -1:
# y = slope * x + intercept (the intercept is added, not subtracted).
# Both coefficients are read from the fitted model `lm` rather than retyped from the summary table.
first_stop_adherence = -1
lm.params['one'] * first_stop_adherence + lm.params['Intercept']

**On route 3 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.8184 minutes (about 49 seconds) in adherence at the fifth stop.**

***ROUTE 7 FROM DOWNTOWN***

In [7]:
wego['ROUTE_ABBR'].unique()

array([22, 23,  3,  7, 50, 52, 55, 56])

In [8]:
route_number = 7
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [9]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [10]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')

In [11]:
route_df

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,TIME_POINT_ABBR,ROUTE_STOP_SEQUENCE,TRIP_EDGE,LATITUDE,LONGITUDE,SCHEDULED_TIME,ACTUAL_ARRIVAL_TIME,ACTUAL_DEPARTURE_TIME,ADHERENCE,SCHEDULED_HDWY,ACTUAL_HDWY,HDWY_DEV,ADJUSTED_EARLY_COUNT,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,DAYS_OF_THE_WEEK,TRIP_IDS,row_num,stops
6136,120230801,1,99465617,2023-08-01,7,700,3144,348421,0,FROM DOWNTOWN,HBHS,5.0,2,36.107575,-86.812719,2023-08-01 05:40:00,2023-08-01 05:09:44,2023-08-01 05:09:44,30.266666,,,,0,0,1,0,,0,27,0.000000,1,348421120230801,1,1
6140,120230801,1,99465621,2023-08-01,7,700,3144,348423,0,FROM DOWNTOWN,MCC5_9,3.0,1,36.167091,-86.781923,2023-08-01 06:15:00,2023-08-01 05:20:00,2023-08-01 05:27:29,47.516666,10.0,,,1,0,0,0,0.0,0,50,7.483333,1,348423120230801,1,1
6141,120230801,1,99465622,2023-08-01,7,700,3144,348423,0,FROM DOWNTOWN,21BK,4.0,0,36.138881,-86.800622,2023-08-01 06:30:00,2023-08-01 05:34:24,2023-08-01 05:34:24,55.600000,10.0,,,1,0,0,0,0.0,0,61,0.000000,1,348423120230801,1,2
6142,120230801,1,99465623,2023-08-01,7,700,3144,348423,0,FROM DOWNTOWN,HBHS,5.0,2,36.107575,-86.812719,2023-08-01 06:42:00,2023-08-01 05:42:03,2023-08-01 05:42:03,59.950000,,,,1,0,0,0,,0,75,0.000000,1,348423120230801,1,3
6146,120230801,1,99465627,2023-08-01,7,700,3144,348425,0,FROM DOWNTOWN,MCC5_9,3.0,1,36.167091,-86.781923,2023-08-01 07:30:00,2023-08-01 06:07:11,2023-08-01 06:23:28,66.533333,15.0,18.016666,3.016666,1,0,0,0,0.0,0,98,16.283333,1,348425120230801,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350319,120230930,2,100406606,2023-09-30,7,702,2312,353447,0,FROM DOWNTOWN,MCC5_9,3.0,1,36.167091,-86.781923,2023-09-30 21:45:00,2023-09-30 21:43:52,2023-09-30 21:50:54,-5.900000,30.0,36.716666,6.716666,0,0,1,0,0.0,0,504,7.033333,5,353447120230930,1,1
350321,120230930,2,100406608,2023-09-30,7,702,2312,353447,0,FROM DOWNTOWN,HBHS,5.0,2,36.107575,-86.812719,2023-09-30 22:11:00,2023-09-30 22:22:39,2023-09-30 22:22:39,-11.650000,,,,0,1,0,0,,0,529,0.000000,5,353447120230930,1,2
350325,120230930,2,100406612,2023-09-30,7,702,2312,353449,0,FROM DOWNTOWN,MCC5_9,3.0,1,36.167091,-86.781923,2023-09-30 22:45:00,2023-09-30 22:49:19,2023-09-30 22:49:19,-4.316666,30.0,31.866666,1.866666,0,0,1,0,0.0,0,552,0.000000,5,353449120230930,1,1
350326,120230930,2,100406613,2023-09-30,7,702,2312,353449,0,FROM DOWNTOWN,21BK,4.0,0,36.138881,-86.800622,2023-09-30 22:59:00,2023-09-30 23:21:05,2023-09-30 23:21:05,-22.083333,30.0,47.500000,17.500000,0,1,0,0,0.0,0,563,0.000000,5,353449120230930,1,2


In [None]:
route_df_new = route_df_pivot.iloc[:, 0:3].dropna()


In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[3].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[3])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three'})

In [None]:
lm = smf.ols("three ~ one", data=route_df_new).fit()
lm.summary()

In [None]:
# First-stop adherence x at which the model predicts an adherence of -1 at stop three:
# solve y = slope * x + intercept for x, i.e. x = (y - intercept) / slope.
# The whole numerator is divided by the slope, so it is parenthesised;
# the coefficients come from the fitted model `lm` instead of being retyped.
target_adherence = -1
(target_adherence - lm.params['Intercept']) / lm.params['one']

In [None]:
#y = -1.5649 + 1.0030 * x

In [None]:
#-1.5649 + 1.0030

**On route 7 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately 1.0030 minutes (about 60 seconds) in adherence at the third stop.**

**ROUTE 7 TO DOWNTOWN**

In [None]:
route_number = 7
route_direction = 'TO DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:3].dropna()

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[3].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[3])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three'})

In [None]:
lm = smf.ols("three ~ one", data=route_df_new).fit()
lm.summary()

In [None]:
# First-stop adherence x at which the model predicts an adherence of -1 at stop three:
# solve y = slope * x + intercept for x, i.e. x = (y - intercept) / slope.
# The whole numerator is divided by the slope, so it is parenthesised;
# the coefficients come from the fitted model `lm` instead of being retyped.
target_adherence = -1
(target_adherence - lm.params['Intercept']) / lm.params['one']

**On route 7 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.8931 minutes (about 54 seconds) in adherence at the third stop.**

**ROUTE 22 TO DOWNTOWN**

In [None]:
route_number = 22
route_direction = 'TO DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')

In [None]:
wego[wego['ROUTE_ABBR']==22].head(20)

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:3].dropna()

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[3].mean().round(2)

In [None]:
route_df_new_4 = route_df_pivot.iloc[:, 0:4].dropna()

In [None]:
route_df_new[1].corr(route_df_new[3])

In [None]:
 route_df_new_4[1].corr(route_df_new_4[4])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three'})

In [None]:
route_df_new_4 = route_df_new_4.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four'})

In [None]:
lm = smf.ols("three ~ one", data=route_df_new).fit()
lm.summary()

**On route 22 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately [TODO: insert the `one` coefficient from the summary above] minutes in adherence at the third stop. This is when the bus has only three stops.**

In [None]:
lm = smf.ols("four ~ one", data=route_df_new_4).fit()
lm.summary()

**On route 22 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately [TODO: insert the `one` coefficient from the summary above] minutes in adherence at the fourth stop. This is when the bus has only four stops.**

**ROUTE 22 FROM DOWNTOWN**

In [None]:
route_number = 22
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:3].dropna()

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[3].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[3])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three'})

In [None]:
lm = smf.ols("three ~ one", data=route_df_new).fit()
lm.summary()

**On route 22 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately [TODO: insert the `one` coefficient from the summary above] minutes in adherence at the third stop.**

**ROUTE 23 FROM DOWNTOWN**

In [None]:
route_number = 23
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:4].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[4].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[4])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four'})

In [None]:
lm = smf.ols("four ~ one", data=route_df_new).fit()
lm.summary()

**On route 23 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately [TODO: insert the `one` coefficient from the summary above] minutes in adherence at the fourth stop.**

**ROUTE 23 TO DOWNTOWN**

In [None]:
route_number = 23
route_direction = 'TO DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()


In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:4].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[4].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[4])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four'})

In [None]:
lm = smf.ols("four ~ one", data=route_df_new).fit()
lm.summary()

**On route 23 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately [TODO: insert the `one` coefficient from the summary above] minutes in adherence at the fourth stop.**

**ROUTE 50 TO DOWNTOWN**

In [None]:
route_number = 50
route_direction = 'TO DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:6].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[6].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[6])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five', 6:'six'})

In [None]:
lm = smf.ols("six ~ one", data=route_df_new).fit()
lm.summary()

**On route 50 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.7214 minutes (about 43 seconds) in adherence at the sixth stop.**

**ROUTE 50 FROM DOWNTOWN**

In [None]:
route_number = 50
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:6].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[6].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[6])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five', 6:'six'})

In [None]:
lm = smf.ols("six ~ one", data=route_df_new).fit()
lm.summary()

**On route 50 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately [TODO: insert the `one` coefficient from the summary above] minutes in adherence at the sixth stop.**

**ROUTE 52 TO DOWNTOWN**

In [None]:
route_number = 52
route_direction = 'TO DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:5].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[5].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[5])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five'})

In [None]:
lm = smf.ols("five ~ one", data=route_df_new).fit()
lm.summary()

In [None]:
# First-stop adherence x at which the model predicts an adherence of -1 at stop five:
# solve y = slope * x + intercept for x, i.e. x = (y - intercept) / slope.
# The whole numerator is divided by the slope, so it is parenthesised;
# the coefficients come from the fitted model `lm` instead of being retyped.
target_adherence = -1
(target_adherence - lm.params['Intercept']) / lm.params['one']

**On route 52 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.8862 minutes (about 53 seconds) in adherence at the fifth stop.**

**ROUTE 52 FROM DOWNTOWN**

In [None]:
route_number = 52
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:5].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[5].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[5])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five'})

In [None]:
lm = smf.ols("five ~ one", data=route_df_new).fit()
lm.summary()

**On route 52 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.8895 minutes (about 53 seconds) in adherence at the fifth stop.**

In [None]:
# Predicted adherence at stop five for a trip whose adherence at stop one is -2:
# y = slope * x + intercept, with both coefficients read from the fitted model `lm`
# rather than retyped from the summary table.
first_stop_adherence = -2
lm.params['one'] * first_stop_adherence + lm.params['Intercept']

In [None]:
# Predicted adherence at stop five for a trip whose adherence at stop one is 2:
# y = slope * x + intercept, with both coefficients read from the fitted model `lm`
# rather than retyped from the summary table.
first_stop_adherence = 2
lm.params['one'] * first_stop_adherence + lm.params['Intercept']

**ROUTE 55 TO DOWNTOWN**

In [None]:
route_number = 55
route_direction = 'TO DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:6].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[6].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[6])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five', 6:'six'})

In [None]:
lm = smf.ols("six ~ one", data=route_df_new).fit()
lm.summary()

**On route 55 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.8993 minutes (about 54 seconds) in adherence at the sixth stop.**

**ROUTE 55 FROM DOWNTOWN**

In [None]:
route_number = 55
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:6].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[6].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[6])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five', 6:'six'})

In [None]:
lm = smf.ols("six ~ one", data=route_df_new).fit()
lm.summary()

In [None]:
# First-stop adherence x at which the model predicts an adherence of -1 at stop six:
# solve y = slope * x + intercept for x, i.e. x = (y - intercept) / slope.
# The whole numerator is divided by the slope, so it is parenthesised;
# the coefficients come from the fitted model `lm` instead of being retyped.
target_adherence = -1
(target_adherence - lm.params['Intercept']) / lm.params['one']

**On route 55 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately 1.0783 minutes (about 1 minute 5 seconds) in adherence at the sixth stop.**

**ROUTE 56 TO DOWNTOWN**

In [None]:
route_number = 56
route_direction = 'TO DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:5].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[5].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[5])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five'})

In [None]:
lm = smf.ols("five ~ one", data=route_df_new).fit()
lm.summary()

**On route 56 to downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.2590 minutes (about 16 seconds) in adherence at the fifth stop.**

**ROUTE 56 FROM DOWNTOWN**

In [None]:
route_number = 56
route_direction = 'FROM DOWNTOWN'

route_df = wego.loc[
    (wego['ROUTE_ABBR'] == route_number) & (wego['ROUTE_DIRECTION_NAME']== route_direction)
]
route_df = route_df.copy()

In [None]:
# Cast TRIP_ID and CALENDAR_ID to strings first, then concatenate them into TRIP_IDS,
# a single key that identifies one trip on one calendar id.
# (The concatenation must come after the casts; adding the raw columns would sum them.)
route_df['TRIP_ID'] = route_df['TRIP_ID'].astype(str)
route_df['CALENDAR_ID'] = route_df['CALENDAR_ID'].astype(str)
route_df['TRIP_IDS'] = route_df['TRIP_ID'] + route_df['CALENDAR_ID']

In [None]:
#allows for each stop to be numbered in their respective route IDs
route_df['row_num'] = 1
route_df['stops'] = route_df.groupby('TRIP_IDS')['row_num'].cumsum()

In [None]:
#creating a pivot table that will make the columns the stop #, the rows is each trip, and the values the adherence(amount of minutes late)
route_df_pivot = route_df.pivot_table(values = 'ADHERENCE', index = 'TRIP_IDS', columns = 'stops')
route_df_pivot

In [None]:
route_df_new = route_df_pivot.iloc[:, 0:6].dropna()
route_df_new

In [None]:
route_df_new[1].mean().round(2)

In [None]:
route_df_new[6].mean().round(2)

In [None]:
route_df_new[1].corr(route_df_new[6])

In [None]:
route_df_new = route_df_new.rename(columns = {1:"one", 2:'two', 3:'three', 4:'four', 5:'five', 6:'six'})

In [None]:
lm = smf.ols("six ~ one", data=route_df_new).fit()
lm.summary()

In [None]:
0.3739

**On route 56 from downtown, each additional minute of adherence at the first stop is associated with a change of approximately 0.3739 minutes (about 22 seconds) in adherence at the sixth stop.**

In [None]:
wego[wego['ROUTE_ABBR']==55].head(20)

In [None]:
route_df_new