MBTdelay

© Mark Mace 2019 markfmace@gmail.com

A tool to estimate the delay in travel time between two points in the MBTA rail system 



In [1]:
# GENERAL INCLUSIONS
import numpy as np
import requests
import os
import json
import matplotlib.pyplot as plt
import csv
import pandas as pd

# FOR DATES AND TIMES #
import time
import datetime
from dateutil import tz
from datetime import timedelta
import arrow


In [2]:
# IMPORT API KEYS -- OTHERWISE NEED TO BE SET IN NOTEBOOK
from MY_API_KEYS import *

In [3]:
# GENERAL PURPOSE FUNCTIONS FOR CONVERSION TO SECONDS
# e.g. x weeks -> seconds
def weeks(x):
    return int(7*24*60*60*x)

def days(x):
    return int(24*60*60*x)

def hours(x):
    return int(60*60*x)

def minutes(x):
    return int(60*x)

# CREAT FORMATTING STRING OF PYTHON JSON OBJECT
def jprint(obj):
    text = json.dumps(obj, sort_keys=True)
    print(text)


In [4]:
####################################
# LIST OF ALL STOPS PROVIDED BY MBTA 
####################################
gld=pd.read_csv('DATA/GreenLineDStops.csv')


# NEEDED FOR FILTERING -- THIS IS PREFILTERED #
# FIRST FILL OUT THE GREEN LINE BY UPTOWN AND DOWNTOWN 
# HARDCODED TO BE CORRECT
# NOTE THAT LAST NAME IS DESCRIPTION IS DESTINATION
# stops_list=pd.read_csv('DATA/stops.csv')
# REMOVE UNNCESSARY COLUMNS AND GET ONLY MBTA RAIL RAPID TRANSIT
# stops_list=stops_list[["stop_id","stop_code","stop_name","stop_desc","stop_lat","stop_lon","location_type"]]
# stops_list=stops_list[np.logical_and(stops_list['location_type']==0.0,stops_list['stop_desc'].str.contains("Green Line",na=False))]
# stops_list=stops_list.drop('location_type',axis=1)
# stops_list.to_csv("DATA/GreenLineStops.csv")
# gld=gld[gld['stop_name'].str.contains('|'.join(green_line_d))]

# MAKES NAMES FRIENDLIER FOR USE
# Quincy Center->QuincyCenter
gld['stop_name']=gld['stop_name'].str.replace(" ","")

# NORTHBOUND -- ODD NUMBERS
# GREEN LINE LECHMERE BOUND FROM RIVERSIDE
gl_lm_frs=gld[np.logical_or(gld['direction']==1,gld['direction']==-1)]

# SOUTHBOUND
# GRENE LINE RIVERSIDE BOUND FROM LECHMERE
gl_rs_flm=gld[np.logical_or(gld['direction']==0,gld['direction']==-1)]
gl_rs_flm=gl_rs_flm.sort_values(by=['stop_id'])

# SHOULD BE  -- ALL CHECKED #
print(len(gl_lm_frs),len(gl_rs_flm))

# # LIST ALL STATIONS ONCE -- UNIQUE FOR NAMES 
all_stations=gl_lm_frs
all_stations_name=np.unique(all_stations['stop_name'])

# LIST ALL STATIONS ONCE -- UNIQUE FOR NAMES 
all_stations_complete=[gl_lm_frs,gl_rs_flm]
all_stations_complete=pd.concat(all_stations_complete)
all_stations_complete_id=np.unique(all_stations_complete['stop_id'])


25 28


In [11]:
all_stations_complete

Unnamed: 0.1,Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,direction
0,7900,70156,70156,Arlington,Arlington - Green Line - Park Street & North,42.351902,-71.070893,1
2,8074,70176,70176,Beaconsfield,Beaconsfield - Green Line - Park Street & North,42.335809,-71.140841,1
4,8139,70158,70158,Boylston,Boylston - Green Line - Park Street & North,42.352531,-71.064682,1
6,8174,70178,70178,BrooklineHills,Brookline Hills - Green Line - Park Street & N...,42.331238,-71.127012,1
8,8225,70180,70180,BrooklineVillage,Brookline Village - Green Line - Park Street &...,42.332838,-71.116234,1
10,8263,70172,70172,ChestnutHill,Chestnut Hill - Green Line - Park Street & North,42.326633,-71.165299,1
12,8358,70154,70154,Copley,Copley - Green Line - Park Street & North,42.349871,-71.078049,1
14,8556,70166,70166,Eliot,Eliot - Green Line - Park Street & North,42.318957,-71.216776,1
16,8581,70186,70186,Fenway,Fenway - Green Line - Park Street & North,42.345362,-71.104162,1
18,8647,70201,70201,GovernmentCenter,Government Center - Green Line - North Station...,42.359705,-71.059215,1


In [5]:
# GET AND SAVE TRAIN DELAY DATA DATA FROM MBTA
# WHILE HEADWAY AT A GIVEN STATION A GIVEN START AND END TIME AND LINE
# DATES ARE DEFINED FOR THE LOCAL TIME 
# https://cdn.mbta.com/sites/default/files/developers/2018-10-30-mbta-realtime-performance-api-documentation-version-0-9-5-public.pdf
# "current_dep_dt":"1457455918",
#  "previous_dep_dt":"1457455185",
#  "headway_time_sec":"733",
#  "benchmark_headway_time_sec":"420"
def GetTrainHeadway(start_date,end_date,loc,route_col):

    # TRAIN SPECIFIC IDS #
    stop_loc="&stop="+str(int(loc))
    train_line='&route='+route_col
    
    # BASE API URL 
    base_url='http://realtime.mbta.com/developer/api/v2.1/'
    search_tag='headways'
    api_tag='?api_key='+MBTA_API_KEY
    form_tag='&format=json'
    url=base_url+search_tag+api_tag+form_tag+route_col

    # START AND END TIMES #
    start=int(time.mktime(datetime.datetime.strptime(start_date,"%Y-%m-%d %H:%M:%S").timetuple()))
    end  =int(time.mktime(datetime.datetime.strptime(end_date,  "%Y-%m-%d %H:%M:%S").timetuple()))
    #print(startTime,endTime)
    #print(endTime-startTime)

    # GET NUMBER OF WEEKS IN INTERVAL REQUESTED #
    num_weeks=int((end - start)/(60*60*24*7))
    print(num_weeks)

    train_times=[]

    # LOOP THROUGH WEEKS
    for i in range(0,num_weeks):
        
        from_time="&from_datetime="+str(start+weeks(i))
        to_time="&to_datetime="+str(start+weeks(i+1)-1)
        
        # FINAL URL FOR API REQUEST #
        request_URL=url+stop_loc+from_time+to_time
        #print(request_URL)
        
        response=requests.get(request_URL)
        dwell_times=response.json()['headways']

        for trip in dwell_times:
            # SAVES TIME THAT TRAIN ARRIVES AND DEPARTS FROM STATION #
            current_dep_dt=int(trip['current_dep_dt'])
            previous_dep_dt=int(trip['previous_dep_dt'])
            headway_time_sec=int(trip['headway_time_sec'])
            benchmark_headway_time_sec=int(trip['benchmark_headway_time_sec'])
            
            train_times.append([current_dep_dt,previous_dep_dt,
                                headway_time_sec,benchmark_headway_time_sec])

    return train_times



In [6]:
all_stations_complete_id

array([70150, 70151, 70152, 70153, 70154, 70155, 70156, 70157, 70158,
       70159, 70160, 70162, 70163, 70164, 70165, 70166, 70167, 70168,
       70169, 70170, 70171, 70172, 70173, 70174, 70175, 70176, 70177,
       70178, 70179, 70180, 70181, 70182, 70183, 70186, 70187, 70196,
       70197, 70198, 70199, 70200, 70201, 70202, 70203, 70204, 70205,
       70206, 70207, 70208, 70210, 71150, 71151])

In [15]:
start_date="2016-01-01 00:00:00"
end_date="2019-08-31 00:00:00"
train_color="Green"

sm_all_stations_complete_id=all_stations_complete_id[12:]
for station_id in sm_all_stations_complete_id:
# for station_id in ['70092']:
    station_name=all_stations_complete[all_stations_complete['stop_id']==station_id]['stop_name'].values[0]
    print(station_name," ",station_id)
    d_data=GetTrainHeadway(start_date,end_date,station_id,train_color)
    d_data=np.array(d_data)
    #print(d_data)
    with open('HEADWAY_DATA/Headway_'+station_name+'_'+str(station_id)+'.csv', mode='w') as dwell_file:
        dwell_file=csv.writer(dwell_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        dwell_file.writerow(["CURR_ARR_DT","PREV_ARR_DT","HDW_T","BNCH_HDW_T"])
        for line in range(len(d_data)):
            #print(d_data[line,0])
            dwell_file.writerow([d_data[line,0],d_data[line,1],d_data[line,2],d_data[line,3]])



Woodland   70163
191
Waban   70164
191
Waban   70165
191
Eliot   70166
191
Eliot   70167
191
NewtonHighlands   70168
191
NewtonHighlands   70169
191
NewtonCentre   70170
191
NewtonCentre   70171
191
ChestnutHill   70172
191
ChestnutHill   70173
191
Reservoir   70174
191
Reservoir   70175
191
Beaconsfield   70176
191
Beaconsfield   70177
191
BrooklineHills   70178
191
BrooklineHills   70179
191
BrooklineVillage   70180
191
BrooklineVillage   70181
191
Longwood   70182
191
Longwood   70183
191
Fenway   70186
191
Fenway   70187
191
ParkStreet   70196
191
ParkStreet   70197
191
ParkStreet   70198
191
ParkStreet   70199
191
ParkStreet   70200
191
GovernmentCenter   70201
191
GovernmentCenter   70202
191
Haymarket   70203
191
Haymarket   70204
191
NorthStation   70205
191
NorthStation   70206
191
SciencePark   70207
191
SciencePark   70208
191
Lechmere   70210
191
Kenmore   71150
191
Kenmore   71151
191


In [10]:
print(station_name)
print(d_data)

Woodland
[[1467219991 1467218274       1717        480]
 [1467228608 1467227452       1156        300]
 [1467256027 1467254298       1729        540]
 ...
 [1566604377 1566603950        427        540]
 [1566605288 1566604377        911        580]
 [1566605729 1566605288        441        580]]


In [770]:
r_am_faw

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon
7770,70061,70061.0,Alewife,Alewife - Red Line,42.395428,-71.142483
8432,70064,70064.0,Davis,Davis - Red Line - Alewife,42.39674,-71.121815
9320,70066,70066.0,Porter,Porter - Red Line - Alewife,42.3884,-71.119149
8747,70068,70068.0,Harvard,Harvard - Red Line - Alewife,42.373362,-71.118956
8328,70070,70070.0,Central,Central - Red Line - Alewife,42.365379,-71.103554
8925,70072,70072.0,Kendall,Kendall/MIT - Red Line - Alewife,42.362434,-71.085591
8276,70074,70074.0,Charles,Charles/MGH - Red Line - Alewife,42.361166,-71.070628
9236,70076,70076.0,ParkStreet,Park Street - Red Line - Alewife,42.356395,-71.062424
8479,70078,70078.0,DowntownCrossing,Downtown Crossing - Red Line - Alewife,42.355518,-71.060225
9573,70080,70080.0,SouthStation,South Station - Red Line - Alewife,42.352271,-71.055242
