In [1]:
################################################################################
# File name:    "dist_to_south_station.ipynb"
#
# Project title:    Boston Affordable Housing project (visiting scholar project)
#
# Description:    This program takes a dataset of MBTA and commuter rail stations
#                 and calculates the travel distance from that station to South
#                 Station in downtown Boston, MA. The output of this program is
#                 eventually combined with the manhattan and euclidean distance 
#                 output and used in the rd_amenities.do file. 
#
#                 Uses the HERE transit routing api to calculate the travel 
#                 distance (in meters)
#
#                 Note: The transit routes are date/time dependent and so will
#                 change depending on when the program is run.
#
#                 See: https://developer.here.com/documentation/public-transit/dev_guide/quick-start.html
#
# Inputs:    ./all_stations.csv
#
# Outputs:    ./dist_south_station_<YYYY_MM_DD>.csv (stamped with the run date)
#
# Created:    06/01/2022
# Updated:    09/29/2022
#
# Author:    Nicholas Chiumenti
################################################################################

In [2]:
import os
import requests
import json
import pandas as pd
from shapely.geometry import Point, LineString
from time import sleep
from datetime import datetime

In [3]:
## Part 1: read the station coordinates file into a DataFrame
data_path = "/home/a1nfc04/Documents/boston_zoning_sdrive/data/shapefiles/train_stops/all_stations.csv"
data_df = pd.read_csv(data_path)

# sanity check: the stations file is expected to contain exactly 303 rows
assert data_df.shape[0] == 303

In [4]:
# set proxy to get past firewall
os.environ["https_proxy"] = "http://p1proxy.frb.org:8080"
os.environ["http_proxy"] = "http://p1proxy.frb.org:8080"
os.environ["no_proxy"] = "frb.org,frb.pvt,frb.gov"

# set api variable constants
CA_BUNDLE_FP = "/etc/pki/tls/certs/ca-bundle.crt" # don't change this

BASE_URL = "https://transit.router.hereapi.com/v8/routes" # don't change this

# The HERE API key is a credential, so it is read from the environment rather
# than being stored in the notebook. Export HERE_API_KEY before starting the
# kernel. NOTE(review): the key that used to be hard-coded here should be
# treated as exposed and rotated.
YOUR_API_KEY = os.environ.get("HERE_API_KEY")
if not YOUR_API_KEY:
    raise RuntimeError(
        "Set the HERE_API_KEY environment variable to your HERE transit routing API key."
    )

SOUTH_STATION = "42.35259298206998,-71.05517164043283" # this can be changed

REQUEST_TIMEOUT_S = 30 # give up on a single api call after this many seconds
PAUSE_BETWEEN_CALLS_S = 1 # pause between consecutive api calls


def _total_route_length_m(route_json):
    """Sum the section lengths of the first route in a HERE routing response.

    Parameters
    ----------
    route_json : dict
        The decoded JSON body returned by the routing endpoint.

    Returns
    -------
    int or float or None
        The sum of ``travelSummary["length"]`` over every section of
        ``route_json["routes"][0]``, or ``None`` when the response does not
        contain that structure (for example when no route was found).
    """
    try:
        sections = route_json["routes"][0]["sections"]
        return sum(section["travelSummary"]["length"] for section in sections)
    except (KeyError, IndexError, TypeError):
        return None


n_stations = len(data_df)

# loop through stations; n_done is a 1-based counter used only for the
# progress message, i is the DataFrame index label used to write results back
for n_done, (i, row) in enumerate(data_df.iterrows(), start=1):

    # set origin as current station
    origin = f"{row['station_lat']},{row['station_lon']}"

    # str() guards against a missing (NaN) station name, which has no .title()
    place_name = str(row['station_name']).title().replace(' ', '')

    # Pass the query as a dict so requests URL-encodes every value; building
    # the URL by hand breaks if a station name contains '&', '#' or '+'.
    query = {
        "apiKey": YOUR_API_KEY,
        "origin": f"{origin};placeName={place_name}",
        "destination": f"{SOUTH_STATION};placeName=SouthStation",
        "return": "travelSummary",
    }

    try:
        response = requests.get(
            BASE_URL,
            params=query,
            verify=CA_BUNDLE_FP,
            timeout=REQUEST_TIMEOUT_S,
        )
    except requests.RequestException as err:
        # A network failure for one station should not discard the results
        # already collected. Only the exception class is recorded because the
        # full message can contain the request URL, which includes the api key.
        data_df.loc[i, "json_output"] = f"request failed: {type(err).__name__}"
    else:
        # store the status code whether or not the call succeeded
        data_df.loc[i, "response_code"] = response.status_code

        if response.status_code == 200:
            try:
                output = response.json() # the json output from the api call
            except ValueError:
                # a 200 response whose body is not valid JSON: keep the raw text
                data_df.loc[i, "json_output"] = response.text
            else:
                # store the total route length in meters when route data exists
                total_len_m = _total_route_length_m(output)
                if total_len_m is not None:
                    data_df.loc[i, "length_m"] = total_len_m

                # always keep the decoded output as a string for inspection
                data_df.loc[i, "json_output"] = str(output)

    print(f"{n_done} of {n_stations} complete")

    sleep(PAUSE_BETWEEN_CALLS_S)

0 of 303 complete
1 of 303 complete
2 of 303 complete
3 of 303 complete
4 of 303 complete
5 of 303 complete
6 of 303 complete
7 of 303 complete
8 of 303 complete
9 of 303 complete
10 of 303 complete
11 of 303 complete
12 of 303 complete
13 of 303 complete
14 of 303 complete
15 of 303 complete
16 of 303 complete
17 of 303 complete
18 of 303 complete
19 of 303 complete
20 of 303 complete
21 of 303 complete
22 of 303 complete
23 of 303 complete
24 of 303 complete
25 of 303 complete
26 of 303 complete
27 of 303 complete
28 of 303 complete
29 of 303 complete
30 of 303 complete
31 of 303 complete
32 of 303 complete
33 of 303 complete
34 of 303 complete
35 of 303 complete
36 of 303 complete
37 of 303 complete
38 of 303 complete
39 of 303 complete
40 of 303 complete
41 of 303 complete
42 of 303 complete
43 of 303 complete
44 of 303 complete
45 of 303 complete
46 of 303 complete
47 of 303 complete
48 of 303 complete
49 of 303 complete
50 of 303 complete
51 of 303 complete
52 of 303 complete
53 

In [7]:
# create log and save date stamps
# Both stamps come from a single clock reading so they cannot disagree if the
# cell runs across midnight. "%Y" and "%m/%d/%y" are the portable equivalents
# of the platform-specific "%C%y" and "%D" directives and give the same text.
run_finished = datetime.now()
save_date = run_finished.strftime("%Y_%m_%d")
log_date = run_finished.strftime("%m/%d/%y at %I:%M:%S %p")

# set log and save paths
save_path = f"/home/a1nfc04/Documents/boston_zoning_sdrive/data/shapefiles/train_stops/dist_south_station_{save_date}.csv"
log_path = "/home/a1nfc04/Documents/boston_zoning_sdrive/python_programs/transit_distances/dist_south_station_log.txt"

# save dataset as .csv
# save_df is another name for data_df (not a copy); it holds the station rows
# plus the columns added by the api loop
save_df = data_df
save_df.to_csv(save_path, index = False)

# write to log (append mode keeps the entries from earlier runs)
with open(log_path,'a') as file:
    file.write(f"Finish running on {log_date}: {len(save_df):,} observations written to '{save_path}'.\n")
    file.write("NOTE! Results may change ever time the program is run.\n")

# Done!
print(f"Done! {len(save_df):,} observations written")

Done! 303 observations written
