In [1]:
################################################################################
# File name:    "all_stations.ipynb"
#
# Project title:    Boston Affordable Housing project (visiting scholar project)
#
# Description:    This file compiles the commuter rail stations and the mbta
#                 rapid transit stations into one file to be used in calculating
#                 the distance to downtown measure in rd_amenties.do
#
# Inputs:    ./TRAINS_NODE.shp
#            ./MBTA_NODE.shp
#
# Outputs:    ./all_stations.csv
#
# Created:    06/01/2022
# Updated:    09/29/2022
#
# Author:    Nicholas Chiumenti
################################################################################

In [2]:
import datetime
import pandas as pd
import geopandas as gpd

# Set paths for all inputs/outputs

In [3]:
## set paths throughout program
## CHANGE THESE TO ADJUST WHERE FILES ARE LOADED AND SAVED

# common prefixes shared by the paths below (edit here to relocate everything)
_sdrive_root = "/home/a1nfc04/Documents/boston_zoning_sdrive"
_train_stops_dir = f"{_sdrive_root}/data/shapefiles/train_stops"

# input: commuter rail station shape file
trains_path = f"{_train_stops_dir}/trains/TRAINS_NODE.shp"

# input: mbta rapid transit shape file
mbta_path = f"{_train_stops_dir}/mbta_rapid_transit/MBTA_NODE.shp"

# output: combined station list (csv)
save_path = f"{_train_stops_dir}/all_stations.csv"

# output: run log, appended to on every run
log_path = f"{_sdrive_root}/python_programs/transit_distances/all_stations_log.txt"


# Import train station stop data and append together

In [4]:
## load commuter rail stops
commuter_nodes = gpd.read_file(trains_path)

# drop non-commuter rail observations and those outside of MA:
# keep rows with a non-null LINE_BRNCH, STATE == "MA", and both the
# C_RAILSTAT and MAP_STA flags set to "Y".
# .copy() makes the filtered frame independent of the frame it was sliced
# from, so the column assignments below never write into a view
# (avoids pandas' SettingWithCopyWarning).
commuter_nodes = commuter_nodes[
    (commuter_nodes["LINE_BRNCH"].notnull())
    & (commuter_nodes["STATE"] == "MA")
    & (commuter_nodes["C_RAILSTAT"] == "Y")
    & (commuter_nodes["MAP_STA"] == "Y")
].copy()

# tag source file and line
commuter_nodes["LAYER"] = "TRAIN_NODES"
commuter_nodes["LINE"] = "COMMUTER RAIL"

# error check: a bare comparison in the middle of a cell is evaluated and
# thrown away, so it has to be asserted to actually stop the run on a mismatch
assert commuter_nodes.crs == 26986, (
    f"unexpected CRS for commuter rail nodes: {commuter_nodes.crs}"
)

## load mbta rapid transit
mbta_nodes = gpd.read_file(mbta_path)

# tag source file
mbta_nodes["LAYER"] = "MBTA_NODES"

# error check: a bare comparison in the middle of a cell is evaluated and
# thrown away, so it has to be asserted to actually stop the run on a mismatch
assert mbta_nodes.crs == 26986, (
    f"unexpected CRS for mbta rapid transit nodes: {mbta_nodes.crs}"
)

## append commuter rail stations to mbta stations
stations_gdf = pd.concat([commuter_nodes, mbta_nodes], ignore_index=True)

# convert the crs to epsg:4269 (lat lon coords)
# reassign instead of inplace=True so re-running the cell always starts from
# the freshly concatenated frame
stations_gdf = stations_gdf.to_crs("EPSG:4269")

# in the lat/lon crs the point's y is the latitude and x is the longitude
stations_gdf["STATION_LAT"] = stations_gdf.geometry.y
stations_gdf["STATION_LON"] = stations_gdf.geometry.x

# ignore_index=True above produced a fresh 0..n-1 index, reused here as the id
stations_gdf["STATION_ID"] = stations_gdf.index

# keep only the output columns (in this order) and rename them to lower case
output_columns = {
    "STATION_ID": "station_id",
    "STATION": "station_name",
    "LINE": "line",
    "LAYER": "layer",
    "STATION_LAT": "station_lat",
    "STATION_LON": "station_lon",
    "geometry": "geometry",
}
stations_gdf = stations_gdf[list(output_columns)].rename(columns=output_columns)

# error checking
expected_station_count = 303  # number of stations the combined file should hold
assert stations_gdf.crs == 4269, f"unexpected CRS after reprojection: {stations_gdf.crs}"
assert len(stations_gdf) == expected_station_count, (
    f"expected {expected_station_count} stations, found {len(stations_gdf)}"
)
assert stations_gdf["station_id"].is_unique, "station_id values are not unique"

# save file
stations_gdf.to_csv(save_path, index=False)

In [5]:
# save a log .txt file
# "%m/%d/%y" is the spelled-out form of the "%D" shorthand; "%D" is not among
# the strftime directives Python documents as portable, so it can fail on
# platforms whose C library lacks it
date = datetime.datetime.now().strftime('%m/%d/%y at %I:%M:%S %p')

# append (never overwrite) so the log keeps one line per run
with open(log_path, 'a', encoding='utf-8') as log_file:
    log_file.write(f"Finish running on {date}: {len(stations_gdf):,} observations written to '{save_path}'.\n")

# Done!
print(f"Done! {len(stations_gdf):,} observations written")

Done! 303 observations written
