In [1]:
################################################################################
# File name:    "station_boundary_dist.ipynb"
#
# Project title:    Boston Affordable Housing project (visiting scholar project)
#
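# Description:    Computes the distance from every train station to every
#                 boundary in the boundary shapefile and keeps the
#                 station-boundary pairs that are at most 0.5 miles apart.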
#
# Inputs:    ./all_stations.csv
#            ./adm3_latlong.shp
#
# Outputs:    ./station_boundary_dist.csv
#             ./station_boundary_dist_log.txt
#
# Created:    10/05/2022
# Updated:    10/05/2022
#
# Author:    Nicholas Chiumenti
################################################################################

In [10]:
from datetime import datetime
import pandas as pd
import geopandas as gpd

In [3]:
stations_path = "/home/a1nfc04/Documents/boston_zoning_sdrive/data/shapefiles/train_stops/all_stations.csv"
boundary_path = "/home/a1nfc04/local_to_aws/Python_Projects/closest_boundary_py/boundary_shapefiles/adm3_latlong.shp"

# Both layers are reprojected to this CRS so that distances between them can
# be computed in a common coordinate system.
target_crs = "EPSG:26986"

# train stations file
# The CSV's own "geometry" column is discarded; point geometry is rebuilt
# from the station_lon / station_lat columns instead.
stations_df = pd.read_csv(stations_path).drop(columns = "geometry")
station_points = gpd.points_from_xy(stations_df['station_lon'],
                                    stations_df['station_lat'],
                                    crs = "EPSG:4269")
stations_gdf = gpd.GeoDataFrame(stations_df, geometry = station_points)
stations_gdf = stations_gdf.to_crs(target_crs)

# boundaries file
boundary_cols = ["LEFT_FID", "RIGHT_FID", "zo_usety", "municipal", "ncessch", "geometry"]
boundary_gdf = gpd.read_file(boundary_path).to_crs(target_crs)
# Take an explicit copy of the column subset: adding a column to a bare
# selection of another frame is what triggers pandas' SettingWithCopyWarning.
boundary_gdf = boundary_gdf[boundary_cols].copy()
# Keep the row index as an explicit id column for each boundary.
boundary_gdf["boundary_using_id"] = boundary_gdf.index

# Distances are only meaningful when both layers share one CRS.
assert stations_gdf.crs == boundary_gdf.crs

# distance between stations and boundaries

In [4]:
# merge stations and boundaries
# A cross join pairs every station with every boundary. Both inputs carry a
# "geometry" column, so the merged frame holds them as "geometry_x"
# (station) and "geometry_y" (boundary).
gdf_1 = stations_gdf
gdf_2 = boundary_gdf

gdf_3 = pd.merge(gdf_1, gdf_2, how = "cross")

# A cross join must yield len(left) * len(right) rows. Checking against the
# inputs (rather than the literal 10953753 used for the original run) keeps
# the check valid when either input file is updated.
expected_rows = len(gdf_1) * len(gdf_2)
assert len(gdf_3) == expected_rows, (
    f"cross join produced {len(gdf_3):,} rows, expected {expected_rows:,} "
    f"({len(gdf_1):,} stations x {len(gdf_2):,} boundaries)"
)

In [5]:
%%time

# calc distance in meters to boundary
gdf_3["dist_meters"] = gdf_3["geometry_x"].distance(gdf_3["geometry_y"])

# convert to miles
gdf_3["dist_miles"] = gdf_3["dist_meters"] * (1/1609.344)

# drop obs > .5 miles away
gdf_4 = gdf_3[(gdf_3["dist_miles"] <= .5)]

CPU times: user 1min 2s, sys: 1.72 s, total: 1min 3s
Wall time: 1min 3s


In [11]:
# create log and save date stamps
save_date = datetime.now().strftime("%C%y_%m_%d")
log_date = datetime.now().strftime('%D at %I:%M:%S %p')

# set log and save paths
save_path = "/home/a1nfc04/Documents/boston_zoning_sdrive/data/shapefiles/train_stops/station_boundary_dist.csv"
log_path = "/home/a1nfc04/Documents/boston_zoning_sdrive/python_programs/transit_distances/station_boundary_dist_log.txt"

# save dataset as .csv
save_df = gdf_4
save_df.to_csv(save_path, index = False)

# write to log
with open(log_path,'a') as file:
    file.write(f"Finish running on {log_date}: {len(save_df):,} observations written to '{save_path}'.\n")

# Done!
print(f"Done! {len(save_df):,} observations written")

Done! 31,778 observations written
