# NYC Subway Plotting

In [1]:
# libraries
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from pyspark.sql import SparkSession, functions as F
import leafmap

In [2]:
# spark session initialization
spark = SparkSession.builder\
    .master("local[5]")\
    .appName("main")\
    .config("spark.sql.debug.maxToStringFields", 100)\
    .config("spark.driver.memory", '4g')\
    .config("spark.executor.instances", 5)\
    .getOrCreate()

spark

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/05/09 06:43:49 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/05/09 06:43:50 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
23/05/09 06:43:50 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042.
23/05/09 06:43:50 WARN Utils: Service 'SparkUI' could not bind on port 4042. Attempting port 4043.


In [3]:
# prediction data
pred_df = spark.read.csv('results/prediction.csv', header=True, inferSchema=True)
pred_df.printSchema()

root
 |-- id: string (nullable = true)
 |-- line: string (nullable = true)
 |-- stop_name: string (nullable = true)
 |-- borough: string (nullable = true)
 |-- n_label: string (nullable = true)
 |-- s_label: string (nullable = true)
 |-- entries: double (nullable = true)
 |-- exits: double (nullable = true)
 |-- arrests: integer (nullable = true)
 |-- complaints: integer (nullable = true)
 |-- summons: integer (nullable = true)
 |-- latitude: double (nullable = true)
 |-- longitude: double (nullable = true)
 |-- pred_price: double (nullable = true)



In [4]:
# convert to pandas
pred_pd = pred_df.toPandas()
pred_pd.head()

Unnamed: 0,id,line,stop_name,borough,n_label,s_label,entries,exits,arrests,complaints,summons,latitude,longitude,pred_price
0,F12,Queens Blvd,5 Av/53 St,M,Queens,Downtown & Brooklyn,171540100000.0,183989500000.0,1811,3603,1056,40.760167,-73.975224,229.723626
1,637,Lexington Av,Bleecker St,M,Uptown & The Bronx,Downtown,196565200000.0,153466700000.0,3444,5079,1046,40.725915,-73.994659,215.209436
2,603,Pelham,Middletown Rd,Bx,Pelham Bay Park,Manhattan,16027570000.0,9955782000.0,442,1094,842,40.843863,-73.836322,93.021918
3,725,Flushing,Times Sq-42 St,M,Queens,34 St - Hudson Yards,494357900000.0,184005900000.0,11199,12406,14342,40.755477,-73.987691,226.520469
4,606,Pelham,Zerega Av,Bx,Pelham Bay Park,Manhattan,9471628000.0,5313407000.0,649,1661,996,40.836488,-73.847036,93.021918


In [5]:
# create geometry
pred_pd['geometry'] = [Point(xy) for xy in zip(pred_pd['longitude'],pred_pd['latitude'])]
pred_pd.head()

Unnamed: 0,id,line,stop_name,borough,n_label,s_label,entries,exits,arrests,complaints,summons,latitude,longitude,pred_price,geometry
0,F12,Queens Blvd,5 Av/53 St,M,Queens,Downtown & Brooklyn,171540100000.0,183989500000.0,1811,3603,1056,40.760167,-73.975224,229.723626,POINT (-73.975224 40.760167)
1,637,Lexington Av,Bleecker St,M,Uptown & The Bronx,Downtown,196565200000.0,153466700000.0,3444,5079,1046,40.725915,-73.994659,215.209436,POINT (-73.994659 40.725915)
2,603,Pelham,Middletown Rd,Bx,Pelham Bay Park,Manhattan,16027570000.0,9955782000.0,442,1094,842,40.843863,-73.836322,93.021918,POINT (-73.836322 40.843863)
3,725,Flushing,Times Sq-42 St,M,Queens,34 St - Hudson Yards,494357900000.0,184005900000.0,11199,12406,14342,40.755477,-73.987691,226.520469,POINT (-73.987691 40.755477)
4,606,Pelham,Zerega Av,Bx,Pelham Bay Park,Manhattan,9471628000.0,5313407000.0,649,1661,996,40.836488,-73.847036,93.021918,POINT (-73.847036 40.836488)


In [6]:
# nta map
nta_map = gpd.read_file(r'nynta2020_23b/nynta2020.shp')
nta_map.to_crs(4326, inplace=True)

In [7]:
# make geopandas dataframe
pred_geo_df = gpd.GeoDataFrame(pred_pd, crs=4326, geometry = pred_pd.geometry)
pred_geo_df.to_crs(4326, inplace=True)

In [8]:
# get map plotting data
nta_demos = pd.read_excel('https://www1.nyc.gov/assets/planning/download/office/planning-level/nyc-population/acs/demo_2019_acs5yr_nta.xlsx')
nta_df = nta_map.merge(nta_demos, how='left', left_on='NTA2020', right_on='GeoID')

In [9]:
# save pred geometry dataframe
pred_geo_df.to_file("pred_geo.json", driver="GeoJSON")

In [10]:
# draw leafmap
m = leafmap.Map(center=(40,-100),zoom=4)
m.add_gdf(nta_df, layer_name='2020 NTA Demographic Information', info_mode='on_click')
m.add_point_layer(filename=r'pred_geo.json', popup=['stop_name', 'arrests', 'complaints', 'summons', 'entries', 'exits', 'pred_price'], layer_name="Stations")
m

Map(center=[40, -100], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zoom_out_t…