- Merge the datasets together to perform a competitive analysis of the cafes around each MRT station

In [2]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
from shapely.geometry import shape
import numpy as np
import matplotlib.pyplot as plt
import json
import folium

### Merge population data to neighborhoods

In [6]:
# Read the neighborhood boundary polygons and project them to EPSG:32610
# before any area is measured.
neighborhoods = gpd.read_file(
    'data/geojson/polygon/local-area-boundary.geojson'
).to_crs(epsg=32610)

# 2016 census population table; it is joined to the polygons on `name`.
population = pd.read_csv('data/neighborhood-pop-2016.csv')

# Attach the population to each polygon, derive the area (km^2) and the
# population density (population per km^2), then expose the key column as
# `neighborhood`.
neighborhoods = (
    neighborhoods
    .merge(population, on='name')
    .assign(
        area_km2=lambda gdf: gdf['geometry'].area / 10**6,
        population_density=lambda gdf: gdf['population'] / gdf['area_km2'],
    )
    .rename(columns={'name': 'neighborhood'})
)

# Persist the enriched neighborhoods for the downstream cells.
neighborhoods.to_file(
    'data/processed_data/neighborhoods_with_pop.geojson',
    driver='GeoJSON',
)

In [18]:
# Preview the first rows of the merged neighborhood table
# (population, area_km2 and population_density were added above).
neighborhoods.head()

Unnamed: 0,neighborhood,geo_point_2d,geometry,population,area_km2,population_density
0,Downtown,"{'lon': -123.11656700827415, 'lat': 49.2807470...","POLYGON ((491836.349 5459718.919, 492419.675 5...",62030,4.674227,13270.643919
1,Hastings-Sunrise,"{'lon': -123.04026992328848, 'lat': 49.2779340...","POLYGON ((495892.653 5460083.768, 496226.106 5...",34575,8.332271,4149.529075
2,Kerrisdale,"{'lon': -123.15957618168902, 'lat': 49.2236554...","POLYGON ((486957.910 5451434.123, 487209.002 5...",13975,6.608907,2114.570409
3,Marpole,"{'lon': -123.1283816596551, 'lat': 49.21020746...","POLYGON ((492208.402 5450157.054, 491875.556 5...",24460,6.003074,4074.57895
4,Oakridge,"{'lon': -123.12302460370566, 'lat': 49.2264031...","POLYGON ((492310.707 5453376.091, 492269.051 5...",13030,4.023603,3238.390885


## Transit stations

### assign neighborhood to station

In [7]:
# Load the rapid transit stations and project them to EPSG:32610 so they share
# a CRS with the neighborhood polygons.
rapid_transit_stations = gpd.read_file(
    'data/geojson/point/rapid_transit_stations.geojson'
).to_crs(epsg=32610)

# Tag each station with the neighborhood polygon it intersects. The left join
# keeps stations that match no polygon (their neighborhood is null).
rapid_transit_stations = rapid_transit_stations.sjoin(
    neighborhoods, how="left", predicate='intersects'
)

# Of everything the join brought in, only `neighborhood` is kept.
rapid_transit_stations = rapid_transit_stations.drop(
    columns=['index_right', 'area_km2', 'population', 'population_density', 'geo_point_2d']
)

# Station names may repeat, so build a 1-based string id from the row index.
rapid_transit_stations['station_id'] = (rapid_transit_stations.index + 1).astype(str)

# Persist the tagged stations for the downstream cells.
rapid_transit_stations.to_file(
    'data/processed_data/stations_with_id_neighborhood.geojson',
    driver='GeoJSON',
)

### generate 500m catchment around station

- catchment will be the primary object used for comparative analysis

In [29]:
# Goal: estimate a population density for every station catchment.
#
# Method: clip the neighborhood polygons to the catchments, then take the
# area-weighted mean of the neighborhood densities over the pieces that fall
# inside each catchment:
#     density(catchment) = sum(piece_area * neighborhood_density) / sum(piece_area)

# Both inputs are read back from disk, so this cell depends on
# `station_catchments.geojson` already existing in data/processed_data.
neighborhoods = gpd.read_file('data/processed_data/neighborhoods_with_pop.geojson')
station_catchments = gpd.read_file('data/processed_data/station_catchments.geojson')

# Project both layers to EPSG:32610 (the CRS used throughout this notebook) so
# that overlay() sees matching CRSs and .area is measured in a projected CRS.
neighborhoods = neighborhoods.to_crs(epsg=32610)
station_catchments = station_catchments.to_crs(epsg=32610)

# Both layers carry a `neighborhood` column; drop the catchment's copy so the
# overlay output keeps the clipped polygon's own neighborhood un-suffixed.
station_catchments = station_catchments.drop(columns=['neighborhood'])

# Each output row is the part of one neighborhood lying inside one catchment,
# carrying the attributes of both (including `population_density` and `station_id`).
intersections = gpd.overlay(neighborhoods, station_catchments, how='intersection')

# Area of each clipped piece, used as the weight.
intersections['intersected_area'] = intersections.geometry.area

# `population_density` already comes through the overlay from `neighborhoods`,
# so no second merge against `neighborhoods` is needed.
intersections['weighted_pop_density'] = (
    intersections['intersected_area'] * intersections['population_density']
)

# Per catchment: sum of the weighted densities and sum of the weights.
grouped = intersections.groupby('station_id').agg(
    total_weighted_pop_density=('weighted_pop_density', 'sum'),
    total_intersected_area=('intersected_area', 'sum')
).reset_index()

# Area-weighted mean density per catchment.
grouped['population_density'] = grouped['total_weighted_pop_density'] / grouped['total_intersected_area']

# Attach the density to the catchments. This is an inner join, so a catchment
# that overlaps no neighborhood polygon does not appear in the result.
station_catchments_with_pop_density = station_catchments.merge(
    grouped[['station_id', 'population_density']],
    on='station_id'
)

# Export station_id and population_density to CSV.
station_catchments_with_pop_density[['station_id', 'population_density']].to_csv('data/processed_data/station_pop_density.csv', index=False)


In [17]:
# Scratch check: attach the neighborhood attributes to each saved catchment.

# Read the saved catchment polygons and project them to EPSG:32610 so they are
# in the same CRS as the neighborhoods layer.
test_catchment = gpd.read_file(
    'data/processed_data/station_catchments.geojson'
).to_crs(epsg=32610)

# Left spatial join: one row per (catchment, intersecting neighborhood) pair;
# catchments that intersect no neighborhood are kept with nulls.
test_catchment = test_catchment.sjoin(neighborhoods, how="left", predicate='intersects')

test_catchment

KeyError: 'population'

In [8]:
# Radius of the circular catchment drawn around every station. The value is in
# CRS units, which are metres for EPSG:32610.
CATCHMENT_RADIUS_M = 500

# Start from the tagged stations, projected to the UTM zone used for Vancouver
# (EPSG:32610) so that the buffer distance is accurate. to_crs() returns a new
# GeoDataFrame, so `rapid_transit_stations` itself is left untouched.
station_catchments = rapid_transit_stations.to_crs(epsg=32610)

# Buffer every station point into its catchment polygon.
station_catchments['catchment_area'] = station_catchments.geometry.buffer(CATCHMENT_RADIUS_M)

# Keep the original point under the name `station_location` and make the
# catchment polygon the active geometry. rename_geometry() renames the active
# geometry column and keeps the frame's geometry bookkeeping consistent, which
# a plain DataFrame.rename() of that column does not guarantee.
station_catchments = (
    station_catchments
    .rename_geometry('station_location')
    .set_geometry('catchment_area')
)

station_catchments.head()

# NOTE(review): the export below is disabled, yet other cells read
# 'data/processed_data/station_catchments.geojson'. Re-enabling it presumably
# requires dropping `station_location` first (as the zoning export cell does
# before saving) - confirm before uncommenting.
# station_catchments.to_file('data/processed_data/station_catchments.geojson', driver='GeoJSON')

Unnamed: 0,station_name,station_line,station_location,neighborhood,station_id,catchment_area
0,Waterfront,Expo,POINT (491874.099 5459264.233),Downtown,1,"POLYGON ((492374.099 5459264.233, 492371.692 5..."
1,VCC - Clark,Expo,POINT (494255.295 5457005.321),Strathcona,2,"POLYGON ((494755.295 5457005.321, 494752.887 5..."
2,Olympic Village,Canada,POINT (491619.243 5457070.632),Fairview,3,"POLYGON ((492119.243 5457070.632, 492116.835 5..."
3,Broadway - City Hall,Canada,POINT (491648.797 5456674.067),Mount Pleasant,4,"POLYGON ((492148.797 5456674.067, 492146.389 5..."
4,King Edward,Canada,POINT (491606.180 5455156.590),Riley Park,5,"POLYGON ((492106.180 5455156.590, 492103.773 5..."


### zoning

#### add catchments to zoning data

In [300]:
# Read the zoning districts and project them into the catchments' CRS.
zoning = gpd.read_file(
    'data/geojson/polygon/zoning-districts-and-labels.geojson'
).to_crs(station_catchments.crs)

# Left spatial join: every zoning polygon is paired with each catchment it
# intersects; polygons touching no catchment are kept with a null station_id.
zoning_with_catchments = zoning.sjoin(station_catchments, how="left", predicate='intersects')

# Of the catchment attributes, only station_id is kept.
zoning_with_catchments = zoning_with_catchments.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

# Preview the zoning polygons that were matched to a catchment.
zoning_with_catchments.loc[zoning_with_catchments['station_id'].notna()].head()

Unnamed: 0,object_id,zoning_classification,zoning_category,zoning_district,cd_1_number,geo_point_2d,geometry,station_id
11,199895,Commercial,C,C-1,,"{'lon': -123.11662549117584, 'lat': 49.2344856...","POLYGON ((491472.727 5453428.426, 491474.290 5...",20
12,199899,Commercial,C,C-2,,"{'lon': -123.11522745916199, 'lat': 49.2496243...","POLYGON ((491554.270 5455209.713, 491552.028 5...",5
13,199904,Residential,RM,RM-4N,,"{'lon': -123.08866887003467, 'lat': 49.2661122...","POLYGON ((493239.064 5457049.207, 493239.246 5...",2
13,199904,Residential,RM,RM-4N,,"{'lon': -123.08866887003467, 'lat': 49.2661122...","POLYGON ((493239.064 5457049.207, 493239.246 5...",23
14,199905,Comprehensive Development,BCPED,BCPED,,"{'lon': -123.11946651338913, 'lat': 49.2751412...","POLYGON ((491256.203 5458059.637, 491263.201 5...",19


In [284]:
# Persist the zoning polygons tagged with station_id as GeoJSON. A polygon that
# intersects several catchments appears once per catchment, and polygons that
# intersect none keep a null station_id (left join above).
zoning_with_catchments.to_file('data/processed_data/zoning_with_station.geojson', driver='GeoJSON')

### add zoning proportions to catchment gdf

In [304]:


# Clip the zoning polygons to the station catchments: each output row is the
# piece of one zoning polygon that lies inside one catchment.
intersections = gpd.overlay(zoning, station_catchments, how='intersection')

# Area of every clipped piece.
intersections['intersected_area'] = intersections.geometry.area

# Clipped area summed per (catchment, zoning classification) pair.
area_per_zone_type = (
    intersections
    .groupby(['station_id', 'zoning_classification'])['intersected_area']
    .sum()
    .reset_index()
)

# Total clipped area per catchment; this is the denominator of the proportions.
total_intersected_area_per_catchment = (
    area_per_zone_type
    .groupby('station_id')['intersected_area']
    .sum()
    .reset_index()
)

# Bring the per-catchment total alongside each classification row
# (it arrives as `intersected_area_total`).
area_per_zone_type = area_per_zone_type.merge(
    total_intersected_area_per_catchment,
    on='station_id',
    suffixes=('', '_total'),
)

# Share of the catchment's zoned area taken up by each classification.
area_per_zone_type['proportion'] = area_per_zone_type['intersected_area'].div(
    area_per_zone_type['intersected_area_total']
)

# One row per station, one column per zoning classification; a classification
# absent from a catchment gets a proportion of 0.
zoning_pivot = area_per_zone_type.pivot(
    index='station_id', columns='zoning_classification', values='proportion'
).fillna(0)

# Attach the proportion columns to the catchments (left join on station_id).
station_catchments_with_zoning = station_catchments.merge(
    zoning_pivot, how='left', left_on='station_id', right_index=True
)

station_catchments_with_zoning.head()


Unnamed: 0,station_name,station_line,station_location,neighborhood,station_id,catchment_area,Commercial,Comprehensive Development,Historical Area,Industrial,Residential,Residential Inclusive
0,Waterfront,Expo,POINT (491874.099 5459264.233),Downtown,1,"POLYGON ((492374.099 5459264.233, 492371.692 5...",0.0,0.914284,0.085716,0.0,0.0,0.0
1,VCC - Clark,Expo,POINT (494255.295 5457005.321),Strathcona,2,"POLYGON ((494755.295 5457005.321, 494752.887 5...",0.00354,0.145734,0.0,0.344492,0.438913,0.067321
2,Olympic Village,Canada,POINT (491619.243 5457070.632),Fairview,3,"POLYGON ((492119.243 5457070.632, 492116.835 5...",0.264643,0.400714,0.0,0.259801,0.072108,0.002734
3,Broadway - City Hall,Canada,POINT (491648.797 5456674.067),Mount Pleasant,4,"POLYGON ((492148.797 5456674.067, 492146.389 5...",0.29707,0.287345,0.0,0.110963,0.304621,0.0
4,King Edward,Canada,POINT (491606.180 5455156.590),Riley Park,5,"POLYGON ((492106.180 5455156.590, 492103.773 5...",0.01139,0.112326,0.0,0.0,0.241787,0.634496


In [283]:

# Earlier, coarser estimate of the zoning mix per catchment, kept for comparison.
#
# It sums the FULL area of every zoning polygon that intersects a catchment
# (the polygons in `zoning_with_catchments` are not clipped), so a large
# polygon that only touches a catchment can dominate its proportions. The
# overlay-based calculation that builds `station_catchments_with_zoning`
# measures only the area inside the catchment.
#
# The results are stored under `*_whole_polygon` names so that running the
# notebook top to bottom does not overwrite `area_per_zone_type`,
# `zoning_pivot` or `station_catchments_with_zoning` from the overlay-based
# cell before they are exported.

# Full (unclipped) area of each zoning polygon.
zoning_with_catchments['area'] = zoning_with_catchments.geometry.area

# Full polygon area summed per (catchment, zoning classification) pair.
# Rows with a null station_id are excluded by groupby.
area_per_zone_type_whole_polygon = (
    zoning_with_catchments
    .groupby(['station_id', 'zoning_classification'])['area']
    .sum()
    .reset_index()
)

# Total per catchment, then the share of each classification in that total.
total_area_per_catchment = area_per_zone_type_whole_polygon.groupby('station_id')['area'].sum().reset_index()
area_per_zone_type_whole_polygon = area_per_zone_type_whole_polygon.merge(
    total_area_per_catchment, on='station_id', suffixes=('', '_total')
)
area_per_zone_type_whole_polygon['proportion'] = (
    area_per_zone_type_whole_polygon['area'] / area_per_zone_type_whole_polygon['area_total']
)

# One row per station, one column per zoning classification (0 when absent).
zoning_pivot_whole_polygon = area_per_zone_type_whole_polygon.pivot(
    index='station_id', columns='zoning_classification', values='proportion'
).fillna(0)

# Attach the proportions to the catchments (left join on station_id).
station_catchments_with_zoning_whole_polygon = station_catchments.merge(
    zoning_pivot_whole_polygon, left_on='station_id', right_index=True, how='left'
)

# Compare against the overlay-based table to see the effect of not clipping.
station_catchments_with_zoning_whole_polygon.head()

Unnamed: 0,station_name,station_line,station_location,neighborhood,station_id,catchment_area,Commercial,Comprehensive Development,Historical Area,Industrial,Residential,Residential Inclusive
0,Waterfront,Expo,POINT (491874.099 5459264.233),Downtown,1,"POLYGON ((492374.099 5459264.233, 492371.692 5...",0.0,0.926305,0.073695,0.0,0.0,0.0
1,VCC - Clark,Expo,POINT (494255.295 5457005.321),Strathcona,2,"POLYGON ((494755.295 5457005.321, 494752.887 5...",0.000752,0.063532,0.0,0.484574,0.427466,0.023676
2,Olympic Village,Canada,POINT (491619.243 5457070.632),Fairview,3,"POLYGON ((492119.243 5457070.632, 492116.835 5...",0.017464,0.022453,0.0,0.009307,0.006002,0.944774
3,Broadway - City Hall,Canada,POINT (491648.797 5456674.067),Mount Pleasant,4,"POLYGON ((492148.797 5456674.067, 492146.389 5...",0.286018,0.366573,0.0,0.115903,0.231505,0.0
4,King Edward,Canada,POINT (491606.180 5455156.590),Riley Park,5,"POLYGON ((492106.180 5455156.590, 492103.773 5...",0.000156,0.005196,0.0,0.0,0.004344,0.990303


In [305]:
# Drop the secondary `station_location` geometry column before saving, leaving
# the catchment polygon as the only geometry. errors='ignore' makes the cell
# safe to re-run once the column has already been dropped.
station_catchments_with_zoning = station_catchments_with_zoning.drop(
    columns='station_location', errors='ignore'
)

# Persist the catchments with their zoning proportions as GeoJSON.
station_catchments_with_zoning.to_file('data/processed_data/station_catchments_zoning.geojson', driver='GeoJSON')

# Assign catchments to remaining spatial data

## Point data

### Cafe locations

In [10]:
# Load the cafe point layer (name, rating, price level and opening-hours attributes).
cafe_locations = gpd.read_file('data/geojson/point/cafes_details.geojson')

# Preview the raw rows; the points are still in longitude/latitude here.
cafe_locations.head()

Unnamed: 0,name,rating,user_ratings_total,vicinity,place_id,price_level,weekly_open_hours,open_weekday,close_weekday,open_weekend,close_weekend,geometry
0,Trees Organic Coffee,4.3,1606.0,"450 Granville Street, Vancouver",ChIJx19lhHhxhlQR00Wyx2MBMZc,2.0,103.0,Standard Open,Late Close (> 10PM),Standard Open,Late Close (> 10PM),POINT (-123.11433 49.28484)
1,Tim Hortons,3.5,474.0,"555 West Hastings Street Unit 6, Vancouver",ChIJfe2sYHhxhlQR6YR9clFor1Y,1.0,106.5,Early Open (< 6AM),Late Close (> 10PM),Early Open (< 6AM),Standard Close,POINT (-123.11199 49.28495)
2,Mink Chocolates Cafe,4.3,1149.0,"863 West Hastings Street, Vancouver",ChIJPxF7YIJxhlQR2GV7dekt2xM,2.0,63.5,Standard Open,Standard Close,Standard Open,Standard Close,POINT (-123.11539 49.28665)
3,Revolver,4.7,2126.0,"325 Cambie Street, Vancouver",ChIJk5IDu3lxhlQRrQ0XKyJs_mk,2.0,57.0,Standard Open,Standard Close,Standard Open,Standard Close,POINT (-123.10948 49.28320)
4,Waves Coffee House - Hastings,4.1,424.0,"492 West Hastings Street, Vancouver",ChIJg8oU-HhxhlQRIZKCSINvdrg,2.0,75.0,Standard Open,Standard Close,Standard Open,Standard Close,POINT (-123.11194 49.28385)


In [12]:
# Reload the raw cafes so this cell always starts from the un-joined layer.
# Re-running the join on an already-joined `cafe_locations` would produce
# `station_id_left` / `station_id_right` columns instead of a clean `station_id`.
cafe_locations = gpd.read_file('data/geojson/point/cafes_details.geojson')

# Set the active geometry.
cafe_locations = cafe_locations.set_geometry('geometry')

# Project the cafes into the same CRS as the station catchments.
cafe_locations = cafe_locations.to_crs(station_catchments.crs)

# Associate each cafe with every station catchment it falls in. A cafe inside
# several overlapping catchments appears once per catchment; a cafe inside
# none is kept with a null station_id (left join).
cafe_locations = gpd.sjoin(cafe_locations, station_catchments, how="left", predicate='intersects')

# Straight-line distance from each cafe to the matched station point, in CRS
# units. Both series come from the same frame, so they pair up row by row.
cafe_locations['distance_to_station'] = cafe_locations['station_location'].distance(cafe_locations['geometry'])

# Of the catchment attributes, keep only station_id (plus the new distance).
cafe_locations = cafe_locations.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

cafe_locations.head()

Unnamed: 0,name,rating,user_ratings_total,vicinity,place_id,price_level,weekly_open_hours,open_weekday,close_weekday,open_weekend,close_weekend,geometry,station_id,distance_to_station
0,Trees Organic Coffee,4.3,1606.0,"450 Granville Street, Vancouver",ChIJx19lhHhxhlQR00Wyx2MBMZc,2.0,103.0,Standard Open,Late Close (> 10PM),Standard Open,Late Close (> 10PM),POINT (491685.246 5459127.377),16,116.631933
0,Trees Organic Coffee,4.3,1606.0,"450 Granville Street, Vancouver",ChIJx19lhHhxhlQR00Wyx2MBMZc,2.0,103.0,Standard Open,Late Close (> 10PM),Standard Open,Late Close (> 10PM),POINT (491685.246 5459127.377),1,233.227921
0,Trees Organic Coffee,4.3,1606.0,"450 Granville Street, Vancouver",ChIJx19lhHhxhlQR00Wyx2MBMZc,2.0,103.0,Standard Open,Late Close (> 10PM),Standard Open,Late Close (> 10PM),POINT (491685.246 5459127.377),17,499.017883
0,Trees Organic Coffee,4.3,1606.0,"450 Granville Street, Vancouver",ChIJx19lhHhxhlQR00Wyx2MBMZc,2.0,103.0,Standard Open,Late Close (> 10PM),Standard Open,Late Close (> 10PM),POINT (491685.246 5459127.377),8,201.556265
0,Trees Organic Coffee,4.3,1606.0,"450 Granville Street, Vancouver",ChIJx19lhHhxhlQR00Wyx2MBMZc,2.0,103.0,Standard Open,Late Close (> 10PM),Standard Open,Late Close (> 10PM),POINT (491685.246 5459127.377),6,425.506222


In [13]:
# Persist the cafes tagged with station_id and distance_to_station as GeoJSON
# (a cafe inside several catchments has one row per catchment).
cafe_locations.to_file('data/processed_data/cafe_locations_with_station.geojson', driver='GeoJSON')

#### bikeshare stations

In [287]:
# Read the bikeshare docking stations, confirm the active geometry and project
# them into the catchments' CRS.
bikeshare_stations = (
    gpd.read_file('data/geojson/point/bikeshare_stations.geojson')
    .set_geometry('geometry')
    .to_crs(station_catchments.crs)
)

# Left spatial join: one row per (dock, containing catchment) pair; docks
# outside every catchment are kept with a null station_id.
bikeshare_stations = bikeshare_stations.sjoin(
    station_catchments, how="left", predicate='intersects'
)

# Straight-line distance from each dock to the matched station point (CRS units).
bikeshare_stations['distance_to_station'] = bikeshare_stations['station_location'].distance(
    bikeshare_stations['geometry']
)

# Of the catchment attributes, keep only station_id (plus the new distance).
bikeshare_stations = bikeshare_stations.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

bikeshare_stations.head()

Unnamed: 0,bike_station_name,capacity,bike_station_id,geometry,station_id,distance_to_station
0,10th & Cambie,36,7a19c49f486d7c0c02b3685d7b240448,POINT (491676.773 5456642.178),4,42.421715
0,10th & Cambie,36,7a19c49f486d7c0c02b3685d7b240448,POINT (491676.773 5456642.178),25,65.355357
0,10th & Cambie,36,7a19c49f486d7c0c02b3685d7b240448,POINT (491676.773 5456642.178),3,432.29861
1,Yaletown-Roundhouse Station,16,32603a87cfca71d0f7dfa3513bad69d5,POINT (491139.077 5457985.849),19,23.270869
2,Dunsmuir & Beatty,26,6d42fa40360f9a6b2bf641c7b8bb2862,POINT (491988.281 5458562.412),9,55.86013


In [289]:
# Persist the bikeshare docks tagged with station_id and distance_to_station as GeoJSON
# (a dock inside several catchments has one row per catchment).
bikeshare_stations.to_file('data/processed_data/bikeshare_with_station.geojson', driver='GeoJSON')

In [290]:
# Read the storefront inventory points, confirm the active geometry and
# project them into the catchments' CRS.
storefront_locations = (
    gpd.read_file('data/geojson/point/storefronts-inventory.geojson')
    .set_geometry('geometry')
    .to_crs(station_catchments.crs)
)

# Left spatial join: one row per (storefront, containing catchment) pair;
# storefronts outside every catchment are kept with a null station_id.
storefront_locations = storefront_locations.sjoin(
    station_catchments, how="left", predicate='intersects'
)

# Straight-line distance from each storefront to the matched station point (CRS units).
storefront_locations['distance_to_station'] = storefront_locations['station_location'].distance(
    storefront_locations['geometry']
)

# Of the catchment attributes, keep only station_id (plus the new distance).
storefront_locations = storefront_locations.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

storefront_locations.head()

Unnamed: 0,id,unit,civic_number_parcel,street_name_parcel,business_name,retail_category,year_recorded,geo_local_area,geo_point_2d,geometry,station_id,distance_to_station
0,4852,,2050,W 4TH AV,Fig Facial,Service Commercial,2021,Kitsilano,"{'lon': -123.1516432157183, 'lat': 49.26786092...",POINT (488968.035 5457244.366),28,399.44022
1,4854,,2061,W 4TH AV,Nelly's Grill,Food & Beverage,2021,Kitsilano,"{'lon': -123.15183309734428, 'lat': 49.2683791...",POINT (488954.336 5457302.003),28,455.188102
2,4855,,2064,W 4TH AV,Duxiana,Comparison Goods,2021,Kitsilano,"{'lon': -123.15174798507738, 'lat': 49.2678650...",POINT (488960.414 5457244.836),28,398.944897
3,4856,101.0,2065,W 4TH AV,Bare Waxing Salon,Service Commercial,2021,Kitsilano,"{'lon': -123.1519379480197, 'lat': 49.26838069...",POINT (488946.709 5457302.192),28,454.761604
4,4861,,2076,W 4TH AV,Knix,Comparison Goods,2021,Kitsilano,"{'lon': -123.15227130057572, 'lat': 49.2678735...",POINT (488922.345 5457245.864),28,397.333788


In [291]:
# Persist the storefronts tagged with station_id and distance_to_station as GeoJSON.
storefront_locations.to_file('data/processed_data/storefronts_with_station.geojson', driver='GeoJSON')

In [292]:
# Read the transit (bus) stops, confirm the active geometry and project them
# into the catchments' CRS.
bus_stops = (
    gpd.read_file('data/geojson/point/transit_stops.geojson')
    .set_geometry('geometry')
    .to_crs(station_catchments.crs)
)

# Left spatial join: one row per (stop, containing catchment) pair; stops
# outside every catchment are kept with a null station_id.
bus_stops = bus_stops.sjoin(station_catchments, how="left", predicate='intersects')

# Straight-line distance from each stop to the matched station point (CRS units).
bus_stops['distance_to_station'] = bus_stops['station_location'].distance(
    bus_stops['geometry']
)

# Of the catchment attributes, keep only station_id (plus the new distance).
bus_stops = bus_stops.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

# Preview only the stops that landed inside a catchment.
bus_stops.loc[bus_stops['station_id'].notna()].head()


Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,geometry,station_id,distance_to_station
39,215,50214,NB GRANVILLE ST FS W 12 AVE,GRANVILLE ST @ W 12 AVE,49.261086,-123.138524,ZN 99,,0,,POINT (489921.071 5456489.364),27,322.224494
40,11403,54826,NB GRANVILLE ST FS W 10 AVE,GRANVILLE ST @ W 10TH AVE,49.262957,-123.138459,ZN 99,,0,,POINT (489926.181 5456697.355),27,130.286009
41,218,50217,NB GRANVILLE ST FS W 7 AVE,GRANVILLE ST @ W 7 AVE,49.265651,-123.138349,ZN 99,,0,,POINT (489934.732 5456996.833),27,209.313115
42,4879,50218,NB GRANVILLE ST FS W 5 AVE,GRANVILLE ST @ W 5 AVE,49.267404,-123.138259,ZN 99,,0,,POINT (489941.636 5457191.703),27,396.877899
43,11426,58593,NB GRANVILLE ST FS DRAKE ST,GRANVILLE ST @ DRAKE ST,49.276487,-123.127092,ZN 99,,0,,POINT (490755.734 5458200.039),19,446.03766


In [293]:
# Persist the bus stops tagged with station_id and distance_to_station as GeoJSON
# (stops outside every catchment are included with a null station_id).
bus_stops.to_file('data/processed_data/bus_stops_with_station.geojson', driver='GeoJSON')

In [294]:
# Read the parking meters, confirm the active geometry and project them into
# the catchments' CRS.
parking_meters = (
    gpd.read_file('data/geojson/point/parking-meters.geojson')
    .set_geometry('geometry')
    .to_crs(station_catchments.crs)
)

# Left spatial join: one row per (meter, containing catchment) pair; meters
# outside every catchment are kept with a null station_id.
parking_meters = parking_meters.sjoin(station_catchments, how="left", predicate='intersects')

# Straight-line distance from each meter to the matched station point (CRS units).
parking_meters['distance_to_station'] = parking_meters['station_location'].distance(
    parking_meters['geometry']
)

# Of the catchment attributes, keep only station_id (plus the new distance).
parking_meters = parking_meters.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

# Preview only the meters that landed inside a catchment.
parking_meters.loc[parking_meters['station_id'].notna()].head()

Unnamed: 0,meterhead,r_mf_9a_6p,r_mf_6p_10,r_sa_9a_6p,r_sa_6p_10,r_su_9a_6p,r_su_6p_10,rate_misc,timeineffe,t_mf_9a_6p,...,t_su_6p_10,time_misc,creditcard,pay_phone,geo_local_area,meterid,geo_point_2d,geometry,station_id,distance_to_station
6,Pay Station,$5.00,$1.00,$5.00,$1.00,$5.00,$1.00,,METER IN EFFECT: 9:00 AM TO 10:00 PM,30 min,...,30 min,,No,60016,Downtown,20928,"{'lon': -123.11473693605602, 'lat': 49.2878540...",POINT (491656.320 5459462.280),16,263.648879
6,Pay Station,$5.00,$1.00,$5.00,$1.00,$5.00,$1.00,,METER IN EFFECT: 9:00 AM TO 10:00 PM,30 min,...,30 min,,No,60016,Downtown,20928,"{'lon': -123.11473693605602, 'lat': 49.2878540...",POINT (491656.320 5459462.280),1,294.364466
6,Pay Station,$5.00,$1.00,$5.00,$1.00,$5.00,$1.00,,METER IN EFFECT: 9:00 AM TO 10:00 PM,30 min,...,30 min,,No,60016,Downtown,20928,"{'lon': -123.11473693605602, 'lat': 49.2878540...",POINT (491656.320 5459462.280),8,484.157514
6,Pay Station,$5.00,$1.00,$5.00,$1.00,$5.00,$1.00,,METER IN EFFECT: 9:00 AM TO 10:00 PM,30 min,...,30 min,,No,60016,Downtown,20928,"{'lon': -123.11473693605602, 'lat': 49.2878540...",POINT (491656.320 5459462.280),6,440.551786
7,Pay Station,$5.00,$5.00,$5.00,$5.00,$5.00,$5.00,$2.50,METER IN EFFECT: 9:00 AM TO 10:00 PM,2 Hr,...,4 Hr,,Yes,60196,Downtown,630915,"{'lon': -123.12428853169057, 'lat': 49.2810631...",POINT (490960.486 5458708.428),17,371.5504


In [295]:
# Persist the parking meters tagged with station_id and distance_to_station as GeoJSON
# (meters outside every catchment are included with a null station_id).
parking_meters.to_file('data/processed_data/parking_meters_with_station.geojson', driver='GeoJSON')

## Linestring data

In [296]:
# Read the bikeway line segments and project them into the catchments' CRS.
bike_routes = gpd.read_file(
    'data/geojson/linestring/bikeways.geojson'
).to_crs(station_catchments.crs)

# Left spatial join: one row per (segment, intersected catchment) pair;
# segments that cross no catchment are kept with a null station_id.
bike_routes = bike_routes.sjoin(station_catchments, how="left", predicate='intersects')

# Of the catchment attributes, keep only station_id.
bike_routes = bike_routes.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

# Preview only the segments that intersect a catchment.
bike_routes.loc[bike_routes['station_id'].notna()].head()

Unnamed: 0,object_id,bike_route_name,street_name,bikeway_type,subtype,status,street_segment_type,overall_direction,bikeway_direction,vehicle_direction,...,e_s_bound_type,snow_removal,segment_length,year_of_construction,construction_note,upgrade_year,notes,geo_point_2d,geometry,station_id
16,344415,Quebec,Quebec,Painted Lanes,NB,Active,Sec Arterial,NS,OW,2W,...,Painted Lanes,Yes,204.88768,2009,"s part- not painted, W - protected other route",,,"{'lon': -123.10176475672168, 'lat': 49.2740442...","LINESTRING (492586.335 5458026.899, 492594.803...",10
17,344418,Cypress,Cypress,Local Street,,Active,Residential,NS,2W,2W,...,Local Street,Yes,75.724998,1996,Upgrade to AAA in 2018,2018.0,,"{'lon': -123.14801236325408, 'lat': 49.2668561...","LINESTRING (489232.988 5457169.989, 489230.949...",28
21,344424,Ontario,Ontario,Local Street,,Active,Residential,NS,2W,OW,...,Local Street,Yes,50.256163,1995,"Upgrade to AAA, OW for Vehicles NB in 2021",2021.0,,"{'lon': -123.10483586893582, 'lat': 49.2644470...","LINESTRING (492373.410 5456884.188, 492372.026...",24
22,344425,Ontario,Ontario,Local Street,,Active,Residential,NS,2W,2W,...,Local Street,Yes,50.276934,1995,Upgrade to AAA in 2021,2021.0,,"{'lon': -123.10485440671827, 'lat': 49.2639950...","LINESTRING (492372.026 5456833.951, 492372.025...",24
28,344443,Gladstone,Gladstone,Local Street,,Active,Residential,NS,2W,2W,...,Local Street,No,126.681112,2005,const date-source unknown,,,"{'lon': -123.0604550929681, 'lat': 49.24943897...","LINESTRING (495601.601 5455250.416, 495598.970...",22


In [297]:
# Persist the bikeway segments tagged with station_id as GeoJSON
# (segments crossing no catchment are included with a null station_id).
bike_routes.to_file('data/processed_data/bike_routes_with_station.geojson', driver='GeoJSON')


## Polygon data

In [298]:
# Read the park polygons and project them into the catchments' CRS.
parks = gpd.read_file(
    'data/geojson/polygon/parks-polygon-representation.geojson'
).to_crs(station_catchments.crs)

# Left spatial join: one row per (park, intersected catchment) pair; parks
# that touch no catchment are kept with a null station_id.
parks = parks.sjoin(station_catchments, how="left", predicate='intersects')

# Of the catchment attributes, keep only station_id.
parks = parks.drop(
    columns=['index_right', 'station_name', 'station_line', 'station_location', 'neighborhood']
)

# Preview only the parks that intersect a catchment.
parks.loc[parks['station_id'].notna()].head()

Unnamed: 0,park_id,park_name,area_ha,park_url,geo_point_2d,geometry,station_id
0,108.0,Connaught Park,5.993645,http://covapp.vancouver.ca/parkfinder/parkdeta...,"{'lon': -123.16010507957903, 'lat': 49.2620555...","POLYGON ((488191.232 5456696.724, 488515.731 5...",28
1,81.0,Clark Park,4.295203,http://covapp.vancouver.ca/parkfinder/parkdeta...,"{'lon': -123.07235700114158, 'lat': 49.2571040...","POLYGON ((494905.046 5456101.749, 494903.250 5...",18
5,20.0,Emery Barnes Park,0.896743,http://covapp.vancouver.ca/parkfinder/parkdeta...,"{'lon': -123.12409417280017, 'lat': 49.2766436...","POLYGON ((491024.100 5458268.473, 491001.573 5...",19
7,264.0,Laurel Landbridge Park,0.153938,http://vancouver.ca/parks/,"{'lon': -123.12433345300423, 'lat': 49.2657423...","POLYGON ((490959.665 5456945.979, 490947.998 5...",26
8,233.0,Arbutus Greenway Park,0.687948,http://covapp.vancouver.ca/parkfinder/parkdeta...,"{'lon': -123.1555658097564, 'lat': 49.26197997...","POLYGON ((488705.482 5456580.833, 488704.523 5...",28


In [299]:
# Persist the park polygons tagged with station_id as GeoJSON
# (parks touching no catchment are included with a null station_id).
parks.to_file('data/processed_data/parks_with_station.geojson', driver='GeoJSON')