<a href="https://colab.research.google.com/github/kavyajeetbora/modern_geospatial_stack/blob/master/notebooks/Station_Buildings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --quiet duckdb
!pip install --quiet jupysql
!pip install --quiet duckdb-engine
!pip install --quiet pandas
!pip install --quiet matplotlib
!pip install -q osmnx
!pip install -q pydeck

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.9/95.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.8/249.8 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.1/193.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.2/47.2 kB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.2/107.2 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m48.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import geopandas as gpd
import pandas as pd
import osmnx as ox
from tqdm import tqdm
import pydeck as pdk
import shapely
import duckdb

# Import jupysql Jupyter extension to create SQL cells
%load_ext sql

Deploy Panel apps for free on Ploomber Cloud! Learn more: https://ploomber.io/s/signup


## Setup duckdb

In [3]:
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

%sql duckdb:///:memory:
# %sql duckdb:///path/to/file.db

In [4]:
%%sql
INSTALL httpfs;
INSTALL spatial;

Unnamed: 0,Success


## Download OSM data

In [5]:
## Note: the coordinates are in West, South, East, North format

bboxs = {
    'Mumbai': (72.7712628039,18.8895552464,73.0912396106,19.322318006),
    'Hyderabad': (78.2288393192,17.1952635128,78.692325037,17.6244114807),
    'Bangalore': (77.4480254838,12.815481916,77.7549560258,13.1026335949),
    'Chennai': (80.0337033915,12.8678089117,80.335827415,13.2370438819),
    'Kolkata': (88.2431406976,22.4883995072,88.4944529534,22.7071046611),
    'Ahmedabad': (72.4617185823,22.9179551449,72.699297928,23.1470216925),
    'Delhi_NCR': (76.881197,28.31444,77.604922,28.893765)
}

columns = ['element_type', 'osmid', 'geometry', 'railway', 'internet_access',
       'name', 'name:hi', 'network', 'operator', 'public_transport',
       'ref', 'train','city']

## Select city
city = "Delhi_NCR"
W,S,E,N = bboxs[city]

In [11]:
%%time

public_transport = ox.features.features_from_bbox(
    bbox=(N,S,E,W),
    tags={
        'public_transport':['station', 'stop_position'],
        'railway': 'stop',
        'amenties': 'bus_station',
        'highway': 'bus_stop'
        }
).reset_index()

## Extract the station data:
pb = public_transport[public_transport['element_type']=='node'].copy()
columns = ['element_type', 'osmid', 'geometry', 'railway', 'internet_access',
       'name', 'network', 'operator', 'public_transport',
       'ref', 'train']

pb_gdf = pb[columns]
pb_gdf = pb_gdf.fillna('None')
metro_stations = pb_gdf[pb_gdf['network'].str.lower().str.contains('metro')]
print("Total nodes with station tag:",metro_stations.shape[0])
metro_stations.sample(min(5,len(metro_stations)))

Total nodes with station tag: 763
CPU times: user 886 ms, sys: 1.08 ms, total: 888 ms
Wall time: 893 ms


Unnamed: 0,element_type,osmid,geometry,railway,internet_access,name,network,operator,public_transport,ref,train
1648,node,5777601335,POINT (76.91918 28.69739),,,Brigadier Hoshiyar Singh,Delhi Metro,Delhi Metro Rail Corporation Limited,stop_position,,
1229,node,347676595,POINT (77.15826 28.65158),station,,Shadipur,Delhi Metro,Delhi Metro Rail Corporation Limited,station,,
1408,node,5215706767,POINT (77.19957 28.72983),station,,Burari X-ing,Delhi Metro,Delhi Metro Rail Corporation Limited,station,,
1669,node,5777629192,POINT (77.24102 28.62819),stop,,ITO,Delhi Metro,Delhi Metro Rail Corporation Limited,stop_position,ITO,
2206,node,663755930,POINT (77.25053 28.66843),station,,Shastri Park,Delhi Metro,Delhi Metro Rail Corporation Limited,station,,


Export the results

In [12]:
metro_stations.to_parquet('stations_stops.parquet')

In [13]:
pb_gdf.to_file('stations_stops.gpkg', driver='GPKG')

## Download Station Buildings

Downloading the buildings from overturemaps using duckdb

In [14]:
%%time

buildings_data_url = r"'s3://overturemaps-us-west-2/release/2024-06-13-beta.0/theme=buildings/type=*/*'"
stations_data_url = "'stations_stops.parquet'"

df = duckdb.sql(
    f'''
    LOAD spatial;
    LOAD httpfs;
    COPY(
        SELECT
            id,
            names.primary as name,
            buildings.height as height,
            stations.name as station_name,
            stations.network as network,
            stations.operator as operator,
            stations.railway as railway,
            stations.public_transport as public_transport,
            ST_GeomFromWKB(buildings.geometry) as geom
        FROM read_parquet({buildings_data_url}, filename=true, hive_partitioning=1) AS buildings
        JOIN read_parquet({stations_data_url}) as stations
        ON ST_Intersects(ST_GeomFromWKB(buildings.geometry), ST_GeomFromWKB(stations.geometry))
        WHERE buildings.bbox.xmin > {W}
        AND buildings.bbox.xmax < {E}
        AND buildings.bbox.ymin > {S}
        AND buildings.bbox.ymax < {N}
    ) TO 'station_buildings.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON');
    '''
)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: user 27.1 s, sys: 5.06 s, total: 32.1 s
Wall time: 4min 15s


This will give all the building footprints surrounding the station nodes.
Note that buildings are based on overture maps which is not so well updated. There might me some stations without a enclosing building footprint

Let's find out those stations:

In [19]:
sbf = gpd.read_file('station_buildings.geojson').drop_duplicates('geometry')  ## Unique Building footprints
metro_stations.sjoin(sbf, how='outer', predicate='intersects')

ValueError: `how` was "outer" but is expected to be in ['left', 'right', 'inner']