<a href="https://colab.research.google.com/github/kavyajeetbora/modern_geospatial_stack/blob/master/notebooks/Station_Buildings.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --quiet duckdb
!pip install --quiet jupysql
!pip install --quiet duckdb-engine
!pip install --quiet pandas
!pip install --quiet matplotlib
!pip install -q osmnx
!pip install -q pydeck

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.9/95.9 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.8/249.8 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.1/193.1 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.3/41.3 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.2/47.2 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.2/107.2 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m45.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [11]:
import geopandas as gpd
import pandas as pd
import osmnx as ox
from tqdm import tqdm
import pydeck as pdk
import shapely
import duckdb

# Import jupysql Jupyter extension to create SQL cells
%load_ext sql



Deploy Shiny apps for free on Ploomber Cloud! Learn more: https://ploomber.io/s/signup


## Setup duckdb

In [16]:
# jupysql (SqlMagic) options for the %sql / %%sql cells in this notebook.
# NOTE(review): per the jupysql docs, `autopandas` should make queries return pandas
# DataFrames, while `feedback` / `displaycon` silence the status and connection
# banners — confirm against the installed jupysql version.
%config SqlMagic.autopandas = True
%config SqlMagic.feedback = False
%config SqlMagic.displaycon = False

# Point the SQL magic at an in-memory DuckDB database.
%sql duckdb:///:memory:
# Alternative: connect to an on-disk database file instead.
# %sql duckdb:///path/to/file.db

In [17]:
%%sql
-- Install the DuckDB extensions that the building query later LOADs (httpfs, spatial).
INSTALL httpfs;
INSTALL spatial;

Unnamed: 0,Success


## Download OSM data

In [20]:
# City bounding boxes. Every tuple is ordered (West, South, East, North).
bboxs = dict(
    Mumbai=(72.7712628039, 18.8895552464, 73.0912396106, 19.322318006),
    Hyderabad=(78.2288393192, 17.1952635128, 78.692325037, 17.6244114807),
    Bangalore=(77.4480254838, 12.815481916, 77.7549560258, 13.1026335949),
    Chennai=(80.0337033915, 12.8678089117, 80.335827415, 13.2370438819),
    Kolkata=(88.2431406976, 22.4883995072, 88.4944529534, 22.7071046611),
    Ahmedabad=(72.4617185823, 22.9179551449, 72.699297928, 23.1470216925),
    Delhi_NCR=(76.881197, 28.31444, 77.604922, 28.893765),
)

# Attribute columns of interest for the downloaded features.
columns = [
    'element_type',
    'osmid',
    'geometry',
    'railway',
    'internet_access',
    'name',
    'name:hi',
    'network',
    'operator',
    'public_transport',
    'ref',
    'train',
    'city',
]

# Pick the city to process and unpack its bounding box into the four edges.
city = "Delhi_NCR"
W, S, E, N = bboxs[city]

In [26]:
%%time

public_transport = ox.features.features_from_bbox(
    bbox=(N,S,E,W),
    tags={
        'public_transport':['station', 'stop_position'],
        'railway': 'stop',
        'amenties': 'bus_station',
        'highway': 'bus_stop'
        }
).reset_index()

## Extract the station data:
pb = public_transport[public_transport['element_type']=='node'].copy()
columns = ['element_type', 'osmid', 'geometry', 'railway', 'internet_access',
       'name', 'network', 'operator', 'public_transport',
       'ref', 'train']

pb_gdf = pb[columns]
pb_gdf = pb_gdf.fillna('None')
print("Total nodes with station tag:",pb_gdf.shape[0])
pb_gdf.sample(min(5,len(pb_gdf)))

Total nodes with station tag: 3192
CPU times: user 1.7 s, sys: 22.1 ms, total: 1.72 s
Wall time: 10.5 s


Unnamed: 0,element_type,osmid,geometry,railway,internet_access,name,network,operator,public_transport,ref,train
1685,node,5777631164,POINT (77.07797 28.62897),stop,,Janakpuri West,Delhi Metro,Delhi Metro Rail Corporation Limited,stop_position,JPW,
1868,node,6372277581,POINT (77.21714 28.61038),,,Nirman Bhavan,,,platform,,
3185,node,7554999586,POINT (77.25420 28.58901),stop,,Hazrat Nizamuddin Junction,,,stop_position,4,yes
1279,node,913282201,POINT (77.23187 28.66024),,,Purani Dilhi Railway Station,,,stop_position,,
43,node,562620810,POINT (77.23428 28.56440),,,Moolchand,Delhi Metro,Delhi Metro Rail Corporation Limited,stop_position,,


In [23]:
# Save the station/stop nodes as Parquet; the building query further down reads this file.
pb_gdf.to_parquet('stations_stops.parquet')

In [27]:
# Also export the same station/stop nodes as a GeoPackage file.
pb_gdf.to_file('stations_stops.gpkg', driver='GPKG')

## Download Station Buildings

Download building footprints from Overture Maps with DuckDB, keeping only the buildings that intersect a station or stop node saved above.

In [21]:
%%time

# Join Overture Maps building footprints (read straight from the public S3 release)
# with the station/stop nodes stored in 'stations_stops.parquet', and write the
# matching buildings to 'station_buildings.geojson' through the GDAL GeoJSON driver.
#
# - Both "URL" variables carry their own single quotes because they are interpolated
#   verbatim into the SQL text as string literals.
# - W, S, E, N are the edges of the selected city's bounding box; only buildings whose
#   own bbox lies strictly inside it are considered.
# - A building is kept when its geometry intersects a station/stop point (ST_Intersects).
# - NOTE(review): the statement is a COPY ... TO, so the useful result is the file on
#   disk; `df` presumably holds no query rows — confirm before using it downstream.
buildings_data_url = r"'s3://overturemaps-us-west-2/release/2024-06-13-beta.0/theme=buildings/type=*/*'"
stations_data_url = "'stations_stops.parquet'"

df = duckdb.sql(
    f'''
    LOAD spatial;
    LOAD httpfs;
    COPY(
        SELECT
            id,
            names.primary as name,
            buildings.height as height,
            stations.name as station_name,
            stations.network as network,
            stations.operator as operator,
            stations.railway as railway,
            stations.public_transport as public_transport,
            ST_GeomFromWKB(buildings.geometry) as geom
        FROM read_parquet({buildings_data_url}, filename=true, hive_partitioning=1) AS buildings
        JOIN read_parquet({stations_data_url}) as stations
        ON ST_Intersects(ST_GeomFromWKB(buildings.geometry), ST_GeomFromWKB(stations.geometry))
        WHERE buildings.bbox.xmin > {W}
        AND buildings.bbox.xmax < {E}
        AND buildings.bbox.ymin > {S}
        AND buildings.bbox.ymax < {N}
    ) TO 'station_buildings.geojson' WITH (FORMAT GDAL, DRIVER 'GeoJSON');
    '''
)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

CPU times: user 26.1 s, sys: 4.42 s, total: 30.5 s
Wall time: 4min 9s
