In [2]:
import os

import duckdb
from dotenv import load_dotenv
import urllib.request as res

In [3]:
# Source of the Norway OpenStreetMap extract and the local filename it is cached under.
url = "https://download.geofabrik.de/europe/norway-latest.osm.pbf"
local_file = "norway-latest.osm.pbf"

# Download only when the extract is not already cached locally.
# The data is written to a temporary ".part" file and renamed into place only
# after urlretrieve returns successfully. Writing directly to `local_file`
# would leave a truncated file behind if the download failed or was
# interrupted, and the exists() check above would then accept that broken
# file on every later run.
if not os.path.exists(local_file):
    partial_file = local_file + ".part"
    print("Downloading Norway OSM data...")
    try:
        res.urlretrieve(url, partial_file)
        os.replace(partial_file, local_file)
    finally:
        # After a successful rename the partial file no longer exists; after a
        # failure this removes the incomplete download.
        if os.path.exists(partial_file):
            os.remove(partial_file)
    print("Download complete.")

In [4]:
# Read the Azure Blob Storage connection string from the environment.
# load_dotenv() is called first so that a value kept in a local .env file
# (python-dotenv convention) is visible to os.getenv.
load_dotenv()
connection_string = os.getenv("BLOB_STORAGE_CONNECTION_STRING")

# In-memory DuckDB database; nothing is persisted to disk by the connection itself.
con = duckdb.connect(":memory:")

# Install and load the extensions used by this notebook.
for extension_setup_sql in (
    "INSTALL spatial; LOAD spatial;",
    "INSTALL azure; LOAD azure;",
):
    con.execute(extension_setup_sql)

# Configure the azure extension: pass the connection string as a bound
# parameter and select curl as the transport option.
con.execute("SET azure_storage_connection_string = ?;", [connection_string])
con.execute("SET azure_transport_option_type = curl")

<_duckdb.DuckDBPyConnection at 0x7f41a1a7a130>

In [5]:
# (Re)create the `buildings` table from the raw OSM extract: every OSM element
# (whatever its `kind`) that carries a `building` tag, keeping its id, kind,
# the value of the `building` tag and the full tag map.
raw_buildings_query = """
    CREATE OR REPLACE TABLE buildings AS
    SELECT id,
           kind,
           tags['building'] AS building_type,
           tags
    FROM ST_ReadOSM('norway-latest.osm.pbf')
    WHERE tags['building'] IS NOT NULL
    """
con.execute(raw_buildings_query)

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

<_duckdb.DuckDBPyConnection at 0x7f41a1a7a130>

In [None]:
# Rebuild `buildings` as one row per closed building way, carrying selected
# tags, the footprint polygon projected to EPSG:25833, and its area.
#
# Steps (one CTE each):
#   building_ways        - ways tagged `building`, exploded to one row per node
#                          reference together with its position in the way
#   node_points          - id / lat / lon of every OSM node
#   way_geometries       - node points of each way collected in reference order
#                          into a linestring (ways whose nodes are not found by
#                          the inner join lose those vertices)
#   polygon_geometries   - only closed linestrings, converted to polygons
#   projected_geometries - polygons reprojected once, so the geometry column and
#                          the area are derived from the same transformed value
#
# Axis order: the points are created as ST_Point(lon, lat), i.e. x/y order,
# whereas EPSG:4326 formally declares (lat, lon). Without `always_xy := true`
# ST_Transform follows the authority axis order and reads the coordinates
# swapped, which yields wrong projected geometries and wrong areas.
con.sql("""CREATE OR REPLACE TABLE buildings AS
WITH building_ways AS (
    SELECT
        osm.id AS way_id,
        osm.tags,
        t.node_id,
        t.idx
    FROM ST_ReadOSM('norway-latest.osm.pbf') AS osm,
    UNNEST(osm.refs) WITH ORDINALITY AS t(node_id, idx)
    WHERE
        osm.kind = 'way'
        AND osm.tags['building'] IS NOT NULL
),
node_points AS (
    SELECT
        id AS node_id,
        lat,
        lon
    FROM ST_ReadOSM('norway-latest.osm.pbf')
    WHERE kind = 'node'
),
way_geometries AS (
    SELECT
        bw.way_id,
        bw.tags,
        ST_MakeLine(
            LIST(ST_Point(np.lon, np.lat) ORDER BY bw.idx)
        ) AS linestring
    FROM building_ways bw
    JOIN node_points np ON bw.node_id = np.node_id
    GROUP BY bw.way_id, bw.tags
),
polygon_geometries AS (
    SELECT
        way_id,
        tags,
        ST_MakePolygon(linestring) AS geometry
    FROM way_geometries
    WHERE ST_IsClosed(linestring)
),
projected_geometries AS (
    SELECT
        way_id,
        tags,
        ST_Transform(geometry, 'EPSG:4326', 'EPSG:25833', always_xy := true) AS geometry_25833
    FROM polygon_geometries
)
SELECT
    way_id,
    tags['building'] AS building_type,
    tags['industrial'] AS industrial_type,
    tags['name'] AS name,
    tags['operator'] AS operator,
    tags['start_date'] AS start_date,
    tags['description'] AS description,
    geometry_25833,
    ST_Area(geometry_25833) AS area_m2
FROM projected_geometries;""")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))