<a href="https://colab.research.google.com/github/pacificspatial/flateau/blob/main/notebook/sdsc_bootcamp_tokyo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DuckDB を使った PLATEAU データの解析ハンズオン！

In [1]:
# Duckdb is already in Colab!
import duckdb
import pandas as pd
import geopandas as gpd

## DuckDBが動くかテスト

In [2]:
# Smoke test: run a trivial query against the in-memory database through the
# module-level duckdb.sql() API, then print the resulting relation.
smoke_test = duckdb.sql('SELECT 42')
smoke_test.show()

┌───────┐
│  42   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [3]:
# A relation held in a Python variable can be queried by name: the SQL text
# below refers to `r1`, so that variable name must not be changed.
r1 = duckdb.sql('SELECT 42 AS i')
doubled = duckdb.sql('SELECT i * 2 AS k FROM r1')
doubled.show()

┌───────┐
│   k   │
│ int32 │
├───────┤
│    84 │
└───────┘



## Extension を確認

In [53]:
# List the extensions DuckDB reports, including their loaded/installed flags.
# Note: the same query could instead be run on an explicit connection created
# with duckdb.connect(...); this cell uses the module-level duckdb.sql().
extensions_sql = 'select * from duckdb_extensions()'
duckdb.sql(extensions_sql)

┌──────────────────┬─────────┬───────────┬──────────────────────┬──────────────────────────────────┬───────────────────┐
│  extension_name  │ loaded  │ installed │     install_path     │           description            │      aliases      │
│     varchar      │ boolean │  boolean  │       varchar        │             varchar              │     varchar[]     │
├──────────────────┼─────────┼───────────┼──────────────────────┼──────────────────────────────────┼───────────────────┤
│ autocomplete     │ false   │ false     │                      │ Add supports for autocomplete …  │ []                │
│ fts              │ true    │ true      │ (BUILT-IN)           │ Adds support for Full-Text Sea…  │ []                │
│ httpfs           │ true    │ true      │ /root/.duckdb/exte…  │ Adds support for reading and w…  │ [http, https, s3] │
│ icu              │ true    │ true      │ (BUILT-IN)           │ Adds support for time zones an…  │ []                │
│ inet             │ false   │ f

## Extensionをインストールして読み込む

In [23]:
# Extensions this notebook relies on: httpfs for the https:// Parquet URLs read
# in later cells, spatial for the ST_* geometry functions.
required_extensions = ("httpfs", "spatial")

# Install everything first, then load, keeping the original statement order.
for extension in required_extensions:
    duckdb.sql(f"INSTALL '{extension}'")

for extension in required_extensions:
    duckdb.sql(f"LOAD {extension}")

## Geometryを取り扱えるか確認

In [25]:
# Sanity check for the spatial extension: build a point and display it.
point_sql = 'SELECT ST_POINT(0,0)'
duckdb.sql(point_sql)

┌────────────────┐
│ st_point(0, 0) │
│    geometry    │
├────────────────┤
│ POINT (0 0)    │
└────────────────┘

## 今回使うデータのリスト
- https://flateau.s3.ap-northeast-1.amazonaws.com/data/plateau/tokyo23/2022/buildings/tokyo23_2022_buildings_centroid.parquet

- https://flateau.s3.ap-northeast-1.amazonaws.com/data/plateau/tokyo23/2022/buildings/tokyo23_2022_buildings_polygon.parquet

- https://flateau.s3.ap-northeast-1.amazonaws.com/data/topography/tokyo23_elevation_h3lvl10.parquet

- https://flateau.s3.ap-northeast-1.amazonaws.com/data/topography/tokyo23_slope_h3lvl10.parquet


## まずデータを読み込む

In [33]:
# Load the building centroids from the remote Parquet file into a local table.
# CREATE OR REPLACE rebuilds the table in a single statement, so the cell can
# be re-run freely and there is no separate DROP that would leave the table
# missing if the download fails part-way.
# The `geom` column is decoded from WKB with ST_GeomFromWKB.
BUILDING_CENTROID_URL = (
    "https://flateau.s3.ap-northeast-1.amazonaws.com/data/plateau/tokyo23/2022/"
    "buildings/tokyo23_2022_buildings_centroid.parquet"
)
duckdb.sql(f"""
    create or replace table building_centroid as
    select
        h3index10,
        cal_zmin_m,
        cal_height_m,
        ST_GeomFromWKB(geom) as geom
    from '{BUILDING_CENTROID_URL}'
""")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [36]:
# Load the building footprints from the remote Parquet file into a local table.
# CREATE OR REPLACE rebuilds the table in a single statement, so the cell can
# be re-run freely and there is no separate DROP that would leave the table
# missing if the download fails part-way.
# The source column is named `geometry`; it is decoded from WKB and exposed as
# `geom`.
BUILDING_POLYGON_URL = (
    "https://flateau.s3.ap-northeast-1.amazonaws.com/data/plateau/tokyo23/2022/"
    "buildings/tokyo23_2022_buildings_polygon.parquet"
)
duckdb.sql(f"""
    create or replace table building_polygon as
    select
        h3index10,
        cal_zmin_m,
        cal_height_m,
        ST_GeomFromWKB(geometry) as geom
    from '{BUILDING_POLYGON_URL}'
""")

FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [42]:
# Load the elevation grid from the remote Parquet file into a local table.
# CREATE OR REPLACE rebuilds the table in a single statement, so the cell can
# be re-run freely and there is no separate DROP that would leave the table
# missing if the download fails part-way.
# NOTE(review): only the cell index and geometry are selected here; no
# elevation value column is loaded (the slope table loads val_* columns).
# Confirm whether that is intentional.
ELEVATION_URL = (
    "https://flateau.s3.ap-northeast-1.amazonaws.com/data/topography/"
    "tokyo23_elevation_h3lvl10.parquet"
)
duckdb.sql(f"""
    create or replace table elevation as
    select
        h3index10,
        ST_GeomFromWKB(geometry) as geom
    from '{ELEVATION_URL}'
""")


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [45]:
# Load the slope grid from the remote Parquet file into a local table.
# CREATE OR REPLACE rebuilds the table in a single statement, so the cell can
# be re-run freely and there is no separate DROP that would leave the table
# missing if the download fails part-way.
# The source column is named `geometry`; it is decoded from WKB and exposed as
# `geom`.
SLOPE_URL = (
    "https://flateau.s3.ap-northeast-1.amazonaws.com/data/topography/"
    "tokyo23_slope_h3lvl10.parquet"
)
duckdb.sql(f"""
    create or replace table slope as
    select
        h3index10,
        val_max,
        val_mean,
        val_median,
        val_mode,
        ST_GeomFromWKB(geometry) as geom
    from '{SLOPE_URL}'
""")


FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))

In [47]:
# Join building footprints to the slope table on the shared h3index10 key and
# show the ten rows with the largest val_max.
# An explicit JOIN ... ON replaces the comma join so the join condition is kept
# apart from any filtering; the result set is the same.
# Note: several buildings can share one cell and therefore one val_max, and the
# query defines no tie-breaker, so which tied rows make the top ten may vary.
duckdb.sql("""
    select t1.h3index10, t1.geom, t2.val_max
    from building_polygon t1
    join slope t2 on t1.h3index10 = t2.h3index10
    order by t2.val_max desc
    limit 10
""")

┌────────────────────┬─────────────────────────────────────────────────────────────────────────────┬───────────────────┐
│     h3index10      │                                    geom                                     │      val_max      │
│       uint64       │                                  geometry                                   │      double       │
├────────────────────┼─────────────────────────────────────────────────────────────────────────────┼───────────────────┤
│ 622329812660912127 │ POLYGON ((139.74449723500115 35.66740033892029, 139.74445959311024 35.667…  │ 55.72602844238281 │
│ 622329812660912127 │ POLYGON ((139.74477272466495 35.66700668755646, 139.7444687328894 35.6665…  │ 55.72602844238281 │
│ 622329812660912127 │ POLYGON ((139.74373599923877 35.667795773549145, 139.74374646925907 35.66…  │ 55.72602844238281 │
│ 622329812692303871 │ POLYGON ((139.73544700956978 35.680677084621124, 139.73511680749166 35.68…  │  55.2908935546875 │
│ 622329812692303871 │ POLYGON (

## クエリの結果をGeoJSONで出力

In [51]:
# Export the ten building footprints with the largest slope val_max to
# 'test.geojson' (path relative to the working directory) through the spatial
# extension's GDAL writer, passing WRITE_BBOX=YES as a layer creation option.
# An explicit JOIN ... ON replaces the comma join; the selected rows are the
# same as before.
duckdb.sql("""
    copy (
        select t1.geom, t2.val_max
        from building_polygon t1
        join slope t2 on t1.h3index10 = t2.h3index10
        order by t2.val_max desc
        limit 10
    ) to 'test.geojson'
    with (
        format gdal,
        driver 'GeoJSON',
        LAYER_CREATION_OPTIONS 'WRITE_BBOX=YES'
    )
""")
