# DuckLake experiments

In this notebook we create and use a DuckLake catalog, first through the DuckDB Python API and then through ibis. The catalog is backed by:

- MinIO (S3-compatible object storage) for data storage
- Postgres for metadata storage

In [None]:
import duckdb
import ibis

from poor_man_lakehouse.config import settings

# Render ibis expressions eagerly when they are the last expression of a cell.
ibis.options.interactive = True

# Plain in-memory DuckDB session; the DuckLake catalog is attached to it below.
con = duckdb.connect()

# Install both extensions, refresh them, then load them into this session.
required_extensions = ("ducklake", "postgres")
for extension_name in required_extensions:
    con.install_extension(extension_name)
con.sql("UPDATE EXTENSIONS;")
for extension_name in required_extensions:
    con.load_extension(extension_name)

# DuckDB's S3 secret expects ENDPOINT as a bare host[:port]; the protocol is
# selected by USE_SSL. Strip the scheme the same way the ibis cell further
# down does, otherwise the scheme ends up duplicated in the request URL.
s3_endpoint = settings.AWS_ENDPOINT_URL.replace("http://", "")

# NOTE: the settings values are interpolated verbatim into the SQL text, so a
# single quote inside any of them would break the statement.

# Credentials for the S3-compatible object store that holds the data files.
con.sql(f"""
CREATE OR REPLACE SECRET s3_secret (
        TYPE S3,
        KEY_ID '{settings.AWS_ACCESS_KEY_ID}',
        SECRET '{settings.AWS_SECRET_ACCESS_KEY}',
        ENDPOINT '{s3_endpoint}',
        URL_STYLE 'path',
        USE_SSL false
        );
""")

# Credentials for the Postgres database that stores the DuckLake metadata.
con.sql(f"""
CREATE OR REPLACE SECRET postgres_secret (
        TYPE postgres,
        HOST '{settings.POSTGRES_HOST}',
        DATABASE '{settings.POSTGRES_DB}',
        USER '{settings.POSTGRES_USER}',
        PASSWORD '{settings.POSTGRES_PASSWORD}'
        );
""")

# DuckLake secret tying both together: data files go under the warehouse
# bucket, metadata goes to Postgres through `postgres_secret`.
con.sql(f"""
CREATE OR REPLACE SECRET ducklake_secret (
        TYPE ducklake,
        METADATA_PATH '',
        DATA_PATH '{settings.WAREHOUSE_BUCKET}ducklake/',
        METADATA_PARAMETERS MAP {{'TYPE': 'postgres', 'SECRET': 'postgres_secret'}}
        );
""")
con.sql("""
ATTACH OR REPLACE 'ducklake:ducklake_secret' AS my_ducklake;
""")

# Make the DuckLake catalog the default so the unqualified table names used in
# the following cells are created in (and read from) DuckLake instead of the
# session's in-memory catalog.
con.sql("USE my_ducklake;")

In [2]:
# Load the remote nl_stations CSV into a table named `nl_train_stations`
# (unqualified, so it is created in the session's current catalog).
create_stations_sql = (
    "CREATE OR REPLACE TABLE nl_train_stations AS\n"
    "    FROM 'https://blobs.duckdb.org/nl_stations.csv';"
)
con.sql(create_stations_sql)

In [3]:
# Preview the table created above; as the last expression of the cell the
# resulting relation is rendered as the cell output.
con.sql("SELECT * from nl_train_stations")

┌───────┬─────────┬─────────┬────────────┬──────────────────┬───────────────────────────┬───────────────────────────┬─────────┬───────────────────────────┬─────────────────┬─────────────────┐
│  id   │  code   │   uic   │ name_short │   name_medium    │         name_long         │           slug            │ country │           type            │     geo_lat     │     geo_lng     │
│ int64 │ varchar │  int64  │  varchar   │     varchar      │          varchar          │          varchar          │ varchar │          varchar          │     double      │     double      │
├───────┼─────────┼─────────┼────────────┼──────────────────┼───────────────────────────┼───────────────────────────┼─────────┼───────────────────────────┼─────────────────┼─────────────────┤
│   266 │ HT      │ 8400319 │ Den Bosch  │ 's-Hertogenbosch │ 's-Hertogenbosch          │ s-hertogenbosch           │ NL      │ knooppuntIntercitystation │        51.69048 │         5.29362 │
│   269 │ HTO     │ 8400320 │ Dn Bosch O

In [None]:
import ibis

from poor_man_lakehouse.config import settings

# Render ibis expressions eagerly when they are the last expression of a cell.
ibis.options.interactive = True

# ibis documents `extensions` as a sequence of extension names to install and
# load on connect, so pass a list rather than one comma-joined string.
con = ibis.duckdb.connect(extensions=["ducklake", "postgres"])

# DuckDB's S3 secret expects ENDPOINT as a bare host[:port]; the protocol is
# selected by USE_SSL, so the scheme is stripped from the configured URL.
s3_endpoint = settings.AWS_ENDPOINT_URL.replace("http://", "")

# NOTE: the settings values are interpolated verbatim into the SQL text, so a
# single quote inside any of them would break the statement.

# Credentials for the S3-compatible object store that holds the data files.
con.raw_sql(f"""
CREATE OR REPLACE SECRET s3_secret (
        TYPE S3,
        KEY_ID '{settings.AWS_ACCESS_KEY_ID}',
        SECRET '{settings.AWS_SECRET_ACCESS_KEY}',
        ENDPOINT '{s3_endpoint}',
        URL_STYLE 'path',
        USE_SSL false
        );
""")

# Credentials for the Postgres database that stores the DuckLake metadata.
con.raw_sql(f"""
CREATE OR REPLACE SECRET postgres_secret (
        TYPE postgres,
        HOST '{settings.POSTGRES_HOST}',
        DATABASE '{settings.POSTGRES_DB}',
        USER '{settings.POSTGRES_USER}',
        PASSWORD '{settings.POSTGRES_PASSWORD}'
        );
""")

# DuckLake secret tying both together: data files go under the warehouse
# bucket, metadata goes to Postgres through `postgres_secret`.
con.raw_sql(f"""
CREATE OR REPLACE SECRET ducklake_secret (
        TYPE ducklake,
        METADATA_PATH '',
        DATA_PATH '{settings.WAREHOUSE_BUCKET}ducklake/',
        METADATA_PARAMETERS MAP {{'TYPE': 'postgres', 'SECRET': 'postgres_secret'}}
        );
""")

# Attach the catalog and make it the default for unqualified table names.
con.raw_sql("ATTACH OR REPLACE 'ducklake:ducklake_secret' as my_ducklake;")
con.raw_sql("USE my_ducklake")

# Sanity check: `my_ducklake` should appear among the attached catalogs.
con.list_catalogs()

['__ducklake_metadata_my_ducklake', 'memory', 'my_ducklake', 'system', 'temp']

In [6]:
# Load the remote nl_stations CSV into `nl_train_stations`. The name is
# unqualified, so it resolves against the catalog selected by the earlier
# `USE my_ducklake` statement.
create_stations_sql = (
    "CREATE OR REPLACE TABLE nl_train_stations AS\n"
    "    FROM 'https://blobs.duckdb.org/nl_stations.csv';"
)
con.raw_sql(create_stations_sql)


<_duckdb.DuckDBPyConnection at 0x117a8a6b0>

In [7]:
# Look the table up through ibis; as the last expression of the cell the
# table expression is rendered as the cell output.
nl_train_stations = con.table("nl_train_stations")
nl_train_stations