In [1]:
import duckdb
from datafusion import SessionContext
from pyarrow import dataset as ds
# Single DataFusion session context shared by every DataFusion example in this notebook.
ctx = SessionContext()

#### Example 1: The Answer to All Things in the Universe

In [6]:
# Run one constant query through both engines so their output formats can be compared.
sql = "select 42 as x"
for run_sql in (duckdb.sql, ctx.sql):
    run_sql(sql).show()

┌───────┐
│   x   │
│ int32 │
├───────┤
│    42 │
└───────┘

DataFrame()
+----+
| x  |
+----+
| 42 |
+----+


#### Example 2: Reading Data

In [7]:
# DuckDB: read the files matched by the glob directly inside the query.
csv_glob_query = "select * from read_csv('./dummy_data/*')"
duckdb.sql(csv_glob_query).show()

┌───────┬───────┬───────┐
│   a   │   b   │   c   │
│ int64 │ int64 │ int64 │
├───────┼───────┼───────┤
│     1 │     2 │     3 │
│     4 │     5 │     6 │
│     7 │     8 │     9 │
│    10 │    11 │    12 │
└───────┴───────┴───────┘



In [None]:
# DataFusion: register the CSV files as the table `csv_data1`, then query that table.
csv_table_name = "csv_data1"
csv_source_glob = "./dummy_data/*"
ctx.register_csv(csv_table_name, csv_source_glob)

csv_table_query = "select * from csv_data1"
ctx.sql(csv_table_query).show()

#### Example 3: Exporting Data

In [4]:
# DuckDB: export the CSV rows to a Parquet file with a COPY statement.
duck_export_query = "copy (select * from read_csv('./dummy_data/*')) to 'ducks.parquet'"
duckdb.sql(duck_export_query)

In [5]:
# DataFusion: ctx.sql() only builds a lazy DataFrame for COPY, so nothing is
# written until the plan is executed. Call .show() to run the export explicitly
# (it prints the written row count) instead of depending on the notebook
# rendering the returned DataFrame as the cell's last expression.
ctx.sql("copy (select * from csv_data1) to 'datafusion1.parquet'").show()

count
4


#### Example 4: Generating Test Data

In [9]:
# Number of synthetic rows each engine should generate.
row_cnt = 5

# DuckDB: generate_series() is used as a table source that supplies row_id.
sql = f"""
    select t.row_id, uuid() as txn_key, current_date as rpt_dt
        ,round(random() * 100,2) as some_val
    from generate_series(1,{row_cnt}) t(row_id)
"""
duck_rows = duckdb.sql(sql)
duck_rows.show()

# DataFusion: unnest(range(...)) in a CTE supplies the row ids instead.
sql = f"""
    with data as (select unnest(range(1,{row_cnt}+1)) as x)
    select x as row_id, uuid() as txn_key, current_date as rpt_dt
        ,round(random()*100 ,2) as some_val
    from data
"""
fusion_rows = ctx.sql(sql)
fusion_rows.show()

┌────────┬──────────────────────────────────────┬────────────┬──────────┐
│ row_id │               txn_key                │   rpt_dt   │ some_val │
│ int64  │                 uuid                 │    date    │  double  │
├────────┼──────────────────────────────────────┼────────────┼──────────┤
│      1 │ 319095da-9b93-436b-8d9a-f6c6f6fb9ebb │ 2025-01-08 │    48.52 │
│      2 │ 5bc675b3-6500-47fc-9577-f7b576fd14c3 │ 2025-01-08 │    55.86 │
│      3 │ 7b96e8e8-01c8-44e3-b8b7-8031a12acaab │ 2025-01-08 │    84.67 │
│      4 │ 5c77279c-532f-47f2-bc31-334d1ff7fe13 │ 2025-01-08 │     4.37 │
│      5 │ ba12879d-5875-4f78-b4af-749a4ab679c3 │ 2025-01-08 │     9.81 │
└────────┴──────────────────────────────────────┴────────────┴──────────┘

DataFrame()
+--------+--------------------------------------+------------+----------+
| row_id | txn_key                              | rpt_dt     | some_val |
+--------+--------------------------------------+------------+----------+
| 1      | 053c65db-ebc5-

#### Example 5: Reading an Iceberg Table

In [2]:
from pyiceberg.catalog.sql import SqlCatalog
# Both catalog settings are derived from the local warehouse directory.
warehouse_path = "./icehouse"
catalog_settings = {
    "uri": f"sqlite:///{warehouse_path}/icyhot.db",
    "warehouse": f"file://{warehouse_path}",
}

# Open the catalog named "default" and load the table used by the examples below.
catalog = SqlCatalog("default", **catalog_settings)
ice_table = catalog.load_table("test_ns.test_data")

In [3]:
# DuckDB: hand the Iceberg scan to a new connection as table `duck_iceberg`, then query it.
cn = duckdb.connect()
iceberg_scan = ice_table.scan()
iceberg_scan.to_duckdb(connection=cn, table_name="duck_iceberg")

duck_preview_query = "select * from duck_iceberg limit 5"
cn.sql(duck_preview_query).show()

┌────────┬──────────────────────────────────────┬────────────┬──────────┐
│ row_id │               txn_key                │   rpt_dt   │ some_val │
│ int64  │               varchar                │    date    │  double  │
├────────┼──────────────────────────────────────┼────────────┼──────────┤
│      1 │ e5fe0c33-8cf3-4045-a967-aed4bba95663 │ 2025-01-08 │    98.23 │
│      2 │ c123ce95-7538-4dca-8a71-45de321c2891 │ 2025-01-08 │    66.35 │
│      3 │ 235c36a0-c6b4-474b-82de-e9f3437c696c │ 2025-01-08 │    96.33 │
│      4 │ f8799d9f-e48a-4e93-aa73-692930391172 │ 2025-01-08 │    82.08 │
│      5 │ 4da16506-76ca-46ae-b044-00a1cfa355c9 │ 2025-01-08 │    43.91 │
└────────┴──────────────────────────────────────┴────────────┴──────────┘



In [4]:
# DataFusion: convert the Iceberg scan to Arrow, wrap it as a pyarrow dataset,
# and register that dataset as the table `datafusion_iceberg`.
iceberg_arrow = ice_table.scan().to_arrow()
iceberg_dataset = ds.dataset(iceberg_arrow)
ctx.register_dataset("datafusion_iceberg", iceberg_dataset)

fusion_preview_query = "select * from datafusion_iceberg limit 5"
ctx.sql(fusion_preview_query).show()

DataFrame()
+--------+--------------------------------------+------------+----------+
| row_id | txn_key                              | rpt_dt     | some_val |
+--------+--------------------------------------+------------+----------+
| 1      | e5fe0c33-8cf3-4045-a967-aed4bba95663 | 2025-01-08 | 98.23    |
| 2      | c123ce95-7538-4dca-8a71-45de321c2891 | 2025-01-08 | 66.35    |
| 3      | 235c36a0-c6b4-474b-82de-e9f3437c696c | 2025-01-08 | 96.33    |
| 4      | f8799d9f-e48a-4e93-aa73-692930391172 | 2025-01-08 | 82.08    |
| 5      | 4da16506-76ca-46ae-b044-00a1cfa355c9 | 2025-01-08 | 43.91    |
+--------+--------------------------------------+------------+----------+
