In [3]:
import duckdb
# Use a DuckDB COPY statement to write a one-row result (x=1, y=2, z='bob') to data.parquet.
duckdb.execute("copy (select 1 as x, 2 as y, 'bob' as z) to 'data.parquet'")

<duckdb.duckdb.DuckDBPyConnection at 0x1061a26f0>

In [1]:
from datafusion import SessionContext
import pyarrow as pa
import pyarrow.dataset as ds

In [2]:
# Create the DataFusion session context that the following cells register tables on and query.
ctx = SessionContext()

In [9]:
# Register the data.parquet file with the session under the table name "data2".
ctx.register_parquet("data2", "data.parquet")

In [10]:
# Query the registered parquet table and print the result.
ctx.sql("select * from data2").show()

DataFrame()
+---+---+-----+
| x | y | z   |
+---+---+-----+
| 1 | 2 | bob |
+---+---+-----+


In [None]:
# Small in-memory sample: four people, each with an id, a name and an age.
dummy_data = {
    "id": [1, 2, 3, 4],
    "name": ["Alice", "Bob", "Charlie", "Diana"],
    "age": [25, 30, 35, 40],
}

# Turn the column dict into an Arrow table, then hand it to the DataFusion
# session under the name "dummy_data1".
arrow_table = pa.table(dummy_data)
ctx.from_arrow(arrow_table, "dummy_data1")


In [19]:
# Read back every row of the table named "dummy_data1".
ctx.sql("select * from dummy_data1")

id,name,age
1,Alice,25
2,Bob,30
3,Charlie,35
4,Diana,40


In [16]:
# Self-join dummy_data1 on id and, for each id, sum the age taken from the joined side.
# The query has no ORDER BY, so the row order of the result is not fixed.
sql = """
    select s.id, sum(t.age) as age_tot
    from dummy_data1 as s
        inner join dummy_data1 as t
            using(id)
    group by all
"""
ctx.sql(sql)

id,age_tot
3,35
2,30
4,40
1,25


In [1]:
from pyiceberg.catalog.sql import SqlCatalog
# Configure a SQL-backed Iceberg catalog named "default".
# - uri: an in-memory SQLite database, so the catalog contents do not outlive the kernel.
# - warehouse: a file:// location built from warehouse_path.
warehouse_path = "./warehouse"
catalog = SqlCatalog(
    "default",
    uri="sqlite:///:memory:",  # plain string: there is nothing to interpolate
    warehouse=f"file://{warehouse_path}",
)

In [2]:
# Create the catalog namespace that the table identifiers in later cells are prefixed with.
namespace = "test_ts"
catalog.create_namespace(namespace)

In [4]:
import duckdb
# Number of synthetic rows to generate.
rows = 5000

# DuckDB query producing `rows` rows: a sequential row_id, a uuid() key,
# the current date, and random() * 100 rounded to 2 decimals.
sql = f"""
    select t.row_id, uuid() as txn_key, current_date as rpt_dt
        ,round(random() * 100,2) as some_val
    from generate_series(1,{rows}) t(row_id)
"""

# Materialise the result explicitly as a pyarrow.Table: later cells read
# `duck_df.schema` and pass `duck_df` to the Iceberg table's `append`.
# NOTE(review): the return type of `.arrow()` is reported to differ between
# DuckDB releases (Table vs. RecordBatchReader) — confirm against the installed version.
duck_df = duckdb.execute(sql).fetch_arrow_table()


In [5]:
# Create the Iceberg table "<namespace>.duckdb_data" using the Arrow data's schema,
# then append the generated rows to it.
table_ducks = catalog.create_table(
    f"{namespace}.duckdb_data",
    schema=duck_df.schema,
)
table_ducks.append(duck_df)


In [10]:
# Load the Iceberg table back from the catalog and read an unfiltered scan of it into Arrow.
ice_table1 = catalog.load_table(f"{namespace}.duckdb_data").scan().to_arrow()

In [11]:
# Hand the Arrow data read from Iceberg to the DataFusion session under the name "duck_data".
# NOTE(review): `ctx` is not created in any nearby cell — it must already exist in the kernel.
ctx.from_arrow(ice_table1, "duck_data")

row_id,txn_key,rpt_dt,some_val
1,8a920983-df0d-4211-908c-78784dc548bf,2025-01-05,45.8
2,636fc7aa-1a9b-46e1-ba7e-fd531ba2a716,2025-01-05,64.79
3,ee1e45bc-e46b-4a55-aec2-0f0d38fa0c30,2025-01-05,73.56
4,fcfa971d-76cd-41f0-935c-4bf5953afc72,2025-01-05,98.5
5,9918932a-5f83-4f96-99b5-51660a4b88f9,2025-01-05,6.44
6,b201ab6b-b8c6-4bba-bfc1-7b6a12042d97,2025-01-05,85.38
7,100f7f65-84b7-4c53-929a-46934aed6e0a,2025-01-05,59.42
8,7935af52-5374-44ac-bef8-f5197b4c20b0,2025-01-05,44.91
9,0efcff37-bb77-4c05-a1ae-6050106ea138,2025-01-05,73.52
10,8d438cea-2673-4443-bbe6-95518cd8eea8,2025-01-05,11.76


In [12]:
# Per report date: number of rows and the total of some_val.
# NOTE(review): the alias `rec_cntm` looks like a typo for `rec_cnt`; left as-is because it is the output column name.
ctx.sql("select rpt_dt, count(*) as rec_cntm, sum(some_val) as tot_val from duck_data group by all")

rpt_dt,rec_cntm,tot_val
2025-01-05,5000,251376.15999999983


In [1]:
import duckdb
# Smoke test: run a trivial constant query in DuckDB and print the result.
duckdb.sql("SELECT 42 as x").show()

┌───────┐
│   x   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [2]:
from datafusion import SessionContext
# Smoke test: create a DataFusion session and run the same trivial constant query.
ctx = SessionContext()
ctx.sql("select 42 as x").show()


DataFrame()
+----+
| x  |
+----+
| 42 |
+----+


In [3]:
# Write a one-row result to duck.parquet using DuckDB's COPY ... TO.
duckdb.execute("copy (select 42 as x) to 'duck.parquet'")

<duckdb.duckdb.DuckDBPyConnection at 0x106ad3c30>

In [5]:
# Write the same one-row result to fusion.parquet using COPY ... TO through DataFusion.
# NOTE(review): presumably the write only runs when the returned DataFrame is evaluated
# (here, by being displayed as the cell output) — confirm before moving this off the last line.
ctx.sql("copy (select 42 as x) to 'fusion.parquet'")

count
1


In [6]:
# Read duck.parquet back by querying the file path directly in DuckDB.
duckdb.sql("select * from 'duck.parquet'").show()

┌───────┐
│   x   │
│ int32 │
├───────┤
│    42 │
└───────┘



In [7]:
# Register fusion.parquet as the table "my_parquet_table", then query it and print the rows.
ctx.register_parquet("my_parquet_table", 'fusion.parquet')
ctx.sql("select * from my_parquet_table").show()

DataFrame()
+----+
| x  |
+----+
| 42 |
+----+


In [16]:
# Number of synthetic rows to generate; also read by the DuckDB variant of this query.
row_cnt = 5
# Generate the rows in DataFusion: unnest a range starting at 1 into column x, then attach
# a uuid() key, the current date, and random()*100 rounded to 2 decimals.
sql = f"""
    with data as (select unnest(range(1,{row_cnt}+1)) as x)
    select x, uuid() as txn_key, current_date as rpt_dt
        ,round(random()*100 ,2) as some_val
    from data
"""
ctx.sql(sql)

x,txn_key,rpt_dt,some_val
1,0a484188-df64-4454-aed7-c6defd5ac7c2,2025-01-08,2.57
2,00e21116-9652-46e7-bbaa-4a8db1e802c7,2025-01-08,67.02
3,a13cd55c-1ea7-478b-967c-edeb744e4cc2,2025-01-08,23.5
4,82cdf517-5aae-40d1-a729-41454c2ccf48,2025-01-08,11.84
5,6cc9ec71-aa2d-4edb-9104-ef691a258eef,2025-01-08,3.99


In [15]:
import duckdb
# Generate the same kind of synthetic rows in DuckDB, using generate_series(1, row_cnt).
# Depends on `row_cnt` having been defined by an earlier cell.
sql =f"""
    select t.row_id, uuid() as txn_key, current_date as rpt_dt
        ,round(random() * 100,2) as some_val
    from generate_series(1,{row_cnt}) t(row_id)
"""
duckdb.sql(sql)

┌────────┬──────────────────────────────────────┬────────────┬──────────┐
│ row_id │               txn_key                │   rpt_dt   │ some_val │
│ int64  │                 uuid                 │    date    │  double  │
├────────┼──────────────────────────────────────┼────────────┼──────────┤
│      1 │ 084c3f7c-c125-42a3-9aaa-e34fdb073c4c │ 2025-01-08 │     97.4 │
│      2 │ de5705bf-d11e-4b4f-8989-559f83790348 │ 2025-01-08 │    94.58 │
│      3 │ bfa23440-acfd-4d85-8a4f-9fa74bad6624 │ 2025-01-08 │     5.16 │
│      4 │ fda17bf5-82f7-4434-b550-6d45b9af8c8b │ 2025-01-08 │    90.76 │
│      5 │ 2cb1ef9f-3aa4-4d07-94a1-0c91cf8fff03 │ 2025-01-08 │    93.13 │
└────────┴──────────────────────────────────────┴────────────┴──────────┘