In [1]:
from pathlib import Path

import duckdb

In [2]:
# Open a dedicated connection to a fresh in-memory DuckDB database.
# ":memory:" is the documented default for `database`; it is spelled out
# here so readers can see that nothing is persisted to disk.
cursor = duckdb.connect(database=":memory:")

In [3]:
# Smoke test: confirm the connection can execute a trivial query.
smoke_test_sql = 'SELECT 42'
cursor.execute(smoke_test_sql).fetchall()

[(42,)]

In [36]:
# Peek at one row from the Parquet files under ../sandbox--pyarrow/tmp/.
# There is no ORDER BY, so which row comes back is not guaranteed.
peek_sql = """
SELECT * FROM read_parquet('../sandbox--pyarrow/tmp/*.parquet') LIMIT 1;
"""
cursor.execute(peek_sql).fetchall()

[(2000000,
  '\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed sapien tellus,\naliquet eu semper ut, faucibus in augue. Donec mollis nisi vitae nulla tristique, ut\niaculis tortor ultricies. Nullam finibus tortor eros, et elementum dolor lacinia et.\nIn hac habitasse platea dictumst. Ut a tempus tellus. Nam hendrerit luctus nisi, at\ncondimentum nibh. Morbi sed eleifend dui. Donec pellentesque efficitur nunc ut consequat.\nIn id varius magna, sed accumsan elit. Maecenas lobortis lectus nec fermentum lacinia.\nSuspendisse placerat euismod sem eu pretium. Sed efficitur, dolor sed aliquet molestie,\nmauris enim tristique nulla, in elementum erat velit quis diam. Ut non nisi quis tellus\niaculis lacinia vel non sapien. Fusce eget dui ullamcorper, semper ipsum non, convallis\nmauris. Donec rutrum diam nisi, id tempus sapien pharetra eu. Nunc tempus varius erat,\nultricies congue est aliquam sed.\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed sapien tellus,\naliq

## Prepare data

In [86]:
# Lookup keys: two of the three ids that are inserted into t_original.
ids_setup_sql = """
CREATE OR REPLACE TABLE ids(id INTEGER);
INSERT INTO ids VALUES (4000000), (2000000);
"""

# Rows to join against; id 3000000 has no counterpart in `ids`.
t_original_setup_sql = """
CREATE OR REPLACE TABLE t_original(id INTEGER, some_id INTEGER, text VARCHAR);
INSERT INTO t_original
VALUES (2000000, 21, 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.'),
       (3000000, 22, 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.'),
       (4000000, 21, 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.');
"""

# Both scripts run through duckdb.sql (the module-level connection), not
# through `cursor`. CREATE OR REPLACE keeps the cell safe to re-run.
duckdb.sql(ids_setup_sql)

duckdb.sql(t_original_setup_sql)

In [87]:
# Join the lookup ids against t_original and persist the result as Parquet.
# The output directory is created first: nothing else in the notebook creates
# data/, and the write fails on a fresh checkout if the directory is missing.
Path("data").mkdir(parents=True, exist_ok=True)

# LEFT JOIN keeps every row of `ids`; the comparison uses the standard SQL
# `=` operator rather than the `==` alias.
duckdb.sql("""
SELECT ids.id, some_id, text
FROM ids
LEFT JOIN t_original
ON ids.id = t_original.id
""").write_parquet("data/test-joined.parquet")

In [88]:
# Read the file back to inspect the schema and rows that were written.
joined_parquet_path = "data/test-joined.parquet"
duckdb.read_parquet(joined_parquet_path)

┌─────────┬─────────┬──────────────────────────────────────────────────────────┐
│   id    │ some_id │                           text                           │
│  int32  │  int32  │                         varchar                          │
├─────────┼─────────┼──────────────────────────────────────────────────────────┤
│ 4000000 │      21 │ Lorem ipsum dolor sit amet, consectetur adipiscing elit. │
│ 2000000 │      21 │ Lorem ipsum dolor sit amet, consectetur adipiscing elit. │
└─────────┴─────────┴──────────────────────────────────────────────────────────┘

In [102]:
# A Parquet file can be used directly as the right-hand side of a join.
# The path is a single-quoted SQL string literal (double quotes denote
# identifiers in SQL), and the comparison uses the standard `=` operator.
cursor.execute("""
SELECT table_a.id, some_id, text
FROM (VALUES (4000000)) table_a(id)
LEFT JOIN read_parquet('data/test-joined.parquet') table_b
ON table_a.id = table_b.id
""").df().pipe(print)

        id  some_id                                               text
0  4000000       21  Lorem ipsum dolor sit amet, consectetur adipis...


## How about large data?

In [90]:
%%time
cursor.execute("""
SELECT table_a.id, to_be_ignored, text
FROM (VALUES (4000000), (6000000)) table_a(id)
LEFT JOIN read_parquet("../sandbox--pyarrow/tmp/*.parquet") table_b
ON table_a.id == table_b.id
""").fetchall()

CPU times: user 255 ms, sys: 14.4 ms, total: 269 ms
Wall time: 269 ms


[(4000000,
  6,
  '\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed sapien tellus,\naliquet eu semper ut, faucibus in augue. Donec mollis nisi vitae nulla tristique, ut\niaculis tortor ultricies. Nullam finibus tortor eros, et elementum dolor lacinia et.\nIn hac habitasse platea dictumst. Ut a tempus tellus. Nam hendrerit luctus nisi, at\ncondimentum nibh. Morbi sed eleifend dui. Donec pellentesque efficitur nunc ut consequat.\nIn id varius magna, sed accumsan elit. Maecenas lobortis lectus nec fermentum lacinia.\nSuspendisse placerat euismod sem eu pretium. Sed efficitur, dolor sed aliquet molestie,\nmauris enim tristique nulla, in elementum erat velit quis diam. Ut non nisi quis tellus\niaculis lacinia vel non sapien. Fusce eget dui ullamcorper, semper ipsum non, convallis\nmauris. Donec rutrum diam nisi, id tempus sapien pharetra eu. Nunc tempus varius erat,\nultricies congue est aliquam sed.\nLorem ipsum dolor sit amet, consectetur adipiscing elit. Sed sapien tellus,\