In [1]:
import ibis
import pyarrow as pa

In [2]:
# Build a small in-memory PyArrow table to act as demonstration data.
# In real use you would already have an Ibis connection (`con`, e.g. to
# DuckDB or PostgreSQL) and an Ibis table expression (`table`); here both
# are created from this PyArrow table in the following cell.
data = pa.table(
    dict(
        a=[1, 2, 3, 4],
        b=["apple", "banana", "cherry", "date"],
        c=[True, False, True, False],
    )
)

In [4]:
# Connect to the Polars backend for a simple example, then register the
# PyArrow data with it under the table name "my_data".
con = ibis.polars.connect()
table = con.create_table("my_data", obj=data)


In [6]:
# Build an example expression: keep column `a` and derive `b_length`, the
# string length of column `b` (cast to string first).
expression = table.select(
    table["a"],
    table["b"].cast("string").length().name("b_length"),
)

In [8]:
# Display the expression itself before executing it. The record-batch reader
# is created in the next cell, right before it is consumed, so it is not
# built a second time here.
expression

In [9]:
# Execute the expression as a stream of PyArrow record batches.
# `chunk_size` sets the maximum number of rows per batch and `limit=None`
# applies no row limit; a `params` mapping can also be supplied if needed.
batches_reader = expression.to_pyarrow_batches(
    chunk_size=2,
    limit=None,
)

In [10]:
print("Iterating through PyArrow record batches:")

# Number the batches from 1 for display.
# NOTE(review): the reader is presumably exhausted once this loop finishes;
# re-create `batches_reader` before iterating again - confirm.
for batch_number, batch in enumerate(batches_reader, start=1):
    print(f"\n--- Batch {batch_number} ---")
    print(batch)
    print(f"Number of rows in batch: {batch.num_rows}")
    print(f"Schema of batch: {batch.schema}")

    # Each batch can be processed further here,
    # for example by converting it to a pandas DataFrame:
    # df = batch.to_pandas()
    # print(f"Batch {batch_number} as Pandas DataFrame:\n{df}")

Iterating through PyArrow record batches:

--- Batch 1 ---
pyarrow.RecordBatch
a: int64
b_length: int32
----
a: [1,2]
b_length: [5,6]
Number of rows in batch: 2
Schema of batch: a: int64
b_length: int32

--- Batch 2 ---
pyarrow.RecordBatch
a: int64
b_length: int32
----
a: [3,4]
b_length: [6,4]
Number of rows in batch: 2
Schema of batch: a: int64
b_length: int32
