# Python API Chaining and Pipelines

This notebook covers advanced usage patterns:
- Method chaining patterns
- Reusable pipelines with `pipe()`
- Using the `ops` module for PyArrow integration
- Converting between the `Table` API and PyArrow
- Conditional processing and error handling

In [None]:
import pyarrow.parquet as pq

import geoparquet_io as gpio
from geoparquet_io.api import Table, ops, pipe, read

## Method Chaining Patterns

All transformation methods return a new Table, enabling fluent chains.

In [None]:
# Simple chain: every call is made on the value returned by the previous one
result = (
    gpio.read("data/sample.parquet")
    .add_bbox()
    .sort_hilbert()
)

result.info()

In [None]:
# Longer chain: extract(limit=50) is called first, then add_bbox(),
# add_quadkey(resolution=12) and sort_hilbert() follow in that order.
# NOTE(review): limit=50 presumably caps the number of rows rather than
# filtering by a predicate — confirm against the extract() documentation.
result = (
    gpio.read("data/sample.parquet")
    .extract(limit=50)
    .add_bbox()
    .add_quadkey(resolution=12)
    .sort_hilbert()
)

# Report the column names and row count of the chained result
print(f"Columns: {result.column_names}")
print(f"Rows: {result.num_rows}")

## Reusable Pipelines with `pipe()`

Define standard processing pipelines that can be applied to any table.

In [None]:
# Define a reusable optimization pipeline.
# The steps are handed to pipe() as add_bbox() first, sort_hilbert() second.
optimize = pipe(lambda tbl: tbl.add_bbox(), lambda tbl: tbl.sort_hilbert())

# Apply to a file: the pipeline object is called directly on the table
result = optimize(read("data/sample.parquet"))
result.info()

In [None]:
# Pipeline factory: the resolutions are parameters instead of literals
def create_enrichment_pipeline(h3_resolution=9, quadkey_resolution=12):
    """Build a pipeline from four table steps via ``pipe()``.

    The steps are passed to ``pipe()`` in this order: ``add_bbox()``,
    ``add_h3(resolution=h3_resolution)``,
    ``add_quadkey(resolution=quadkey_resolution)``, ``sort_hilbert()``.

    Args:
        h3_resolution: Value forwarded to ``add_h3`` as ``resolution``.
        quadkey_resolution: Value forwarded to ``add_quadkey`` as ``resolution``.

    Returns:
        Whatever ``pipe()`` returns for these steps.
    """
    steps = (
        lambda table: table.add_bbox(),
        lambda table: table.add_h3(resolution=h3_resolution),
        lambda table: table.add_quadkey(resolution=quadkey_resolution),
        lambda table: table.sort_hilbert(),
    )
    return pipe(*steps)


# Create different pipelines from the same factory.
# detailed_pipeline passes h3_resolution=9 explicitly and leaves
# quadkey_resolution at its default of 12.
detailed_pipeline = create_enrichment_pipeline(h3_resolution=9)
# coarse_pipeline is built here but not applied anywhere in this cell.
coarse_pipeline = create_enrichment_pipeline(h3_resolution=6, quadkey_resolution=8)

# Apply the detailed pipeline
result = detailed_pipeline(read("data/sample.parquet"))
print(f"Columns: {result.column_names}")

## Using the `ops` Module

For integration with existing PyArrow workflows, use the pure functions in `ops`.

In [None]:
# Read with PyArrow directly (pq is pyarrow.parquet) instead of gpio.read()
arrow_table = pq.read_table("data/sample.parquet")
print(f"PyArrow table: {arrow_table.num_rows} rows")

In [None]:
# Apply ops functions: each call receives the current table and its return
# value is bound back to arrow_table, so the three steps run in this order.
# NOTE: because arrow_table is overwritten, re-running this cell feeds the
# already-processed table back in; re-run the pq.read_table cell first.
arrow_table = ops.add_bbox(arrow_table)
arrow_table = ops.add_quadkey(arrow_table, resolution=12)
arrow_table = ops.sort_hilbert(arrow_table)

print(f"Columns: {arrow_table.column_names}")

In [None]:
# Wrap result in Table for proper GeoParquet output
# (Table is the class imported from geoparquet_io.api), then call info() on it
table = Table(arrow_table)
table.info()

## Converting Between APIs

In [None]:
# From gpio.Table to PyArrow via to_arrow().
# NOTE: this cell rebinds `table` and `arrow_table` from the previous section.
table = gpio.read("data/sample.parquet")
arrow_table = table.to_arrow()
print(f"Arrow table: {type(arrow_table)}")  # prints the Python type, not the data

# From PyArrow to gpio.Table: pass the Arrow object to the Table constructor
table_again = Table(arrow_table)
print(f"GPIO table: {type(table_again)}")  # prints the Python type, not the data

## Conditional Processing

In [None]:
def smart_optimize(input_path, h3_row_threshold=100, h3_resolution=9):
    """Apply different processing based on data characteristics.

    The table read from ``input_path`` is always passed through
    ``add_bbox()`` and ``sort_hilbert()``. ``add_h3()`` is applied on top
    only when the input has more than ``h3_row_threshold`` rows.

    Args:
        input_path: Path handed to ``gpio.read()``.
        h3_row_threshold: Row count the input must exceed (strictly) for
            the H3 step to run. Defaults to 100.
        h3_resolution: Value forwarded to ``add_h3`` as ``resolution``.
            Defaults to 9.

    Returns:
        The processed table.
    """
    table = gpio.read(input_path)

    # Always add bbox and sort
    result = table.add_bbox().sort_hilbert()

    # Add H3 for larger datasets (useful for later partitioning).
    # The row count is taken from the table as read, before any processing.
    if table.num_rows > h3_row_threshold:
        result = result.add_h3(resolution=h3_resolution)

    return result


# Run the conditional processing on the sample file and list the resulting columns
result = smart_optimize("data/sample.parquet")
print(f"Columns: {result.column_names}")

## Error Handling

In [None]:
def safe_process(input_path, output_path):
    """Read, add_bbox, sort_hilbert and write a file, reporting failure as data.

    Args:
        input_path: Path handed to ``gpio.read()``.
        output_path: Path handed to the table's ``write()``.

    Returns:
        ``(True, None)`` when every step completes, otherwise
        ``(False, message)`` where ``message`` is ``str()`` of the exception
        that was raised.
    """
    try:
        table = gpio.read(input_path)
        optimized = table.add_bbox().sort_hilbert()
        optimized.write(output_path)
    except Exception as exc:
        # Any failure in the chain is turned into a (False, text) result
        return False, str(exc)
    else:
        return True, None


# Run the guarded pipeline; `error` is None on success and a message otherwise
success, error = safe_process("data/sample.parquet", "/tmp/safe_output.parquet")
print(f"Success: {success}")
# Show the captured message on failure instead of silently discarding it
if not success:
    print(f"Error: {error}")

## Next Steps

- [03_spatial_indices.ipynb](03_spatial_indices.ipynb) - Understanding spatial indices
- [04_partitioning.ipynb](04_partitioning.ipynb) - Partitioning large datasets