### Create a Table

In [2]:
import pyarrow as pa

# Column layout of the orders table, as (name, Arrow type) pairs.
schema = pa.schema([
    ('order_id', pa.int32()),
    ('order_type', pa.string()),
    ('price', pa.float64()),
])

# Raw rows: one inner list per order, values in schema column order.
data = [
    [1, 'online', 20.03],
    [2, 'instore', 19.44],
    [3, 'instore', 28.45],
    [4, 'online', 45.04],
]

# Table.from_pylist takes a list of {column name: value} dicts, so pair each
# row's values with the schema's column names.
table = pa.Table.from_pylist(
    [dict(zip(schema.names, row)) for row in data],
    schema=schema,
)

print(table)

pyarrow.Table
order_id: int32
order_type: string
price: double
----
order_id: [[1,2,3,4]]
order_type: [["online","instore","instore","online"]]
price: [[20.03,19.44,28.45,45.04]]


### Filter with a simple mask

In [3]:
# One flag per table row, in row order: True keeps the row, False drops it.
mask = [
    True,   # order_id 1
    False,  # order_id 2
    False,  # order_id 3
    True,   # order_id 4
]

print(table.filter(mask))

pyarrow.Table
order_id: int32
order_type: string
price: double
----
order_id: [[1,4]]
order_type: [["online","online"]]
price: [[20.03,45.04]]


### Filtering on a Criterion

In [4]:
# Build the boolean mask by hand from the raw rows: True where the price
# (the third value of each row) is at least 21. `data` holds one row per
# table row, in the same order, so the mask lines up with `table`.
mask_dy = [price >= 21 for _order_id, _order_type, price in data]

print(table.filter(mask_dy))

pyarrow.Table
order_id: int32
order_type: string
price: double
----
order_id: [[3,4]]
order_type: [["instore","online"]]
price: [[28.45,45.04]]


### Use PyArrow Compute

In [5]:
import pyarrow.compute as pc

# Vectorized comparison over the whole price column: yields one boolean per
# row (True where price >= 21), which Table.filter accepts as a mask.
price_mask = pc.greater_equal(table.column('price'), 21)

print(table.filter(price_mask))

pyarrow.Table
order_id: int32
order_type: string
price: double
----
order_id: [[3,4]]
order_type: [["instore","online"]]
price: [[28.45,45.04]]


### Aggregation

In [6]:
# Total price per order type: group rows by `order_type`, then sum `price`
# within each group. The aggregated column is named `price_sum`.
grouped = (
    table
    .group_by(['order_type'])
    .aggregate([('price', 'sum')])
)

print(grouped)

pyarrow.Table
order_type: string
price_sum: double
----
order_type: [["online","instore"]]
price_sum: [[65.07,47.89]]
