In [2]:
import pyarrow as pa
import pyarrow.compute as pc

# Here is our data: one Python list per column, aligned by position
names = ['Alice', 'Bob', 'Charlie', 'David']
ages = [25, 31, 46, 19]

# Create a PyArrow schema for the table
schema = pa.schema([
    ('name', pa.string()),
    ('age', pa.int32())
])

# Create a PyArrow array for each column, taking each type from the schema.
# Without an explicit type, pa.array() infers int64 for Python ints, which
# does not match the int32 declared in the schema above.
name_array = pa.array(names, type=schema.field('name').type)
age_array = pa.array(ages, type=schema.field('age').type)

# Create a PyArrow table from the arrays and schema
table = pa.Table.from_arrays([name_array, age_array], schema=schema)

print(table)

pyarrow.Table
name: string
age: int32
----
name: [["Alice","Bob","Charlie","David"]]
age: [[25,31,46,19]]


In [3]:
# Run a few PyArrow compute functions against the table

# Every computation below works on the age column, so look it up once
age_column = table.column('age')

# Total of all ages
age_sum = pc.sum(age_column)
print(f"Sum of ages: {age_sum}")

# Largest age present in the table
age_max = pc.max(age_column)
print(f"Maximum age: {age_max}")

# Keep only the rows whose age is greater than 25
condition = pc.greater(age_column, 25)
filtered_table = table.filter(condition)

Sum of ages: 121
Maximum age: 46
