In [1]:
from pyiceberg.catalog import load_catalog

# If using Amazon S3 or Azure storage
from pyiceberg.io.fsspec import FsspecFileIO

# If using Google Cloud Storage
from pyiceberg.io.pyarrow import PyArrowFileIO

In [2]:
import os

# Connection settings are read from environment variables so that real
# credentials never have to be saved inside the notebook. The placeholder
# strings are used only when the corresponding variable is not set.
account = os.environ.get("SNOWFLAKE_ACCOUNT", "<your-account-locator>")
principal_client_id = os.environ.get("POLARIS_PRINCIPAL_CLIENT_ID", "<your-principal-client-id>")
principal_secret = os.environ.get("POLARIS_PRINCIPAL_SECRET", "<your-principal-secret>")

In [3]:
# Load the REST catalog served under the account's /polaris/api/catalog
# endpoint, authenticating with the principal's client id and secret.
catalog = load_catalog(
    **{
        "type": "rest",
        "header.X-Iceberg-Access-Delegation": "vended-credentials",
        "uri": f"https://{account}.snowflakecomputing.com/polaris/api/catalog",
        "credential": f"{principal_client_id}:{principal_secret}",
        "warehouse": "<your-catalog-name>",
        "scope": "PRINCIPAL_ROLE:data_engineer",
        "token-refresh-enabled": "true",
        # Keep exactly ONE "py-io-impl" entry active, matching your object
        # storage. Every entry needs a trailing comma, and a key repeated in
        # a dict literal would silently override the earlier one.
        # Amazon S3 or Azure Storage
        "py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO",
        # Google Cloud Storage: uncomment the line below and comment out the
        # fsspec entry above.
        # "py-io-impl": "pyiceberg.io.pyarrow.PyArrowFileIO",
    }
)

In [4]:
# Define table schema
from pyiceberg.schema import Schema
from pyiceberg.types import NestedField, IntegerType, StringType

In [5]:
# Table schema: three required columns with field ids 1..3 in this order --
# an integer 'id', a string 'name' and an integer 'value'.
schema = Schema(
    *(
        NestedField(field_id=position, name=column, field_type=column_type(), required=True)
        for position, (column, column_type) in enumerate(
            [("id", IntegerType), ("name", StringType), ("value", IntegerType)],
            start=1,
        )
    )
)

In [6]:
# Ensure the 'demo_db' namespace is present in the catalog
catalog.create_namespace_if_not_exists("demo_db")

In [7]:
# Ensure 'demo_db.table_1' exists with the schema defined earlier and keep a handle to it
iceberg_table = catalog.create_table_if_not_exists(identifier="demo_db.table_1", schema=schema)

In [None]:
# Display the namespaces in the catalog (should include 'demo_db')
catalog.list_namespaces()

In [None]:
# Display the tables registered under the 'demo_db' namespace
catalog.list_tables('demo_db')

In [None]:
# Display the schema currently attached to the Iceberg table
iceberg_table.schema()

In [11]:

import pyarrow as pa
from pyiceberg.table import Table

# Build five sample rows (ids 1..5, alternating metric_1/5 and metric_2/10)
# as a PyArrow table typed with the Arrow form of the Iceberg table's schema.
pa_table_data = pa.Table.from_pylist(
    [
        {"id": row_id, "name": name, "value": value}
        for row_id, name, value in zip(
            range(1, 6),
            ["metric_1", "metric_2", "metric_1", "metric_2", "metric_1"],
            [5, 10, 5, 10, 5],
        )
    ],
    schema=iceberg_table.schema().as_arrow(),
)

In [12]:
# Append the sample rows to the Iceberg table
iceberg_table.append(df=pa_table_data)

In [None]:
# Read the table back as a PyArrow table to inspect the appended rows
iceberg_table.scan().to_arrow()

In [17]:
import pyarrow as pa
from pyiceberg.expressions import EqualTo

# Update the 'value' of the row(s) with id == 1 by rewriting them.

# Query for the rows to update as a PyArrow table.
id_1_tbl = iceberg_table.scan(row_filter=EqualTo('id', 1)).to_arrow()

# Determine the index of the value column and retrieve the column's field,
# so the replacement column reuses the existing field definition.
value_column_index = id_1_tbl.column_names.index('value')
value_column_field = id_1_tbl.field(value_column_index)

# Replace the value column. The new array has one entry per matched row and
# takes its type from the existing field, so the column length and data type
# always align with the scanned table (also when the filter matches zero or
# several rows, e.g. if this cell is re-run after the row was deleted).
id_1_tbl = id_1_tbl.set_column(
    value_column_index,
    value_column_field,
    pa.array([15] * id_1_tbl.num_rows, type=value_column_field.type),
)

# Update the Iceberg table by overwriting the rows that match the same filter.
iceberg_table.overwrite(
    df=id_1_tbl,
    overwrite_filter=EqualTo('id', 1),
)

In [None]:
# Read the table back to inspect the result of the overwrite
iceberg_table.scan().to_arrow()

In [19]:
from pyiceberg.expressions import EqualTo

# Delete every row whose id equals 1
iceberg_table.delete(delete_filter=EqualTo("id", 1))

In [None]:
# Read the table back to inspect the result of the delete
iceberg_table.scan().to_arrow()

In [27]:
# Schema evolution: rename the 'value' column to 'measure'
with iceberg_table.update_schema() as update:
    update.rename_column('value', 'measure')

In [None]:
# Read the table back to inspect the data after the column rename
iceberg_table.scan().to_arrow()