In [1]:
import pyspark
from pyspark.conf import SparkConf
from pyspark.sql import SparkSession
import pandas as pd

# Lakekeeper REST catalog endpoint. The default targets the "docker compose"
# testing and development environment; replace the host 'server' when running
# elsewhere (e.g. 'localhost' if Lakekeeper is running locally).
CATALOG_URL = "http://server:8181/catalog"
# Warehouse name handed to the Spark catalog configuration.
WAREHOUSE = "demo"

# Keep only the "major.minor" part of the installed PySpark version; it is
# interpolated into the iceberg-spark-runtime artifact name.
SPARK_VERSION = pyspark.__version__
SPARK_MINOR_VERSION = ".".join(SPARK_VERSION.split(".", 2)[:2])
# Iceberg release used for the Spark runtime and AWS bundle packages.
ICEBERG_VERSION = "1.6.1"

# Connect with Spark

In [2]:
# Spark settings that register an Iceberg REST catalog named "lakekeeper"
# (pointing at CATALOG_URL / WAREHOUSE) and make it the default catalog.
# Keys without placeholders are plain strings; only the package coordinates
# need interpolation.
config = {
    "spark.sql.catalog.lakekeeper": "org.apache.iceberg.spark.SparkCatalog",
    "spark.sql.catalog.lakekeeper.type": "rest",
    "spark.sql.catalog.lakekeeper.uri": CATALOG_URL,
    "spark.sql.catalog.lakekeeper.warehouse": WAREHOUSE,
    "spark.sql.catalog.lakekeeper.io-impl": "org.apache.iceberg.aws.s3.S3FileIO",
    "spark.sql.extensions": "org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions",
    "spark.sql.defaultCatalog": "lakekeeper",
    # Comma-separated Maven coordinates: the Iceberg Spark runtime matching the
    # installed Spark minor version, plus the Iceberg AWS bundle.
    "spark.jars.packages": (
        f"org.apache.iceberg:iceberg-spark-runtime-{SPARK_MINOR_VERSION}_2.12:{ICEBERG_VERSION},"
        f"org.apache.iceberg:iceberg-aws-bundle:{ICEBERG_VERSION}"
    ),
}


In [3]:
# Assemble a local-mode Spark configuration that carries every entry of
# `config`, then start (or reuse) the session built from it.
spark_config = (
    SparkConf()
    .setMaster('local')
    .setAppName("Iceberg-REST")
    .setAll(list(config.items()))
)
spark = SparkSession.builder.config(conf=spark_config).getOrCreate()

# Select `lakekeeper` for the SQL statements that follow.
spark.sql("USE lakekeeper")

DataFrame[]

## Read and Write Tables

In [4]:
# Create the demo namespace if it is missing, then list all namespaces.
spark.sql("CREATE NAMESPACE IF NOT EXISTS my_namespace")
spark.sql("SHOW NAMESPACES").toPandas()

Unnamed: 0,namespace
0,test_replace_view
1,my_namespace


In [5]:
# One-row sample frame (int, string and float columns) converted to a Spark DataFrame.
data = pd.DataFrame({"id": [1], "strings": ["a-string"], "floats": [2.2]})
sdf = spark.createDataFrame(data)

In [6]:
# Write the sample frame as my_namespace.my_table, replacing the table if it already exists.
sdf.writeTo("my_namespace.my_table").createOrReplace()

In [33]:
# Create a view over the demo table. IF NOT EXISTS keeps this cell re-runnable:
# a plain CREATE VIEW fails once the view exists from a previous run.
spark.sql("CREATE VIEW IF NOT EXISTS my_namespace.v AS SELECT * FROM my_namespace.my_table")

DataFrame[]

In [7]:
# Read the table back and show it as a pandas DataFrame.
spark.sql("SELECT * FROM my_namespace.my_table").toPandas()

Unnamed: 0,id,strings,floats
0,1,a-string,2.2


In [5]:
import requests

In [22]:
import requests
# List namespaces through the catalog REST API, asking for their UUIDs and
# protection status as well. The URL is derived from CATALOG_URL so the call
# follows the configured host instead of a hard-coded one.
# NOTE(review): the path segment after /v1/ is hard-coded; presumably it is the
# warehouse id of this particular deployment — confirm / replace when re-running elsewhere.
requests.get(
    f"{CATALOG_URL}/v1/dfc1871c-0af5-11f0-9c89-cb40547908b6/namespaces"
    "?returnUuids=true&returnProtectionStatus=true"
).json()

{'namespaces': [['test_replace_view'], ['my_namespace']],
 'namespace-uuids': ['0195d727-7765-70b2-a7dc-349abaeb8851',
  '0195d880-cdec-75d3-85aa-a7d1adf9052b'],
 'protection-status': [False, True]}

In [21]:
# Turn protection on (protected=true) for one namespace via the management API.
# The warehouse and namespace ids in the path are hard-coded values.
protect_namespace_url = "http://server:8181/management/v1/warehouse/dfc1871c-0af5-11f0-9c89-cb40547908b6/namespace/0195d880-cdec-75d3-85aa-a7d1adf9052b/protection?protected=true"
requests.post(protect_namespace_url)

<Response [204]>

In [23]:
# Attempt a recursive delete of my_namespace and show the raw response body.
# In the recorded run the server answered 409 "Namespace is protected".
# The URL is derived from CATALOG_URL so the call follows the configured host.
# NOTE(review): the path segment after /v1/ is hard-coded; presumably the
# warehouse id of this particular deployment — confirm.
requests.delete(
    f"{CATALOG_URL}/v1/dfc1871c-0af5-11f0-9c89-cb40547908b6/namespaces/my_namespace"
    "?recursive=true"
).content

b'{"error":{"message":"Namespace is protected","type":"NamespaceProtected","code":409,"stack":["0195d8c4-bd67-74f2-9a9d-4429d1680c9b"]}}'

In [34]:
# List the views in my_namespace together with their UUIDs and protection status.
# The URL is derived from CATALOG_URL so the call follows the configured host.
# NOTE(review): the path segment after /v1/ is hard-coded; presumably the
# warehouse id of this particular deployment — confirm.
requests.get(
    f"{CATALOG_URL}/v1/dfc1871c-0af5-11f0-9c89-cb40547908b6/namespaces/my_namespace/views"
    "?returnUuids=true&returnProtectionStatus=true"
).json()

{'identifiers': [{'namespace': ['my_namespace'], 'name': 'v'}],
 'table-uuids': ['0195d8c7-2462-7fc3-920e-d1cca5eb349a'],
 'protection-status': [False]}

In [37]:
# Turn protection on (protected=true) for one view via the management API.
# The warehouse and view ids in the path are hard-coded values.
protect_view_url = "http://server:8181/management/v1/warehouse/dfc1871c-0af5-11f0-9c89-cb40547908b6/view/0195d8c7-2462-7fc3-920e-d1cca5eb349a/protection?protected=true"
requests.post(protect_view_url)

<Response [204]>

In [38]:
# List the views in my_namespace again to check the reported protection status.
# The URL is derived from CATALOG_URL so the call follows the configured host.
# NOTE(review): the path segment after /v1/ is hard-coded; presumably the
# warehouse id of this particular deployment — confirm.
requests.get(
    f"{CATALOG_URL}/v1/dfc1871c-0af5-11f0-9c89-cb40547908b6/namespaces/my_namespace/views"
    "?returnUuids=true&returnProtectionStatus=true"
).json()

{'identifiers': [{'namespace': ['my_namespace'], 'name': 'v'}],
 'table-uuids': ['0195d8c7-2462-7fc3-920e-d1cca5eb349a'],
 'protection-status': [True]}

In [40]:
# Try to drop the view through Spark SQL.
# NOTE(review): the recorded run of this cell raised AnalysisException
# [VIEW_NOT_FOUND]; presumably this is how the rejected drop of the protected
# view surfaces in Spark — confirm. If the failure is the intended
# demonstration, consider catching and displaying it so Run All completes.
spark.sql("DROP view my_namespace.v")

AnalysisException: [VIEW_NOT_FOUND] The view my_namespace.v cannot be found. Verify the spelling and correctness of the schema and catalog.
If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.
To tolerate the error on drop use DROP VIEW IF EXISTS.