Self-check: write a small Delta table, then read it back from a new Spark session.

In [None]:
# Outcome record for this self-check; starts out as "not passed" with an
# empty message and is filled in by the check itself.
result = dict(
    area="spark",
    description="Check that spark can use delta tables.",
    passed=False,
    message="",
    plugin="spark",
)

In [None]:
import json

from freeds.spark import get_spark_session, show_spark_info

# Self-check: write a small Delta table with one Spark session, then read it
# back with a second session. The outcome is recorded in `result`.
db_name = "selfcheck_db"
table_name = f"{db_name}.selfcheck_tbl"

# Tracks the session that is currently open so the `finally` clause can stop
# it if a step fails part-way; reset to None after every successful stop().
spark = None

try:
    spark = get_spark_session("self-check1")

    # create some data in delta
    print(f"Dropping and recreating database {db_name}")
    spark.sql(f"DROP DATABASE IF EXISTS {db_name} CASCADE")
    spark.sql(f"CREATE DATABASE IF NOT EXISTS {db_name}")
    print(f"Writing table {table_name}")
    data = spark.range(100)
    # No explicit .mode(...) is set, so the writer's default save mode applies;
    # the database was dropped and recreated just above.
    (
        data.write.option("mergeSchema", "true")
        .format("delta")
        .saveAsTable(table_name)
    )
    spark.stop()
    spark = None

    # read some data in delta, using a fresh session
    spark = get_spark_session("self-check2")
    data = spark.table(table_name)
    show_spark_info(spark)
    data.show(5)

    # clean up
    spark.sql(f"DROP DATABASE IF EXISTS {db_name} CASCADE")
    spark.stop()
    spark = None

    result["message"] = "Executed spark cell ok."
    result["passed"] = True

except Exception as ex:
    # Any failure is reported through `result` rather than raised.
    result["message"] = str(ex)
    result["passed"] = False

finally:
    # A non-None `spark` here means a step failed before its session was
    # stopped; stop it so the failed check does not leave a session running.
    if spark is not None:
        try:
            spark.stop()
        except Exception as stop_ex:
            # Best effort: keep the original failure message and append this one.
            result["message"] += f" (stopping the spark session also failed: {stop_ex})"

In [None]:
# Print the self-check result as JSON indented by four spaces.
result_json = json.dumps(result, indent=4)
print(result_json)