## Example: Create and use a PySpark session

In [1]:
import os

# Remove SPARK_REMOTE and SPARK_LOCAL from this process's environment if they
# are set (presumably so they cannot interfere with the local session built in
# the next cell -- confirm). pop(..., None) is a no-op for a missing variable.
for _spark_env_var in ("SPARK_REMOTE", "SPARK_LOCAL"):
    os.environ.pop(_spark_env_var, None)

try:
    from pyspark.sql import SparkSession

    # Only patch PySpark versions that actually expose this private hook.
    if hasattr(SparkSession.Builder, "_validate_startup_urls"):
        # Capture the genuine implementation once. If this cell is re-run
        # before the restore step, the attribute already holds the no-op below,
        # and capturing it again would lose the real method for good.
        if "original_method" not in globals():
            original_method = SparkSession.Builder._validate_startup_urls
        # Temporarily replace the builder's startup-URL validation with a no-op;
        # the session-creation cell restores `original_method` afterwards.
        SparkSession.Builder._validate_startup_urls = lambda self: None
except Exception as exc:
    # Best-effort by design: keep the notebook running, but report why the
    # patch was skipped instead of swallowing the error silently.
    print(f"Skipping SparkSession.Builder patch: {exc!r}")

In [2]:
from pyspark.sql import SparkSession

# Values shared by several catalog settings below.
UC_CATALOG_IMPL = "io.unitycatalog.spark.UCSingleCatalog"
UC_SERVER_URI = "http://server:8080"

try:
    # Local Spark session with the Delta Lake extension and Unity Catalog
    # catalogs registered under the names `spark_catalog`, `unity` and
    # `my_catalog`; `unity` is the default catalog for unqualified names.
    spark = (
        SparkSession.builder
        .appName("local-uc-test")
        .master("local[*]")
        .config(
            "spark.jars.packages",
            "io.delta:delta-spark_2.12:3.2.1,io.unitycatalog:unitycatalog-spark_2.12:0.2.1",
        )
        .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
        .config("spark.sql.catalog.spark_catalog", UC_CATALOG_IMPL)
        .config("spark.sql.catalog.unity", UC_CATALOG_IMPL)
        .config("spark.sql.catalog.unity.uri", UC_SERVER_URI)
        .config("spark.sql.catalog.unity.token", "")
        .config("spark.sql.catalog.my_catalog", UC_CATALOG_IMPL)
        .config("spark.sql.catalog.my_catalog.uri", UC_SERVER_URI)
        .config("spark.sql.catalog.my_catalog.token", "")
        .config("spark.sql.defaultCatalog", "unity")
        .config("spark.databricks.delta.catalog.update.enabled", "true")
        .enableHiveSupport()
        .getOrCreate()
    )
finally:
    # Restore the builder's validation hook if the setup cell stashed it in
    # `original_method`. Doing this in `finally` means a failed getOrCreate()
    # cannot leave SparkSession.Builder patched for the rest of the kernel.
    if "original_method" in globals():
        SparkSession.Builder._validate_startup_urls = original_method

:: loading settings :: url = jar:file:/opt/spark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /root/.ivy2/cache
The jars for the packages stored in: /root/.ivy2/jars
io.delta#delta-spark_2.12 added as a dependency
io.unitycatalog#unitycatalog-spark_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-678745b5-5c06-498b-ba5a-558aef3c3dc5;1.0
	confs: [default]
	found io.delta#delta-spark_2.12;3.2.1 in central
	found io.delta#delta-storage;3.2.1 in central
	found org.antlr#antlr4-runtime;4.9.3 in central
	found io.unitycatalog#unitycatalog-spark_2.12;0.2.1 in central
	found io.unitycatalog#unitycatalog-client;0.2.1 in central
	found org.slf4j#slf4j-api;2.0.13 in central
	found org.apache.logging.log4j#log4j-slf4j2-impl;2.23.1 in central
	found org.apache.logging.log4j#log4j-api;2.23.1 in central
	found org.apache.logging.log4j#log4j-core;2.23.1 in central
	found com.fasterxml.jackson.datatype#jackson-datatype-jsr310;2.17.0 in central
	found org.openapitools#jackson-databind-nullable;0.2.6 in central
	found com.googl

In [None]:
# Sanity-check the session: list the visible catalogs and the tables in
# `unity.default`, then look at one table's metadata and rows.
for listing_query in ("SHOW CATALOGS", "SHOW TABLES IN unity.default"):
    spark.sql(listing_query).show()

# truncate=False keeps the long metadata values readable.
employees_description = spark.sql("DESCRIBE EXTENDED unity.default.employees")
employees_description.show(truncate=False)

# `df` is reused by the cells that follow.
df = spark.table("unity.default.employees")
df.show()

+-------------+
|      catalog|
+-------------+
|spark_catalog|
|        unity|
+-------------+

+---------+-----------------+-----------+
|namespace|        tableName|isTemporary|
+---------+-----------------+-----------+
|  default|        employees|      false|
|  default|       employees2|      false|
|  default|        marksheet|      false|
|  default|marksheet_uniform|      false|
|  default|          numbers|      false|
|  default|   user_countries|      false|
+---------+-----------------+-----------+

+----------------------------+------------------------------------------------------------------------+-----------------------------------+
|col_name                    |data_type                                                               |comment                            |
+----------------------------+------------------------------------------------------------------------+-----------------------------------+
|employee_id                 |int                             

25/06/26 02:00:14 WARN SparkStringUtils: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

+-----------+----------+---------+--------------------+-----------+-------------------+-------+
|employee_id|first_name|last_name|               email| department|          hire_date| salary|
+-----------+----------+---------+--------------------+-----------+-------------------+-------+
|          5|     David|    Brown|david.brown@compa...|Engineering|2023-04-05 00:00:00|80000.0|
|          3|      Mike|  Johnson|mike.johnson@comp...|      Sales|2023-03-10 00:00:00|72000.0|
|          2|      Jane|    Smith|jane.smith@compan...|  Marketing|2023-02-20 00:00:00|68000.0|
|          1|      John|      Doe|john.doe@company.com|Engineering|2023-01-15 00:00:00|75000.0|
|          4|     Sarah|   Wilson|sarah.wilson@comp...|         HR|2023-01-25 00:00:00|65000.0|
+-----------+----------+---------+--------------------+-----------+-------------------+-------+



In [4]:
# Convert the Spark DataFrame to pandas and render it with the notebook's
# rich display.
employees_pdf = df.toPandas()
display(employees_pdf)

Unnamed: 0,employee_id,first_name,last_name,email,department,hire_date,salary
0,5,David,Brown,david.brown@company.com,Engineering,2023-04-05,80000.0
1,3,Mike,Johnson,mike.johnson@company.com,Sales,2023-03-10,72000.0
2,2,Jane,Smith,jane.smith@company.com,Marketing,2023-02-20,68000.0
3,1,John,Doe,john.doe@company.com,Engineering,2023-01-15,75000.0
4,4,Sarah,Wilson,sarah.wilson@company.com,HR,2023-01-25,65000.0


In [5]:
# Write the table as CSV with a header row, replacing any previous output at
# this path. NOTE(review): Spark's CSV writer normally produces a directory of
# part files at the given path rather than a single file -- confirm the
# ".csv" name is intended.
csv_writer = df.write.mode("overwrite").option("header", "true")
csv_writer.csv("employees_spark.csv")