In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, FloatType

from delta import *

In [3]:
# Delta Lake settings applied to the session: the delta-spark package to fetch,
# the SQL extension, and the Delta-aware catalog implementation.
delta_conf = {
    "spark.jars.packages": "io.delta:delta-spark_2.12:3.3.0",
    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
}

# Local-mode session using all available cores ("local[*]").
session_builder = SparkSession.builder.master("local[*]")
for conf_key, conf_value in delta_conf.items():
    session_builder = session_builder.config(conf_key, conf_value)

spark = session_builder.getOrCreate()

In [7]:
# Echo the session object as the cell result to confirm it was created.
spark

In [20]:
# Create the Delta table used throughout this notebook.
# CREATE OR REPLACE (rather than a plain CREATE TABLE) keeps this cell
# re-runnable: a second run resets the table to this empty four-column schema
# instead of failing because location_delta already exists.
spark.sql(
    """
    CREATE OR REPLACE TABLE location_delta (
        location_id INT,
        country STRING,
        continent STRING,
        population DOUBLE
    ) USING delta
    """
)

DataFrame[]

In [21]:
# Print every row of location_delta.
(
    spark
    .sql("select * from location_delta")
    .show()
)

+-----------+-------+---------+----------+
|location_id|country|continent|population|
+-----------+-------+---------+----------+
+-----------+-------+---------+----------+



In [22]:
from delta.tables import DeltaTable

# Resolve the table through the catalog by name rather than through a
# hard-coded "./spark-warehouse/..." path, so the handle does not depend on the
# kernel's working directory or on where the warehouse directory lives.
location = DeltaTable.forName(spark, "location_delta")

In [24]:
# Print the table's commit history (long cell values are truncated by default).
(
    location
    .history()
    .show()
)

+-------+--------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+----------------+------------+--------------------+
|version|           timestamp|userId|userName|   operation| operationParameters| job|notebook|clusterId|readVersion|isolationLevel|isBlindAppend|operationMetrics|userMetadata|          engineInfo|
+-------+--------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+----------------+------------+--------------------+
|      0|2025-04-21 20:58:...|  NULL|    NULL|CREATE TABLE|{partitionBy -> [...|NULL|    NULL|     NULL|       NULL|  Serializable|         true|              {}|        NULL|Apache-Spark/3.5....|
+-------+--------------------+------+--------+------------+--------------------+----+--------+---------+-----------+--------------+-------------+----------------+------------+--------------------+



In [32]:
# Insert a single row; values are positional, in the table's column order.
insert_sql = "INSERT INTO location_delta VALUES (1, 'Afghanistan', 'Asia', 41450000)"
spark.sql(insert_sql)



DataFrame[]

In [34]:
# Print every row of location_delta to verify the insert.
(
    spark
    .sql("select * from location_delta")
    .show()
)

+-----------+-----------+---------+----------+
|location_id|    country|continent|population|
+-----------+-----------+---------+----------+
|          1|Afghanistan|     Asia|   4.145E7|
+-----------+-----------+---------+----------+



In [35]:
# Print the commit history again, this time without truncating cell values.
(
    location
    .history()
    .show(truncate=False)
)

+-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+-----------------------------------------------------------+------------+-----------------------------------+
|version|timestamp              |userId|userName|operation   |operationParameters                                                                           |job |notebook|clusterId|readVersion|isolationLevel|isBlindAppend|operationMetrics                                           |userMetadata|engineInfo                         |
+-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+-----------------------------------------------------------+------------+-----------------------------------+
|1  

In [36]:
# Schema evolution: append a new DOUBLE column to the existing table.
add_column_sql = """
    alter table location_delta add column people_vaccinated DOUBLE
    """
spark.sql(add_column_sql)

DataFrame[]

In [37]:
# Print every row; the existing row shows NULL for the newly added column.
select_all_sql = """
    select * from location_delta
    """
spark.sql(select_all_sql).show()

+-----------+-----------+---------+----------+-----------------+
|location_id|    country|continent|population|people_vaccinated|
+-----------+-----------+---------+----------+-----------------+
|          1|Afghanistan|     Asia|   4.145E7|             NULL|
+-----------+-----------+---------+----------+-----------------+



In [39]:
# Fill in people_vaccinated for the row with location_id = 1.
update_sql = """
    update location_delta set people_vaccinated = 14000000  where location_id = 1
    """
spark.sql(update_sql)

DataFrame[num_affected_rows: bigint]

In [40]:
# Print every row to verify the update.
select_all_sql = """
    select * from location_delta
    """
spark.sql(select_all_sql).show()

+-----------+-----------+---------+----------+-----------------+
|location_id|    country|continent|population|people_vaccinated|
+-----------+-----------+---------+----------+-----------------+
|          1|Afghanistan|     Asia|   4.145E7|            1.4E7|
+-----------+-----------+---------+----------+-----------------+



In [41]:
# Ask Delta whether this path holds a Delta table; the boolean result is the
# cell's output. The path is relative to the kernel's working directory.
location_delta_path = "spark-warehouse/location_delta"
DeltaTable.isDeltaTable(spark, location_delta_path)

True

In [42]:
# Print every row of location_delta before the delete below.
(
    spark
    .sql('select * from location_delta')
    .show()
)

+-----------+-----------+---------+----------+-----------------+
|location_id|    country|continent|population|people_vaccinated|
+-----------+-----------+---------+----------+-----------------+
|          1|Afghanistan|     Asia|   4.145E7|            1.4E7|
+-----------+-----------+---------+----------+-----------------+



In [43]:
# Remove the row with location_id = 1 from the table.
delete_sql = """
  DELETE FROM location_delta 
  WHERE location_id = 1
"""
spark.sql(delete_sql)

DataFrame[num_affected_rows: bigint]

In [44]:
# Print every row to verify the delete (the table should now be empty).
(
    spark
    .sql('select * from location_delta')
    .show()
)

+-----------+-------+---------+----------+-----------------+
|location_id|country|continent|population|people_vaccinated|
+-----------+-------+---------+----------+-----------------+
+-----------+-------+---------+----------+-----------------+



In [45]:
# SQL route to the table's commit history, printed without truncation.
# show() returns None, so no trailing ';' is needed to suppress cell output.
(
    spark
    .sql('describe HISTORY location_delta')
    .show(truncate=False)
)

+-------+-----------------------+------+--------+------------+----------------------------------------------------------------------------------------------------+----+--------+---------+-----------+--------------+-------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+------------+-----------------------------------+
|version|timestamp              |userId|userName|operation   |operationParameters                                                                                 |job |notebook|clusterId|readVersion|isolationLevel|isBlindAppend|operationMetrics                                                                                                                                                               