In [0]:
import pandas as pd
from pyspark.sql import SparkSession

# Create a Spark session
spark = SparkSession.builder.getOrCreate()

# Sample employee data as a Pandas DataFrame
emp_data_pd = pd.DataFrame({
    "emp_id": [1, 2, 3, 4],
    "emp_name": ["Alice", "Bob", "Charlie", "Diana"],
    "department": ["HR", "Engineering", "Finance", "Marketing"],
    "salary": [70000, 90000, 80000, 75000]
})

# Convert to Spark DataFrame
emp_data = spark.createDataFrame(emp_data_pd)

# Show the DataFrame
emp_data.show()


+------+--------+-----------+------+
|emp_id|emp_name| department|salary|
+------+--------+-----------+------+
|     1|   Alice|         HR| 70000|
|     2|     Bob|Engineering| 90000|
|     3| Charlie|    Finance| 80000|
|     4|   Diana|  Marketing| 75000|
+------+--------+-----------+------+



In [0]:
# Write the DataFrame to a Delta table (managed table in the default catalog and schema)
emp_data.write.format("delta").mode("overwrite").saveAsTable("default.emp_data")

In [0]:
%sql
-- SQL Syntax to create shallow clone
CREATE TABLE IF NOT EXISTS default.delta_shallow_clone
SHALLOW CLONE default.emp_data;

source_table_size,source_num_of_files,num_removed_files,num_copied_files,removed_files_size,copied_files_size
1638,1,0,0,0,0


In [0]:
%sql
-- SQL Syntax for deep clone
CREATE TABLE IF NOT EXISTS default.delta_deep_clone
DEEP CLONE default.emp_data;
-- or simply
CREATE TABLE IF NOT EXISTS default.delta_deep_clone
CLONE default.emp_data;

source_table_size,source_num_of_files,num_removed_files,num_copied_files,removed_files_size,copied_files_size


In [0]:
# Now insert new employee into the original table
spark.sql("INSERT INTO default.emp_data VALUES (5, 'Frank', 'Finance', 100000)")

DataFrame[num_affected_rows: bigint, num_inserted_rows: bigint]

In [0]:
# 5. Check data original table
print("Original Table:")
spark.sql("SELECT * FROM emp_data").show()

Original Table:
+------+--------+-----------+------+
|emp_id|emp_name| department|salary|
+------+--------+-----------+------+
|     1|   Alice|         HR| 70000|
|     2|     Bob|Engineering| 90000|
|     3| Charlie|    Finance| 80000|
|     4|   Diana|  Marketing| 75000|
|     5|   Frank|    Finance|100000|
+------+--------+-----------+------+



In [0]:
# Check the shallow cloned data
print("Shallow Clone:")
spark.sql("SELECT * FROM delta_shallow_clone").show()

Shallow Clone:
+------+--------+-----------+------+
|emp_id|emp_name| department|salary|
+------+--------+-----------+------+
|     1|   Alice|         HR| 70000|
|     2|     Bob|Engineering| 90000|
|     3| Charlie|    Finance| 80000|
|     4|   Diana|  Marketing| 75000|
+------+--------+-----------+------+



In [0]:
# Check the deep cloned data
print("Deep Clone:")
spark.sql("SELECT * FROM delta_deep_clone").show()

Deep Clone:
+------+--------+-----------+------+
|emp_id|emp_name| department|salary|
+------+--------+-----------+------+
|     1|   Alice|         HR| 70000|
|     2|     Bob|Engineering| 90000|
|     3| Charlie|    Finance| 80000|
|     4|   Diana|  Marketing| 75000|
+------+--------+-----------+------+



In [0]:
%sql
-- To Synchronize the changes from original table, re-create the shallow clone
CREATE OR REPLACE TABLE delta_shallow_clone SHALLOW CLONE emp_data;


source_table_size,source_num_of_files,num_removed_files,num_copied_files,removed_files_size,copied_files_size
3193,2,0,0,0,0


In [0]:
print("Shallow Clone after re-create:")
spark.sql("SELECT * FROM delta_shallow_clone").show()

Shallow Clone:
+------+--------+-----------+------+
|emp_id|emp_name| department|salary|
+------+--------+-----------+------+
|     1|   Alice|         HR| 70000|
|     2|     Bob|Engineering| 90000|
|     3| Charlie|    Finance| 80000|
|     4|   Diana|  Marketing| 75000|
|     5|   Frank|    Finance|100000|
+------+--------+-----------+------+



In [0]:
%sql
-- To synchronize the deep clone, run deep clone command again - Deep clone supports incremental synchronization
CREATE OR REPLACE TABLE delta_deep_clone DEEP CLONE emp_data;


source_table_size,source_num_of_files,num_removed_files,num_copied_files,removed_files_size,copied_files_size
3193,2,0,1,0,1555


In [0]:
print("Deep Clone after synchronization:")
spark.sql("SELECT * FROM delta_deep_clone").show()

Deep Clone after synchronization:
+------+--------+-----------+------+
|emp_id|emp_name| department|salary|
+------+--------+-----------+------+
|     1|   Alice|         HR| 70000|
|     2|     Bob|Engineering| 90000|
|     3| Charlie|    Finance| 80000|
|     4|   Diana|  Marketing| 75000|
|     5|   Frank|    Finance|100000|
+------+--------+-----------+------+

