In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import StructField, StructType, IntegerType, StringType, FloatType, BooleanType

schema = StructType([
    StructField('empid', IntegerType()), 
    StructField('empname', StringType()),
    StructField('salary', IntegerType()),
    StructField('department', StringType())
])

In [0]:
data = [(600, 'sam2', 9898, 'Data Analyst'),
        (300, 'gowri', 34949, 'Data engineer')]

In [0]:
spark = SparkSession.builder.appName('scd').getOrCreate()

In [0]:
dfx = spark.createDataFrame(data, schema=schema)

In [0]:
df2.show()

+-----+-------+------+-------------+
|empid|empname|salary|   department|
+-----+-------+------+-------------+
|  600|   sam2|  9898| Data Analyst|
|  300|  gowri| 34949|Data engineer|
+-----+-------+------+-------------+



In [0]:
%sql
CREATE OR REPLACE TABLE employee_delta_table (
    empid INT,
    empname STRING,
    salary INT,
    department STRING
)
USING DELTA
LOCATION 'dbfs:/mnt/employee_delta_table';


In [0]:
%sql
CREATE OR REPLACE TABLE employee_delta_table (
    empid INT,
    empname STRING,
    salary INT,
    department STRING
)
USING DELTA
LOCATION '/mnt/datalake/employee_delta_table';


In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
400,mahima,30000,Data Engineer


In [0]:
df.createOrReplaceTempView('emp')

In [0]:
%sql
select * from emp

empid,empname,salary,department
100,mahendra,65000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
%sql
insert into employee_delta_table values (200, 'mahima', 50000, 'Engineer')

num_affected_rows,num_inserted_rows
1,1


In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
400,mahima,30000,Data Engineer
200,mahima,50000,Engineer


In [0]:
%sql
select * from emp

empid,empname,salary,department
100,mahendra,65000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
%sql
merge into employee_delta_table as target
using emp as source
on target.empid = source.empid
when matched then
  update set
    target.empname = source.empname,
    target.salary = source.salary,
    target.department = source.department
when not matched then
  INSERT (empid, empname, salary, department)
  values (source.empid, source.empname, source.salary, source.department);


num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,1,0,2


In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
100,mahendra,65000,Data Engineer
400,mahima,30000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
%sql
truncate table employee_delta_table

In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department


In [0]:
%sql
merge into employee_delta_table as target
using emp as source
on target.empid = source.empid
when matched then
  update set
    target.empname = source.empname,
    target.salary = source.salary,
    target.department = source.department
when not matched then
  INSERT (empid, empname, salary, department)
  values (source.empid, source.empname, source.salary, source.department);

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,0,0,3


In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
100,mahendra,65000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
df2.createOrReplaceTempView('updated_emp')

In [0]:
%sql
select * from updated_emp

empid,empname,salary,department
100,mahendra,65000,Data Scientist
200,mahi,45000,Data Analyst
300,sam,59000,Data Analyst


In [0]:
%sql
merge into employee_delta_table as target
using updated_emp as source
on target.empid = source.empid
when matched then
update set target.empname = source.empname,
  target.salary = source.salary,
  target.department = source.department
when not matched then
insert (empid, empname, salary, department) values (source.empid, source.empname, source.salary, source.department)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,3,0,0


In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
100,mahendra,65000,Data Scientist
200,mahi,45000,Data Analyst
300,sam,59000,Data Analyst


In [0]:
%sql
merge into employee_delta_table as target
using emp as source
on target.empid = source.empid
when matched then
  update set
    target.empname = source.empname,
    target.salary = source.salary,
    target.department = source.department
when not matched then
  INSERT (empid, empname, salary, department)
  values (source.empid, source.empname, source.salary, source.department);

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,3,0,0


In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
100,mahendra,65000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
from delta.tables import DeltaTable

In [0]:
delta_df = DeltaTable.forPath(spark, "dbfs:/mnt/employee_delta_table")

In [0]:
df2.show()

+-----+--------+------+--------------+
|empid| empname|salary|    department|
+-----+--------+------+--------------+
|  100|mahendra| 65000|Data Scientist|
|  200|    mahi| 45000|  Data Analyst|
|  300|     sam| 59000|  Data Analyst|
+-----+--------+------+--------------+



In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
100,mahendra,65000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
from delta.tables import DeltaTable
from pyspark.sql import SparkSession

# Create Spark session
spark = SparkSession.builder \
    .appName("Merge DF2 into Delta Table") \
    .getOrCreate()

# Define path or table name for delta target
delta_table_path = "dbfs:/mnt/employee_delta_table"  # or use "delta_table_name" if using metastore

# Load delta table
delta_table = DeltaTable.forPath(spark, delta_table_path)

delta_table.alias("target").merge(
    source=df2.alias("source"),
    condition="target.empid = source.empid"
).whenMatchedUpdate(set={
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).whenNotMatchedInsert(values={
    "empid": "source.empid",
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).execute()


Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/databricks/python/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3378, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<command-807025702974571>", line 7, in <module>
    _sqldf = ____databricks_percent_sql()
  File "<command-807025702974571>", line 4, in ____databricks_percent_sql
    df = spark.sql(base64.standard_b64decode("c2VsZWN0ICogZnJvbSBkZWx0YV90YWJsZQ==").decode())
  File "/databricks/spark/python/pyspark/instrumentation_utils.py", line 48, in wrapper
    res = func(*args, **kwargs)
  File "/databricks/spark/python/pyspark/sql/session.py", line 1387, in sql
    return DataFrame(self._jsparkSession.sql(sqlQuery, litArgs), self)
  File "/databricks/spark/python/lib/py4j-0.10.9.5-src.zip/py4j/java_gateway.py", line 1321, in __call__
    return_value = get_return_value(
  File "/databricks/spark/python/pyspark/errors/exceptions.py", line 234, in deco
    raise converted from None
pyspark.er



In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
100,mahendra,65000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
df2.show()

+-----+-------+------+-------------+
|empid|empname|salary|   department|
+-----+-------+------+-------------+
|  600|   sam2|  9898| Data Analyst|
|  300|  gowri| 34949|Data engineer|
+-----+-------+------+-------------+



In [0]:
from delta.tables import DeltaTable
from pyspark.sql import SparkSession

# Define path or table name for delta target
delta_table_path = "dbfs:/mnt/employee_delta_table"  # or use "delta_table_name" if using metastore

# Load delta table
delta_table = DeltaTable.forPath(spark, delta_table_path)

delta_table.alias("target").merge(
    source=dfx.alias("source"),
    condition="target.empid = source.empid"
).whenMatchedUpdate(set={
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).whenNotMatchedInsert(values={
    "empid": "source.empid",
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).execute()


In [0]:
%sql
select * from employee_delta_table

empid,empname,salary,department
100,mahendra,65000,Data Engineer
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst


In [0]:
from pyspark.sql import SparkSession
from delta.tables import DeltaTable

# ---------------------------------------
# STEP 1: Create initial DataFrame
data_initial = [
    (101, "Alice", 60000, "HR"),
    (102, "Bob", 70000, "Finance"),
    (103, "Charlie", 80000, "IT")
]

columns = ["empid", "empname", "salary", "department"]
df11 = spark.createDataFrame(data_initial, columns)

# STEP 2: Write df1 as Delta table
delta_path = "dbfs:/mnt/employee_delta_table1"
df11.write.format("delta").mode("overwrite").save(delta_path)

# ---------------------------------------
# STEP 3: Create second DataFrame with updates + new record
data_updates = [
    (102, "Bob", 75000, "Finance"),         # Updated salary
    (104, "Diana", 90000, "Marketing")      # New employee
]
df2 = spark.createDataFrame(data_updates, columns)

# ---------------------------------------
# STEP 4: Merge df2 into Delta table
delta_table = DeltaTable.forPath(spark, delta_path)



In [0]:
df11.show()

+-----+-------+------+----------+
|empid|empname|salary|department|
+-----+-------+------+----------+
|  101|  Alice| 60000|        HR|
|  102|    Bob| 70000|   Finance|
|  103|Charlie| 80000|        IT|
+-----+-------+------+----------+



In [0]:

delta_table.alias("target").merge(
    source=df2.alias("source"),
    condition="target.empid = source.empid"
).whenMatchedUpdate(set={
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).whenNotMatchedInsert(values={
    "empid": "source.empid",
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).execute()

# ---------------------------------------
# STEP 5: Display final results
delta_table.toDF().show()

+-----+-------+------+----------+
|empid|empname|salary|department|
+-----+-------+------+----------+
|  104|  Diana| 90000| Marketing|
|  102|    Bob| 75000|   Finance|
|  103|Charlie| 80000|        IT|
|  101|  Alice| 60000|        HR|
+-----+-------+------+----------+



[0;31m---------------------------------------------------------------------------[0m
[0;31mAnalysisException[0m                         Traceback (most recent call last)
File [0;32m<command-807025702974579>:7[0m
[1;32m      5[0m     display(df)
[1;32m      6[0m     [38;5;28;01mreturn[39;00m df
[0;32m----> 7[0m   _sqldf [38;5;241m=[39m [43m____databricks_percent_sql[49m[43m([49m[43m)[49m
[1;32m      8[0m [38;5;28;01mfinally[39;00m:
[1;32m      9[0m   [38;5;28;01mdel[39;00m ____databricks_percent_sql

File [0;32m<command-807025702974579>:4[0m, in [0;36m____databricks_percent_sql[0;34m()[0m
[1;32m      2[0m [38;5;28;01mdef[39;00m [38;5;21m____databricks_percent_sql[39m():
[1;32m      3[0m   [38;5;28;01mimport[39;00m [38;5;21;01mbase64[39;00m
[0;32m----> 4[0m   df [38;5;241m=[39m [43mspark[49m[38;5;241;43m.[39;49m[43msql[49m[43m([49m[43mbase64[49m[38;5;241;43m.[39;49m[43mstandard_b64decode[49m[43m([49m[38;5;124;43m"[39;49

In [0]:
data1 = [
    (1, "Alice", 60000, "HR"),
    (2, "Bob", 70000, "Finance"),
    (3, "Charlie", 80000, "IT")
]
columns = ["empid", "name", "salary", "department"]

df1 = spark.createDataFrame(data1, columns)
df1.createOrReplaceTempView("emp_staging")


In [0]:
%sql
CREATE OR REPLACE TABLE employee_delta_table12 (
    empid INT,
    empname STRING,
    salary INT,
    department STRING
)
USING DELTA
LOCATION 'dbfs:/mnt/employee_delta_table12';


In [0]:
%sql
select * from employee_delta_table12

empid,empname,salary,department


In [0]:
df1.createOrReplaceTempView('emp')

In [0]:
%sql
merge into employee_delta_table12 as target 
using emp as source
on target.empid = source.empid
when matched then
update set 
  target.empname =source.name,
  target.salary = source.salary,
  target.department = source.department
when not matched then
insert (empid, empname, salary, department) values (
  source.empid, source.name, source.salary, source.department
)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
3,0,0,3


In [0]:
%sql
select * from emp

empid,name,salary,department
1,Alice,60000,HR
2,Bob,70000,Finance
3,Charlie,80000,IT


In [0]:
%sql
select * from employee_delta_table12

empid,empname,salary,department
2,Bob,70000,Finance
3,Charlie,80000,IT
1,Alice,60000,HR


In [0]:
dfx = spark.createDataFrame([(4, 'mahendra', 55000, 'Data engineer'),
                            (2, 'mahi', 60000, 'Data Analyst')], schema = schema)

In [0]:
dfx.show()

+-----+--------+------+-------------+
|empid| empname|salary|   department|
+-----+--------+------+-------------+
|    4|mahendra| 55000|Data engineer|
|    2|    mahi| 60000| Data Analyst|
+-----+--------+------+-------------+



In [0]:
dfx.createOrReplaceTempView('emp1')

In [0]:
%sql
select * from emp1

empid,empname,salary,department
4,mahendra,55000,Data engineer
2,mahi,60000,Data Analyst


In [0]:
%sql
select * from employee_delta_table12

empid,empname,salary,department
2,Bob,70000,Finance
3,Charlie,80000,IT
1,Alice,60000,HR


In [0]:
%sql
select * from emp1

empid,empname,salary,department
4,mahendra,55000,Data engineer
2,mahi,60000,Data Analyst


In [0]:
%sql
merge into employee_delta_table12 as t
using emp1 as s 
on t.empid = s.empid 
when matched then
  update set t.empname = s.empname, t.salary = s.salary, t.department =s.department
when not matched then
  insert (empid, empname, salary, department) values (s.empid, s.empname, s.salary,s.department)

num_affected_rows,num_updated_rows,num_deleted_rows,num_inserted_rows
2,1,0,1


In [0]:
%sql
select * from employee_delta_table12

empid,empname,salary,department
4,mahendra,55000,Data engineer
2,mahi,60000,Data Analyst
3,Charlie,80000,IT
1,Alice,60000,HR


In [0]:
df.show()

+-----+--------+------+-------------+
|empid| empname|salary|   department|
+-----+--------+------+-------------+
|  100|mahendra| 65000|Data Engineer|
|  200|    mahi| 45000| Data Analyst|
|  300|     sam| 56000| Data Analyst|
+-----+--------+------+-------------+



In [0]:
from delta.tables import DeltaTable
delta_table = DeltaTable.forPath(spark,'dbfs:/mnt/employee_delta_table12')

In [0]:
delta_table.alias("target").merge(
    source=df.alias("source"),
    condition="target.empid = source.empid"
).whenMatchedUpdate(set={
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).whenNotMatchedInsert(values={
    "empid": "source.empid",
    "empname": "source.empname",
    "salary": "source.salary",
    "department": "source.department"
}).execute()

In [0]:
%sql
select * from employee_delta_table12

empid,empname,salary,department
4,mahendra,55000,Data engineer
100,mahendra,65000,Data Engineer
2,mahi,60000,Data Analyst
200,mahi,45000,Data Analyst
300,sam,56000,Data Analyst
3,Charlie,80000,IT
1,Alice,60000,HR
