<div style="text-align: center; line-height: 0; padding-top: 9px;">
  <img src="https://blog.scholarnest.com/wp-content/uploads/2023/03/scholarnest-academy-scaled.jpg" alt="ScholarNest Academy" style="width: 1400px">
</div>

##### Clean up previous runs

In [0]:
%run ../utils/cleanup

Dropping the dev catalog ...Done
Dropping the external-data ...Done


##### Setup

In [0]:
%sql
-- Make sure the catalog and the schema used by this demo exist.
CREATE CATALOG IF NOT EXISTS dev;
CREATE DATABASE IF NOT EXISTS dev.demo_db;

-- (Re)create the target Delta table with an explicit column list.
CREATE OR REPLACE TABLE dev.demo_db.people (
  id        INT,
  firstName STRING,
  lastName  STRING,
  birthDate STRING
)
USING DELTA;

-- Load the JSON file, renaming its fields to the table's column names.
INSERT OVERWRITE TABLE dev.demo_db.people
SELECT
  id,
  fname AS firstName,
  lname AS lastName,
  dob   AS birthDate
FROM JSON.`/mnt/files/dataset_ch7/people.json`;

-- Show the rows that were just loaded.
SELECT * FROM dev.demo_db.people;

id,firstName,lastName,birthDate
101,prashant,pandey,1975-05-25
102,abdul,hamid,1986-12-28
103,M David,turner,1979-08-23
104,Kailash,Patil,1972-09-02


##### 1. Delete one record from the above table using Spark SQL

In [0]:
%sql
-- Delete the row whose firstName is 'M David'.
-- The literal is single-quoted (standard SQL): a double-quoted token is read as
-- an identifier when spark.sql.ansi.doubleQuotedIdentifiers is enabled.
DELETE FROM dev.demo_db.people WHERE firstName = 'M David'

num_affected_rows
1


##### 2. Delete one record from the above table using the DeltaTable API

In [0]:
from delta.tables import DeltaTable

# Obtain a DeltaTable handle for the table by its three-part name.
people_dt = DeltaTable.forName(
    spark,
    "dev.demo_db.people",
)

# Delete the rows that satisfy the SQL predicate string.
people_dt.delete(condition="firstName = 'abdul'")

##### 3. Update one record in the Delta table using the DeltaTable API

In [0]:
from pyspark.sql import functions as f

# For rows matching the birthDate predicate, rewrite both name columns with
# initcap applied to their current value. The assignment map is built from the
# list of column names because both columns get the same treatment.
people_dt.update(
  condition = "birthDate = '1975-05-25'",
  set = {name_col: f.initcap(name_col) for name_col in ("firstName", "lastName")}
)

##### 4. Merge the given DataFrame into the Delta table

In [0]:
# Read the people JSON file into a DataFrame (no explicit schema is supplied)
# and render it in the notebook.
source_df = spark.read.json("/mnt/files/dataset_ch7/people.json")
display(source_df)

dob,fname,id,lname
1975-05-25,prashant,101,pandey
1986-12-28,abdul,102,hamid
1979-08-23,M David,103,turner
1972-09-02,Kailash,104,Patil


In [0]:
# Source-to-target column mapping shared by the unconditional update clause
# and the insert clause.
full_row_mapping = {
    "tgt.id": "src.id",
    "tgt.firstName": "src.fname",
    "tgt.lastName": "src.lname",
    "tgt.birthDate": "src.dob",
}

# Build the merge: rows are paired on id, and the matched clauses are listed
# from most specific to the unconditional fallback.
merge_builder = (
    people_dt.alias("tgt")
    .merge(source_df.alias("src"), "src.id=tgt.id")
    # Matched + this name pair -> delete the target row.
    .whenMatchedDelete(condition="tgt.firstName='Kailash' and tgt.lastName='Patil'")
    # Matched + id 101 -> only birthDate is assigned from the source.
    .whenMatchedUpdate(condition="tgt.id = 101", set={"tgt.birthDate": "src.dob"})
    # Matched, no condition -> assign every column from the source.
    .whenMatchedUpdate(set=full_row_mapping)
    # No matching target row -> insert the source row.
    .whenNotMatchedInsert(values=full_row_mapping)
)

merge_builder.execute()

&copy; 2021-2023 ScholarNest Technologies Pvt. Ltd. All rights reserved.<br/>
Apache, Apache Spark, Spark and the Spark logo are trademarks of the <a href="https://www.apache.org/">Apache Software Foundation</a>.<br/>
Databricks, Databricks Cloud and the Databricks logo are trademarks of the <a href="https://www.databricks.com/">Databricks Inc</a>.<br/>
<br/>
<a href="https://www.scholarnest.com/privacy/">Privacy Policy</a> | 
<a href="https://www.scholarnest.com/terms/">Terms of Use</a> | <a href="https://www.scholarnest.com/contact/">Contact Us</a>