In [0]:
# Copy the CSV from the Workspace files area into DBFS. `cp` copies rather
# than moves, so the original under /Workspace/Shared stays where it is.
dbutils.fs.cp(
    "file:/Workspace/Shared/sales_data.csv",
    "dbfs:/FileStore/sales_data.csv",
)

True

In [0]:
# Load the CSV from DBFS, using the first row as the column names.
# No inferSchema option is set, so column types are left at the CSV reader's
# default here; df_csv further down is the variant that infers types.
df = (
    spark.read.format("csv")
    .option("header", "true")
    .load("/FileStore/sales_data.csv")
)
df.show()

+----------+------+--------+--------+-----+
|      Date|Region| Product|Quantity|Price|
+----------+------+--------+--------+-----+
|2024-09-01| North|Widget A|      10|25.50|
|2024-09-01| South|Widget B|       5|15.75|
|2024-09-02| North|Widget A|      12|25.50|
|2024-09-02|  East|Widget C|       8|22.50|
|2024-09-03|  West|Widget A|      15|25.50|
|2024-09-03| South|Widget B|      20|15.75|
|2024-09-03|  East|Widget C|      10|22.50|
|2024-09-04| North|Widget D|       7|30.00|
|2024-09-04|  West|Widget B|       9|15.75|
+----------+------+--------+--------+-----+



In [0]:
# Read the same CSV again, this time asking Spark to infer the column types
# from the data (header row still supplies the column names).
df_csv = (
    spark.read.format("csv")
    .option("header", "true")
    .option("inferSchema", "true")
    .load("dbfs:/FileStore/sales_data.csv")
)
df_csv.show()

+----------+------+--------+--------+-----+
|      Date|Region| Product|Quantity|Price|
+----------+------+--------+--------+-----+
|2024-09-01| North|Widget A|      10| 25.5|
|2024-09-01| South|Widget B|       5|15.75|
|2024-09-02| North|Widget A|      12| 25.5|
|2024-09-02|  East|Widget C|       8| 22.5|
|2024-09-03|  West|Widget A|      15| 25.5|
|2024-09-03| South|Widget B|      20|15.75|
|2024-09-03|  East|Widget C|      10| 22.5|
|2024-09-04| North|Widget D|       7| 30.0|
|2024-09-04|  West|Widget B|       9|15.75|
+----------+------+--------+--------+-----+



In [0]:
# Persist the dataframe as the table "new_sales_table".
# mode("overwrite") keeps this cell re-runnable: with the default save mode
# the write raises an error as soon as the table exists from an earlier run.
df.write.mode("overwrite").saveAsTable("new_sales_table")

In [0]:
# Read the saved table back through the DataFrame reader and display it.
# The name has to match the one passed to saveAsTable ("new_sales_table");
# no visible cell in this notebook creates a table called "sales_table".
df_table = spark.read.table("new_sales_table")
df_table.show()

+----------+------+--------+--------+-----+
|      Date|Region| Product|Quantity|Price|
+----------+------+--------+--------+-----+
|2024-09-01| North|Widget A|      10|25.50|
|2024-09-01| South|Widget B|       5|15.75|
|2024-09-02| North|Widget A|      12|25.50|
|2024-09-02|  East|Widget C|       8|22.50|
|2024-09-03|  West|Widget A|      15|25.50|
|2024-09-03| South|Widget B|      20|15.75|
|2024-09-03|  East|Widget C|      10|22.50|
|2024-09-04| North|Widget D|       7|30.00|
|2024-09-04|  West|Widget B|       9|15.75|
+----------+------+--------+--------+-----+



In [0]:
# Register the Delta table
spark.sql("CREATE TABLE IF NOT EXISTS delta_sales_table USING DELTA LOCATION '/Workspace/Shared/delta_sales_data'")


DataFrame[]

In [0]:
# Writing data to a Delta table with schema evolution enabled
df_csv.write.format("delta") \
    .option("mergeSchema", "true") \
    .mode("overwrite") \
    .save("/Workspace/Shared/delta_sales_data")

In [0]:
# Load the Delta data directly from its storage path (not via the table
# name) and display it.
df_delta = (
    spark.read
    .format("delta")
    .load("/Workspace/Shared/delta_sales_data")
)
df_delta.show()


+----------+------+--------+--------+-----+
|      Date|Region| Product|Quantity|Price|
+----------+------+--------+--------+-----+
|2024-09-01| North|Widget A|      10| 25.5|
|2024-09-01| South|Widget B|       5|15.75|
|2024-09-02| North|Widget A|      12| 25.5|
|2024-09-02|  East|Widget C|       8| 22.5|
|2024-09-03|  West|Widget A|      15| 25.5|
|2024-09-03| South|Widget B|      20|15.75|
|2024-09-03|  East|Widget C|      10| 22.5|
|2024-09-04| North|Widget D|       7| 30.0|
|2024-09-04|  West|Widget B|       9|15.75|
+----------+------+--------+--------+-----+



In [0]:
# Query the registered Delta table with Spark SQL, keeping only the rows
# whose Quantity is greater than 10.
df_query = spark.sql(
    "SELECT * FROM delta_sales_table WHERE Quantity > 10"
)
df_query.show()

+----------+------+--------+--------+-----+
|      Date|Region| Product|Quantity|Price|
+----------+------+--------+--------+-----+
|2024-09-02| North|Widget A|      12| 25.5|
|2024-09-03|  West|Widget A|      15| 25.5|
|2024-09-03| South|Widget B|      20|15.75|
+----------+------+--------+--------+-----+

