### Create delta tables


In [None]:
%%pyspark
df = spark.read.load('abfss://files@datalakeadk97g2.dfs.core.windows.net/products/products.csv', format='csv', header=True)
display(df.limit(10))

In [None]:
# Folder that will hold the Delta table built from the products dataframe.
delta_table_path = "/delta/products-delta"

# Save the dataframe in Delta format. The default save mode raises an error when
# the target path already exists, which made this cell fail on every re-run;
# "overwrite" keeps the cell repeatable. On a repeat run the overwrite is recorded
# as a new table version, so earlier versions stay available for time travel.
df.write.format("delta").mode("overwrite").save(delta_table_path)

In [None]:
from delta.tables import *
from pyspark.sql.functions import *

# Bind a DeltaTable handle to the folder written earlier
deltaTable = DeltaTable.forPath(spark, delta_table_path)

# Apply a 10% price cut to product 771.
# NOTE: every execution of this cell multiplies the current ListPrice by 0.9 again,
# so re-running it compounds the discount.
price_cut = {"ListPrice": "ListPrice * 0.9"}
deltaTable.update(condition="ProductID == 771", set=price_cut)

# Show the first ten rows of the table after the update
updated_products = deltaTable.toDF()
updated_products.show(10)

In [None]:
# Time travel: load the table as it was at version 0 and show ten rows.
new_df = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load(delta_table_path)
)
new_df.show(10)

In [None]:
# Print the table's commit history (limited to 10 entries), one field per line
# and without truncating long values.
deltaTable.history(10).show(n=20, truncate=False, vertical=True)

### Create catalog tables

In [None]:
# Create the database and an external table over the existing Delta folder.
# IF NOT EXISTS makes both statements safe to re-run; without it the cell raised
# an "already exists" error the second time it was executed.
spark.sql("CREATE DATABASE IF NOT EXISTS AdventureWorks")

create_external_sql = "CREATE TABLE IF NOT EXISTS AdventureWorks.ProductsExternal USING DELTA LOCATION '{0}'".format(delta_table_path)
spark.sql(create_external_sql)

# Show the table's metadata without truncating long values.
spark.sql("DESCRIBE EXTENDED AdventureWorks.ProductsExternal").show(truncate=False)

In [None]:
%%sql

-- Make AdventureWorks the current database
USE AdventureWorks;

-- Return every row of the external products table
SELECT *
FROM ProductsExternal;

In [None]:
# Save the dataframe as a managed Delta table in the AdventureWorks database.
# The default save mode raises an error when the table already exists, so
# "overwrite" is used to keep this cell repeatable.
df.write.format("delta").mode("overwrite").saveAsTable("AdventureWorks.ProductsManaged")

# Show the table's metadata without truncating long values.
spark.sql("DESCRIBE EXTENDED AdventureWorks.ProductsManaged").show(truncate=False)

In [None]:
%%sql

-- Make AdventureWorks the current database
USE AdventureWorks;

-- Return every row of the managed products table
SELECT *
FROM ProductsManaged;

In [None]:
%%sql

-- Make AdventureWorks the current database
USE AdventureWorks;

-- List the tables in the current database
SHOW TABLES;

In [None]:
%%sql

-- Make AdventureWorks the current database
USE AdventureWorks;

-- Remove both catalog tables, tolerating the case where either is already gone
DROP TABLE IF EXISTS ProductsExternal;
DROP TABLE IF EXISTS ProductsManaged;

### Create a table using SQL

In [None]:
%%sql

-- Make AdventureWorks the current database
USE AdventureWorks;

-- Register a table over the existing Delta folder.
-- IF NOT EXISTS keeps the cell repeatable, because a plain CREATE TABLE fails
-- once the table has been created.
CREATE TABLE IF NOT EXISTS Products
USING DELTA
LOCATION '/delta/products-delta';

In [None]:
%%sql

-- Make AdventureWorks the current database
USE AdventureWorks;

-- Return every row of the Products table
SELECT *
FROM Products;

### Use delta tables for streaming data

In [None]:
from notebookutils import mssparkutils
from pyspark.sql.types import *
from pyspark.sql.functions import *

# Folder that serves as the streaming source
inputPath = '/data/'
mssparkutils.fs.mkdirs(inputPath)

# Schema applied to incoming JSON events: two string fields, both declared non-nullable
jsonSchema = StructType([
    StructField("device", StringType(), False),
    StructField("status", StringType(), False),
])

# Streaming reader over the folder, limited to one file per trigger
iotstream = (
    spark.readStream
    .schema(jsonSchema)
    .option("maxFilesPerTrigger", 1)
    .json(inputPath)
)

# Drop an initial batch of device events (one JSON object per line) into the folder
device_data = '''{"device":"Dev1","status":"ok"}
{"device":"Dev1","status":"ok"}
{"device":"Dev1","status":"ok"}
{"device":"Dev2","status":"error"}
{"device":"Dev1","status":"ok"}
{"device":"Dev1","status":"error"}
{"device":"Dev2","status":"ok"}
{"device":"Dev2","status":"error"}
{"device":"Dev1","status":"ok"}'''
# The trailing True is presumably the overwrite flag of mssparkutils.fs.put - confirm against its docs
mssparkutils.fs.put(inputPath + "data.txt", device_data, True)
print("Source stream created...")

In [None]:
# Sink folder for the streamed data and the folder used for the stream's checkpoint
delta_stream_table_path = '/delta/iotdevicedata'
checkpointpath = '/delta/checkpoint'

# Start a streaming query that writes the source stream to the sink in Delta format
deltastream = (
    iotstream.writeStream
    .format("delta")
    .option("checkpointLocation", checkpointpath)
    .start(delta_stream_table_path)
)
print("Streaming to delta sink...")

In [None]:
# The streaming query runs asynchronously, so reading the sink straight after
# start() can happen before the first micro-batch is committed (the read then
# fails or returns nothing). Block until everything currently available in the
# source has been written to the sink.
deltastream.processAllAvailable()

# Read the data in delta format into a dataframe.
# NOTE: this rebinds `df`, which earlier cells use for the products data.
df = spark.read.format("delta").load(delta_stream_table_path)
display(df)

In [None]:
# Create a catalog table based on the streaming sink.
# IF NOT EXISTS keeps the cell repeatable, because a plain CREATE TABLE fails
# once the table exists. The name is unqualified, so the table is created in
# whichever database is current for the session at the time this runs.
create_iot_sql = "CREATE TABLE IF NOT EXISTS IotDeviceData USING DELTA LOCATION '{0}'".format(delta_stream_table_path)
spark.sql(create_iot_sql)

In [None]:
%%sql

-- Return every row currently in the streaming sink table
SELECT *
FROM IotDeviceData;

In [None]:
# Add more data to the source stream
more_data = '''{"device":"Dev1","status":"ok"}
{"device":"Dev1","status":"ok"}
{"device":"Dev1","status":"ok"}
{"device":"Dev1","status":"ok"}
{"device":"Dev1","status":"error"}
{"device":"Dev2","status":"error"}
{"device":"Dev1","status":"ok"}'''

mssparkutils.fs.put(inputPath + "more-data.txt", more_data, True)

# The stream ingests the new file asynchronously. Wait until it has been
# processed so that a query run right after this cell already sees the new rows
# (otherwise the result depends on timing).
deltastream.processAllAvailable()

In [None]:
%%sql

-- Query the sink table again to see the rows added since the previous query
SELECT *
FROM IotDeviceData;

In [None]:
# Stop the streaming query that was started when writing the stream to the delta sink.
deltastream.stop()

### Query a delta table from a serverless SQL pool

In [None]:
-- Auto-generated query: read the Delta folder directly from the data lake
-- and return at most 100 rows.
SELECT TOP 100 *
FROM OPENROWSET(
    BULK 'https://datalakeadk97g2.dfs.core.windows.net/files/delta/products-delta/',
    FORMAT = 'DELTA'
) AS [result]


In [None]:
-- Auto-generated query: switch to the AdventureWorks database,
-- then return every row of the Products table.
USE AdventureWorks;

SELECT *
FROM Products;