### 4.1 Streaming Query

In [0]:
%run ./Includes/Classroom-Setup

* Read streaming data

In [0]:
# DDL schema for the events data, written one top-level field per line.
schema = (
    "device STRING, "
    "ecommerce STRUCT<purchase_revenue_in_usd: DOUBLE, total_item_quantity: BIGINT, unique_items: BIGINT>, "
    "event_name STRING, "
    "event_previous_timestamp BIGINT, "
    "event_timestamp BIGINT, "
    "geo STRUCT<city: STRING, state: STRING>, "
    "items ARRAY<STRUCT<coupon: STRING, item_id: STRING, item_name: STRING, item_revenue_in_usd: DOUBLE, price_in_usd: DOUBLE, quantity: BIGINT>>, "
    "traffic_source STRING, "
    "user_first_touch_timestamp BIGINT, "
    "user_id STRING"
)

# Streaming read of the Parquet files at eventsPath; maxFilesPerTrigger=1
# limits each trigger to at most one new file.
df = (
    spark.readStream
    .schema(schema)
    .option("maxFilesPerTrigger", 1)
    .parquet(eventsPath)
)

# For a one-off batch read of the same files, start from spark.read instead
# of spark.readStream.

display(df)

device,ecommerce,event_name,event_previous_timestamp,event_timestamp,geo,items,traffic_source,user_first_touch_timestamp,user_id
Windows,"List(null, null, null)",add_item,1593604013418531.0,1593604166967411,"List(Aurora, CO)","List(List(null, M_STAN_T, Standard Twin Mattress, 595.0, 595.0, 1))",google,1593603932991227,UA000000106484072
macOS,"List(null, null, null)",guest,1593792667777632.0,1593793102376026,"List(Atlanta, GA)","List(List(NEWBED10, M_STAN_Q, Standard Queen Mattress, 940.5, 1045.0, 1), List(NEWBED10, P_FOAM_S, Standard Foam Pillow, 53.1, 59.0, 1), List(NEWBED10, M_PREM_Q, Premium Queen Mattress, 1615.5, 1795.0, 1))",email,1593606662775893,UA000000106492759
Linux,"List(null, null, null)",main,,1593610121403996,"List(Uvalde, TX)",List(),google,1593610121403996,UA000000106506379
Android,"List(null, null, null)",add_item,1593595457982777.0,1593595673594309,"List(Houston, TX)","List(List(null, M_STAN_T, Standard Twin Mattress, 595.0, 595.0, 1))",email,1593595457982777,UA000000106466992
Android,"List(null, null, null)",mattresses,,1593577304912675,"List(Reno, NV)",List(),instagram,1593577304912675,UA000000106458058
Windows,"List(null, null, null)",main,,1593609793654886,"List(Phoenix, AZ)",List(),google,1593609793654886,UA000000106504949
Android,"List(null, null, null)",pillows,,1593619480937685,"List(Chicago, IL)",List(),google,1593619480937685,UA000000106555879
macOS,"List(null, null, null)",add_item,1593618267281779.0,1593618623952958,"List(San Diego, CA)","List(List(null, M_STAN_F, Standard Full Mattress, 945.0, 945.0, 1))",google,1593618267281779,UA000000106548706
Windows,"List(null, null, null)",main,,1593601683208285,"List(Muskegon, MI)",List(),google,1593601683208285,UA000000106478125
Android,"List(null, null, null)",add_item,1593594736126253.0,1593596897183596,"List(Lancaster, OH)","List(List(null, M_PREM_T, Premium Twin Mattress, 2190.0, 1095.0, 2))",youtube,1593594736126253,UA000000106466114


* Check whether the DataFrame is streaming

In [0]:
# True when the DataFrame is backed by a streaming source (built via readStream above).
df.isStreaming

Out[5]: True

* Transform streaming data

In [0]:
from pyspark.sql.functions import col, approx_count_distinct, count

# Keep only events whose traffic_source is "email", add a boolean `mobile`
# column that is true for iOS/Android devices, and narrow the result to the
# three columns used downstream.
emailTrafficDF = (
    df
    .filter(col("traffic_source") == "email")
    .withColumn("mobile", col("device").isin(["iOS", "Android"]))
    .select("user_id", "event_timestamp", "mobile")
)

display(emailTrafficDF)

user_id,event_timestamp,mobile
UA000000106492759,1593793102376026,False
UA000000106466992,1593595673594309,True
UA000000106466758,1593596243792384,True
UA000000106507716,1593613199618903,True
UA000000106460658,1593587139594631,True
UA000000106527631,1593614494006388,True
UA000000106459980,1593654663757403,True
UA000000106532534,1593615408404063,False
UA000000106492591,1593606616384640,True
UA000000106499964,1593608599877382,False


* Write streaming data

In [0]:
# Checkpoint and output locations, both built from `userhome`.
checkpointPath = userhome + "/email_traffic/checkpoint"
outputPath = userhome + "/email_traffic/output"

# Start a named streaming query that appends Parquet output to outputPath,
# triggers every second, and records its progress in checkpointPath.
devicesQuery = (
    emailTrafficDF.writeStream
    .outputMode("append")
    .format("parquet")
    .queryName("email_traffic_p")
    .trigger(processingTime="1 second")
    .option("checkpointLocation", checkpointPath)
    .start(outputPath)
)

* Monitor streaming query

In [0]:
# Identifier of the streaming query started above.
devicesQuery.id

Out[8]: '3a768212-cb40-4a7c-930b-6b83955441e9'

In [0]:
# Current status of the query: a message plus data-available / trigger-active flags.
devicesQuery.status

Out[9]: {'message': 'Initializing sources',
 'isDataAvailable': False,
 'isTriggerActive': False}

In [0]:
# Wait up to 5 seconds for the query to terminate; the result says whether it did.
devicesQuery.awaitTermination(5)

Out[10]: False

* Stop streaming query

In [0]:
# Stop the streaming query started above.
devicesQuery.stop()

In [0]:
# Confirm the query is no longer running after stop().
devicesQuery.isActive

Out[12]: False

### 4.2 Aggregating Streams

In [0]:
# DDL schema for the events data, written one top-level field per line.
schema = (
    "device STRING, "
    "ecommerce STRUCT<purchase_revenue_in_usd: DOUBLE, total_item_quantity: BIGINT, unique_items: BIGINT>, "
    "event_name STRING, "
    "event_previous_timestamp BIGINT, "
    "event_timestamp BIGINT, "
    "geo STRUCT<city: STRING, state: STRING>, "
    "items ARRAY<STRUCT<coupon: STRING, item_id: STRING, item_name: STRING, item_revenue_in_usd: DOUBLE, price_in_usd: DOUBLE, quantity: BIGINT>>, "
    "traffic_source STRING, "
    "user_first_touch_timestamp BIGINT, "
    "user_id STRING"
)

# Hourly events logged from the BedBricks website on July 3, 2020.
hourlyEventsPath = "/mnt/training/ecommerce/events/events-2020-07-03.json"

# Streaming JSON read; maxFilesPerTrigger=1 limits each trigger to at most
# one new file.
df = (
    spark.readStream
    .schema(schema)
    .option("maxFilesPerTrigger", 1)
    .json(hourlyEventsPath)
)

display(df)

device,ecommerce,event_name,event_previous_timestamp,event_timestamp,geo,items,traffic_source,user_first_touch_timestamp,user_id,hour
iOS,"List(1075.5, 1, 1)",finalize,1593801817162695.0,1593803182518649,"List(McKinney, TX)","List(List(NEWBED10, M_STAN_K, Standard King Mattress, 1075.5, 1195.0, 1))",email,1593439231519133,UA000000106032467,19
Linux,"List(null, null, null)",cart,1593804318237854.0,1593804691399513,"List(Winter Garden, FL)","List(List(NEWBED10, M_PREM_K, Premium King Mattress, 1795.5, 1995.0, 1))",email,1593443919308326,UA000000106055757,19
Linux,"List(null, null, null)",add_item,1593803473586594.0,1593804318237854,"List(Winter Garden, FL)","List(List(NEWBED10, M_PREM_K, Premium King Mattress, 1795.5, 1995.0, 1))",email,1593443919308326,UA000000106055757,19
macOS,"List(850.5, 1, 1)",finalize,1593806248979773.0,1593806352498548,"List(Jacksonville, FL)","List(List(NEWBED10, M_STAN_F, Standard Full Mattress, 850.5, 945.0, 1))",email,1593425063951856,UA000000105991459,19
iOS,"List(null, null, null)",cart,1593806229866506.0,1593806233001836,"List(Laurel, MD)","List(List(NEWBED10, M_STAN_K, Standard King Mattress, 1075.5, 1195.0, 1))",email,1593437732970217,UA000000106025899,19
macOS,"List(null, null, null)",cc_info,1593805919578229.0,1593806248979773,"List(Jacksonville, FL)","List(List(NEWBED10, M_STAN_F, Standard Full Mattress, 850.5, 945.0, 1))",email,1593425063951856,UA000000105991459,19
Linux,"List(null, null, null)",shipping_info,1593805075989147.0,1593805141364750,"List(Winter Garden, FL)","List(List(NEWBED10, M_PREM_K, Premium King Mattress, 1795.5, 1995.0, 1))",email,1593443919308326,UA000000106055757,19
macOS,"List(null, null, null)",cart,1593804276972372.0,1593804279833526,"List(Jacksonville, FL)","List(List(NEWBED10, M_STAN_F, Standard Full Mattress, 850.5, 945.0, 1))",email,1593425063951856,UA000000105991459,19
macOS,"List(null, null, null)",shipping_info,1593802932642269.0,1593803241760836,"List(Ellisville, MO)","List(List(NEWBED10, M_STAN_Q, Standard Queen Mattress, 940.5, 1045.0, 1))",email,1593351371498734,UA000000105772187,19
macOS,"List(null, null, null)",shipping_info,1593802884816480.0,1593803225097531,"List(Portland, OR)","List(List(null, M_PREM_Q, Premium Queen Mattress, 1795.0, 1795.0, 1))",google,1593801016487011,UA000000107143337,19


In [0]:
# True when the DataFrame is backed by a streaming source (built via readStream above).
df.isStreaming

Out[14]: True

* Add a watermark with `withWatermark()`

In [0]:
# NOTE(review): this wildcard import also pulls in names that shadow Python
# builtins (e.g. sum, min, max); this cell itself only needs `col`.
from pyspark.sql.functions import *

# Derive a `createdAt` timestamp by dividing event_timestamp by 1e6 before the
# cast, then declare a 2-hour watermark on that column.
eventsDF = (
    df
    .withColumn("createdAt", (col("event_timestamp") / 1e6).cast("timestamp"))
    .withWatermark(eventTime="createdAt", delayThreshold="2 hours")
)

display(eventsDF)

device,ecommerce,event_name,event_previous_timestamp,event_timestamp,geo,items,traffic_source,user_first_touch_timestamp,user_id,hour,createdAt
iOS,"List(1075.5, 1, 1)",finalize,1593801817162695.0,1593803182518649,"List(McKinney, TX)","List(List(NEWBED10, M_STAN_K, Standard King Mattress, 1075.5, 1195.0, 1))",email,1593439231519133,UA000000106032467,19,2020-07-03T19:06:22.518+0000
Linux,"List(null, null, null)",cart,1593804318237854.0,1593804691399513,"List(Winter Garden, FL)","List(List(NEWBED10, M_PREM_K, Premium King Mattress, 1795.5, 1995.0, 1))",email,1593443919308326,UA000000106055757,19,2020-07-03T19:31:31.399+0000
Linux,"List(null, null, null)",add_item,1593803473586594.0,1593804318237854,"List(Winter Garden, FL)","List(List(NEWBED10, M_PREM_K, Premium King Mattress, 1795.5, 1995.0, 1))",email,1593443919308326,UA000000106055757,19,2020-07-03T19:25:18.237+0000
macOS,"List(850.5, 1, 1)",finalize,1593806248979773.0,1593806352498548,"List(Jacksonville, FL)","List(List(NEWBED10, M_STAN_F, Standard Full Mattress, 850.5, 945.0, 1))",email,1593425063951856,UA000000105991459,19,2020-07-03T19:59:12.498+0000
iOS,"List(null, null, null)",cart,1593806229866506.0,1593806233001836,"List(Laurel, MD)","List(List(NEWBED10, M_STAN_K, Standard King Mattress, 1075.5, 1195.0, 1))",email,1593437732970217,UA000000106025899,19,2020-07-03T19:57:13.001+0000
macOS,"List(null, null, null)",cc_info,1593805919578229.0,1593806248979773,"List(Jacksonville, FL)","List(List(NEWBED10, M_STAN_F, Standard Full Mattress, 850.5, 945.0, 1))",email,1593425063951856,UA000000105991459,19,2020-07-03T19:57:28.979+0000
Linux,"List(null, null, null)",shipping_info,1593805075989147.0,1593805141364750,"List(Winter Garden, FL)","List(List(NEWBED10, M_PREM_K, Premium King Mattress, 1795.5, 1995.0, 1))",email,1593443919308326,UA000000106055757,19,2020-07-03T19:39:01.364+0000
macOS,"List(null, null, null)",cart,1593804276972372.0,1593804279833526,"List(Jacksonville, FL)","List(List(NEWBED10, M_STAN_F, Standard Full Mattress, 850.5, 945.0, 1))",email,1593425063951856,UA000000105991459,19,2020-07-03T19:24:39.833+0000
macOS,"List(null, null, null)",shipping_info,1593802932642269.0,1593803241760836,"List(Ellisville, MO)","List(List(NEWBED10, M_STAN_Q, Standard Queen Mattress, 940.5, 1045.0, 1))",email,1593351371498734,UA000000105772187,19,2020-07-03T19:07:21.760+0000
macOS,"List(null, null, null)",shipping_info,1593802884816480.0,1593803225097531,"List(Portland, OR)","List(List(null, M_PREM_Q, Premium Queen Mattress, 1795.0, 1795.0, 1))",google,1593801016487011,UA000000107143337,19,2020-07-03T19:07:05.097+0000


* Transform streaming data

In [0]:
from pyspark.sql.functions import approx_count_distinct, hour, window

# Set the number of shuffle partitions to the context's default parallelism.
spark.conf.set("spark.sql.shuffle.partitions", sc.defaultParallelism)

# Approximate count of distinct users per traffic source within one-hour
# windows over createdAt, labelled with the hour of each window's start and
# sorted by that hour in ascending order.
trafficDF = (
    eventsDF
    .groupBy("traffic_source", window(timeColumn="createdAt", windowDuration="1 hour"))
    .agg(approx_count_distinct("user_id").alias("active_users"))
    .select("traffic_source", "active_users", hour("window.start").alias("hour"))
    .sort(col("hour").asc())
)

display(trafficDF)

traffic_source,active_users,hour
youtube,479,0
facebook,1503,0
instagram,907,0
google,2523,0
email,787,0
direct,806,0
google,1852,1
facebook,868,1
email,739,1
instagram,618,1


In [0]:
# Display the aggregated stream again, passing a stream name for the display query.
display(trafficDF, streamName="hourly_traffic_p")

traffic_source,active_users,hour
youtube,479,0
facebook,1503,0
instagram,907,0
google,2523,0
email,787,0
direct,806,0
google,1852,1
facebook,868,1
email,739,1
instagram,618,1


### 4.3 Delta Lake

* __Create a Delta Table__

In [0]:
# Location of the Delta table, built from `workingDir`.
deltaPath = workingDir + "/delta-events"

# Batch-read the Parquet events.
eventsDF = spark.read.parquet(eventsPath)

# Write them out in Delta format, replacing anything already at deltaPath.
(
    eventsDF.write
    .format("delta")
    .mode("overwrite")
    .save(deltaPath)
)

* We can also create a Delta table in the metastore

In [0]:
# Save the same data as a Delta table named `delta_events`, replacing any
# existing contents.
(
    eventsDF.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("delta_events")
)

In [0]:
%sql
-- Return every column and row of the delta_events table.
SELECT *
FROM delta_events

device,ecommerce,event_name,event_previous_timestamp,event_timestamp,geo,items,traffic_source,user_first_touch_timestamp,user_id
macOS,"List(null, null, null)",warranty,1593878899217692.0,1593878946592107,"List(Montrose, MI)",List(),google,1593878899217692,UA000000107379500
Windows,"List(null, null, null)",press,1593876662175340.0,1593877011756535,"List(Northampton, MA)",List(),google,1593876662175340,UA000000107359357
macOS,"List(null, null, null)",add_item,1593878792892652.0,1593878815459100,"List(Salinas, CA)","List(List(null, M_STAN_T, Standard Twin Mattress, 595.0, 595.0, 1))",youtube,1593878455472030,UA000000107375547
iOS,"List(null, null, null)",mattresses,1593878178791663.0,1593878809276923,"List(Everett, MA)",List(),facebook,1593877903116176,UA000000107370581
Windows,"List(null, null, null)",mattresses,,1593878628143633,"List(Cottage Grove, MN)",List(),google,1593878628143633,UA000000107377108
Windows,"List(null, null, null)",main,,1593878634344194,"List(Medina, MN)",List(),youtube,1593878634344194,UA000000107377161
iOS,"List(null, null, null)",main,,1593877936171803,"List(Mount Pleasant, UT)",List(),direct,1593877936171803,UA000000107370851
macOS,"List(null, null, null)",main,,1593876843215329,"List(Piedmont, AL)",List(),instagram,1593876843215329,UA000000107360961
Android,"List(null, null, null)",warranty,1593878529774474.0,1593879213196400,"List(Rancho Santa Margarita, CA)",List(),instagram,1593878529774474,UA000000107376205
Windows,"List(null, null, null)",main,,1593876713246514,"List(Elyria, OH)",List(),facebook,1593876713246514,UA000000107359805


* Delta supports partitioning your data using unique values in a specified column

In [0]:
# NOTE(review): this wildcard import also pulls in names that shadow Python
# builtins (e.g. sum, min, max); this cell itself only needs `col`.
from pyspark.sql.functions import *

# Copy the nested geo.state field into a top-level `state` column.
stateEventsDF = eventsDF.withColumn("state", col("geo.state"))

# Overwrite the data at deltaPath, partitioned by `state`; overwriteSchema is
# set because the new column and partitioning differ from the earlier write.
(
    stateEventsDF.write
    .format("delta")
    .mode("overwrite")
    .partitionBy("state")
    .option("overwriteSchema", "true")
    .save(deltaPath)
)

* __Understand the Transaction Log__

In [0]:
# List the files and directories directly under the Delta table location.
display(dbutils.fs.ls(deltaPath))

path,name,size
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/,_delta_log/,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/part-00000-7bb99c27-7b29-4cb1-bbfc-988da58f6148-c000.snappy.parquet,part-00000-7bb99c27-7b29-4cb1-bbfc-988da58f6148-c000.snappy.parquet,74600524
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/part-00001-6822a03e-7001-4fff-a10b-5609b2f53f78-c000.snappy.parquet,part-00001-6822a03e-7001-4fff-a10b-5609b2f53f78-c000.snappy.parquet,74596237
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/part-00002-49d16c3f-2839-4957-aa2c-ffe341e9d934-c000.snappy.parquet,part-00002-49d16c3f-2839-4957-aa2c-ffe341e9d934-c000.snappy.parquet,74575576
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/part-00003-157e7911-c5ad-4675-9c71-62b254ba2098-c000.snappy.parquet,part-00003-157e7911-c5ad-4675-9c71-62b254ba2098-c000.snappy.parquet,74575459
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=AK/,state=AK/,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=AL/,state=AL/,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=AR/,state=AR/,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=AZ/,state=AZ/,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/,state=CA/,0


In [0]:
# List the contents of the table's _delta_log directory.
display(dbutils.fs.ls(deltaPath + "/_delta_log/"))

path,name,size
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/.s3-optimization-0,.s3-optimization-0,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/.s3-optimization-1,.s3-optimization-1,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/.s3-optimization-2,.s3-optimization-2,0
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/00000000000000000000.crc,00000000000000000000.crc,94
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/00000000000000000000.json,00000000000000000000.json,8254
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/00000000000000000001.crc,00000000000000000001.crc,96
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/_delta_log/00000000000000000001.json,00000000000000000001.json,291071


In [0]:
# Read log file 00000000000000000000.json (the first entry listed above) as JSON.
display(spark.read.json(deltaPath + "/_delta_log/00000000000000000000.json"))

add,commitInfo,metaData,protocol
,,,"List(1, 2)"
,,"List(1650942617155, List(parquet), 94ec2d84-4a5d-4aa1-a7ad-7582ab60faa6, List(), {""type"":""struct"",""fields"":[{""name"":""device"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""ecommerce"",""type"":{""type"":""struct"",""fields"":[{""name"":""purchase_revenue_in_usd"",""type"":""double"",""nullable"":true,""metadata"":{}},{""name"":""total_item_quantity"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""unique_items"",""type"":""long"",""nullable"":true,""metadata"":{}}]},""nullable"":true,""metadata"":{}},{""name"":""event_name"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""event_previous_timestamp"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""event_timestamp"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""geo"",""type"":{""type"":""struct"",""fields"":[{""name"":""city"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""state"",""type"":""string"",""nullable"":true,""metadata"":{}}]},""nullable"":true,""metadata"":{}},{""name"":""items"",""type"":{""type"":""array"",""elementType"":{""type"":""struct"",""fields"":[{""name"":""coupon"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""item_id"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""item_name"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""item_revenue_in_usd"",""type"":""double"",""nullable"":true,""metadata"":{}},{""name"":""price_in_usd"",""type"":""double"",""nullable"":true,""metadata"":{}},{""name"":""quantity"",""type"":""long"",""nullable"":true,""metadata"":{}}]},""containsNull"":true},""nullable"":true,""metadata"":{}},{""name"":""traffic_source"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""user_first_touch_timestamp"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""user_id"",""type"":""string"",""nullable"":true,""metadata"":{}}]})",
"List(true, 1650942888000, part-00000-7bb99c27-7b29-4cb1-bbfc-988da58f6148-c000.snappy.parquet, 74600524, {""numRecords"":2424522,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592205687627986,""event_timestamp"":1592539200194694,""geo"":{""city"":""Abbeville"",""state"":""AK""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592196947865522,""user_id"":""UA000000102357807""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":5289.0,""total_item_quantity"":5,""unique_items"":5},""event_name"":""warranty"",""event_previous_timestamp"":1593879287625340,""event_timestamp"":1593879299370625,""geo"":{""city"":""Zumbrota"",""state"":""WY""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593892583883212,""user_id"":""UA000000107499832""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":2379597,""total_item_quantity"":2379597,""unique_items"":2379597},""event_name"":0,""event_previous_timestamp"":1016253,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650942873000000, 268435456))",,,
"List(true, 1650942886000, part-00001-6822a03e-7001-4fff-a10b-5609b2f53f78-c000.snappy.parquet, 74596237, {""numRecords"":2424505,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592199621810965,""event_timestamp"":1592539202466157,""geo"":{""city"":""Abbeville"",""state"":""AK""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592196947865522,""user_id"":""UA000000102357807""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":4985.0,""total_item_quantity"":5,""unique_items"":5},""event_name"":""warranty"",""event_previous_timestamp"":1593884557091316,""event_timestamp"":1593879299750326,""geo"":{""city"":""Zumbrota"",""state"":""WY""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593892583883212,""user_id"":""UA000000107499832""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":2379217,""total_item_quantity"":2379217,""unique_items"":2379217},""event_name"":0,""event_previous_timestamp"":1016578,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650942873000001, 268435456))",,,
"List(true, 1650942873000, part-00002-49d16c3f-2839-4957-aa2c-ffe341e9d934-c000.snappy.parquet, 74575576, {""numRecords"":2424537,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592197539430780,""event_timestamp"":1592539216262230,""geo"":{""city"":""Abbeville"",""state"":""AK""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592196947865522,""user_id"":""UA000000102357807""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":5485.0,""total_item_quantity"":5,""unique_items"":4},""event_name"":""warranty"",""event_previous_timestamp"":1593892853177619,""event_timestamp"":1593879299756928,""geo"":{""city"":""Zumbrota"",""state"":""WY""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593892583883212,""user_id"":""UA000000107499832""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":2379415,""total_item_quantity"":2379415,""unique_items"":2379415},""event_name"":0,""event_previous_timestamp"":1017843,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650942873000002, 268435456))",,,
"List(true, 1650942888000, part-00003-157e7911-c5ad-4675-9c71-62b254ba2098-c000.snappy.parquet, 74575459, {""numRecords"":2424186,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592204888344383,""event_timestamp"":1592539202702440,""geo"":{""city"":""Abbeville"",""state"":""AK""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592197275580686,""user_id"":""UA000000102357841""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":5830.0,""total_item_quantity"":5,""unique_items"":5},""event_name"":""warranty"",""event_previous_timestamp"":1593879293771819,""event_timestamp"":1593879299923863,""geo"":{""city"":""Zumbrota"",""state"":""WY""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593892583883212,""user_id"":""UA000000107499832""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":2378843,""total_item_quantity"":2378843,""unique_items"":2378843},""event_name"":0,""event_previous_timestamp"":1017912,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650942873000003, 268435456))",,,
,"List(0426-013321-7nf4oz3u, false, WriteSerializable, List(1338300120007481), WRITE, List(4, 298347796, 9697750), List(Overwrite, []), 1650942894261, 8983615237679342, zhicheng.l@worldlink-us.com)",,


In [0]:
# Read log file 00000000000000000001.json (the second entry listed above) as JSON.
display(spark.read.json(deltaPath + "/_delta_log/00000000000000000001.json"))

add,commitInfo,metaData,remove
,,"List(1650942617155, List(parquet), 94ec2d84-4a5d-4aa1-a7ad-7582ab60faa6, List(state), {""type"":""struct"",""fields"":[{""name"":""device"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""ecommerce"",""type"":{""type"":""struct"",""fields"":[{""name"":""purchase_revenue_in_usd"",""type"":""double"",""nullable"":true,""metadata"":{}},{""name"":""total_item_quantity"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""unique_items"",""type"":""long"",""nullable"":true,""metadata"":{}}]},""nullable"":true,""metadata"":{}},{""name"":""event_name"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""event_previous_timestamp"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""event_timestamp"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""geo"",""type"":{""type"":""struct"",""fields"":[{""name"":""city"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""state"",""type"":""string"",""nullable"":true,""metadata"":{}}]},""nullable"":true,""metadata"":{}},{""name"":""items"",""type"":{""type"":""array"",""elementType"":{""type"":""struct"",""fields"":[{""name"":""coupon"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""item_id"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""item_name"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""item_revenue_in_usd"",""type"":""double"",""nullable"":true,""metadata"":{}},{""name"":""price_in_usd"",""type"":""double"",""nullable"":true,""metadata"":{}},{""name"":""quantity"",""type"":""long"",""nullable"":true,""metadata"":{}}]},""containsNull"":true},""nullable"":true,""metadata"":{}},{""name"":""traffic_source"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""user_first_touch_timestamp"",""type"":""long"",""nullable"":true,""metadata"":{}},{""name"":""user_id"",""type"":""string"",""nullable"":true,""metadata"":{}},{""name"":""state"",""type"":""string"",""null
able"":true,""metadata"":{}}]})",
"List(true, 1650943301000, List(AK), state=AK/part-00000-227a9f0b-d174-4b35-80a7-9793f3624166.c000.snappy.parquet, 58811, {""numRecords"":1655,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":59.0,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592323580173801,""event_timestamp"":1592545676942458,""geo"":{""city"":""Akutan"",""state"":""AK""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592323572704674,""user_id"":""UA000000102702972""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":2290.0,""total_item_quantity"":2,""unique_items"":2},""event_name"":""warranty"",""event_previous_timestamp"":1593878868803435,""event_timestamp"":1593879245394745,""geo"":{""city"":""Wasilla"",""state"":""AK""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879245394745,""user_id"":""UA000000107382708""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":1626,""total_item_quantity"":1626,""unique_items"":1626},""event_name"":0,""event_previous_timestamp"":673,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000000, 268435456))",,,
"List(true, 1650943308000, List(AL), state=AL/part-00000-b78ce5ca-20ae-4979-8ff6-e5c4658c7fc1.c000.snappy.parquet, 1112497, {""numRecords"":36648,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592216134896907,""event_timestamp"":1592540545809692,""geo"":{""city"":""Abbeville"",""state"":""AL""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592198812458125,""user_id"":""UA000000102358054""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":4444.0,""total_item_quantity"":4,""unique_items"":4},""event_name"":""warranty"",""event_previous_timestamp"":1593879185063377,""event_timestamp"":1593879292720486,""geo"":{""city"":""York"",""state"":""AL""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593883964023919,""user_id"":""UA000000107426715""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":35988,""total_item_quantity"":35988,""unique_items"":35988},""event_name"":0,""event_previous_timestamp"":15542,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000001, 268435456))",,,
"List(true, 1650943312000, List(AR), state=AR/part-00000-9d90ff2a-debc-4280-a597-ec5e849335cd.c000.snappy.parquet, 775580, {""numRecords"":25249,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592239626395357,""event_timestamp"":1592539371264803,""geo"":{""city"":""Alexander"",""state"":""AR""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592216541175409,""user_id"":""UA000000102372976""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":3411.0,""total_item_quantity"":3,""unique_items"":3},""event_name"":""warranty"",""event_previous_timestamp"":1593879177492859,""event_timestamp"":1593879235320337,""geo"":{""city"":""Yellville"",""state"":""AR""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879183831629,""user_id"":""UA000000107382134""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":24762,""total_item_quantity"":24762,""unique_items"":24762},""event_name"":0,""event_previous_timestamp"":10615,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000002, 268435456))",,,
"List(true, 1650943317000, List(AZ), state=AZ/part-00000-ba86302a-8215-4165-ad22-85977eccd6c5.c000.snappy.parquet, 2075693, {""numRecords"":69000,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592225813398359,""event_timestamp"":1592539318804749,""geo"":{""city"":""Apache Junction"",""state"":""AZ""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592201848205824,""user_id"":""UA000000102358714""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":3723.0,""total_item_quantity"":5,""unique_items"":5},""event_name"":""warranty"",""event_previous_timestamp"":1593879208563901,""event_timestamp"":1593879281222851,""geo"":{""city"":""Yuma"",""state"":""AZ""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879281222851,""user_id"":""UA000000107383063""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":67686,""total_item_quantity"":67686,""unique_items"":67686},""event_name"":0,""event_previous_timestamp"":28826,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000003, 268435456))",,,
"List(true, 1650943353000, List(CA), state=CA/part-00000-842f1770-08f5-49d1-a79e-f054db2043d5.c000.snappy.parquet, 12803957, {""numRecords"":437474,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592213931491149,""event_timestamp"":1592539217839800,""geo"":{""city"":""Adelanto"",""state"":""CA""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592196947865522,""user_id"":""UA000000102357807""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":4351.5,""total_item_quantity"":5,""unique_items"":5},""event_name"":""warranty"",""event_previous_timestamp"":1593879260019806,""event_timestamp"":1593879299089066,""geo"":{""city"":""Yucaipa"",""state"":""CA""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879299089066,""user_id"":""UA000000107383219""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":429323,""total_item_quantity"":429323,""unique_items"":429323},""event_name"":0,""event_previous_timestamp"":183426,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000004, 268435456))",,,
"List(true, 1650943359000, List(CO), state=CO/part-00000-1587c25b-45ff-4aea-a217-842187ccf185.c000.snappy.parquet, 1549796, {""numRecords"":50945,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592233695548490,""event_timestamp"":1592539205571717,""geo"":{""city"":""Alamosa"",""state"":""CO""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592221777404201,""user_id"":""UA000000102387004""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":3790.0,""total_item_quantity"":5,""unique_items"":4},""event_name"":""warranty"",""event_previous_timestamp"":1593879205509353,""event_timestamp"":1593879291840284,""geo"":{""city"":""Yuma"",""state"":""CO""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879291840284,""user_id"":""UA000000107383148""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":50015,""total_item_quantity"":50015,""unique_items"":50015},""event_name"":0,""event_previous_timestamp"":21377,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000005, 268435456))",,,
"List(true, 1650943361000, List(CT), state=CT/part-00000-453f26ef-6782-4441-ae70-7a5c54e04931.c000.snappy.parquet, 542625, {""numRecords"":17742,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592222390079786,""event_timestamp"":1592540001955600,""geo"":{""city"":""Ansonia"",""state"":""CT""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592220972573615,""user_id"":""UA000000102384413""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":3987.0,""total_item_quantity"":4,""unique_items"":3},""event_name"":""warranty"",""event_previous_timestamp"":1593879228646929,""event_timestamp"":1593879293101959,""geo"":{""city"":""West Haven"",""state"":""CT""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879293101959,""user_id"":""UA000000107383161""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":17428,""total_item_quantity"":17428,""unique_items"":17428},""event_name"":0,""event_previous_timestamp"":7482,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000006, 268435456))",,,
"List(true, 1650943362000, List(DC), state=DC/part-00000-eaa2095a-3498-41e4-82ff-31864a72c8b9.c000.snappy.parquet, 281559, {""numRecords"":9302,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":53.1,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592230966458779,""event_timestamp"":1592543247210594,""geo"":{""city"":""Washington"",""state"":""DC""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592230961080476,""user_id"":""UA000000102428200""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":3904.0,""total_item_quantity"":4,""unique_items"":4},""event_name"":""warranty"",""event_previous_timestamp"":1593879279827803,""event_timestamp"":1593879284190905,""geo"":{""city"":""Washington"",""state"":""DC""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879279827803,""user_id"":""UA000000107383051""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":9127,""total_item_quantity"":9127,""unique_items"":9127},""event_name"":0,""event_previous_timestamp"":3948,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000007, 268435456))",,,
"List(true, 1650943363000, List(DE), state=DE/part-00000-3ad8c5ad-417d-4bf8-9639-150930b7e3ea.c000.snappy.parquet, 76114, {""numRecords"":2239,""minValues"":{""device"":""Android"",""ecommerce"":{""purchase_revenue_in_usd"":59.0,""total_item_quantity"":1,""unique_items"":1},""event_name"":""add_item"",""event_previous_timestamp"":1592487055859296,""event_timestamp"":1592542982225078,""geo"":{""city"":""Delaware City"",""state"":""DE""},""traffic_source"":""direct"",""user_first_touch_timestamp"":1592322212297264,""user_id"":""UA000000102694167""},""maxValues"":{""device"":""macOS"",""ecommerce"":{""purchase_revenue_in_usd"":1795.0,""total_item_quantity"":2,""unique_items"":2},""event_name"":""warranty"",""event_previous_timestamp"":1593878446179035,""event_timestamp"":1593879002936312,""geo"":{""city"":""Wilmington"",""state"":""DE""},""traffic_source"":""youtube"",""user_first_touch_timestamp"":1593879002936312,""user_id"":""UA000000107380422""},""nullCount"":{""device"":0,""ecommerce"":{""purchase_revenue_in_usd"":2200,""total_item_quantity"":2200,""unique_items"":2200},""event_name"":0,""event_previous_timestamp"":973,""event_timestamp"":0,""geo"":{""city"":0,""state"":0},""items"":0,""traffic_source"":0,""user_first_touch_timestamp"":0,""user_id"":0}}, List(1650943300000008, 268435456))",,,


In [0]:
# List the files stored under the state=CA partition directory of the Delta table.
ca_partition_dir = deltaPath + "/state=CA/"
display(dbutils.fs.ls(ca_partition_dir))

path,name,size
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00000-842f1770-08f5-49d1-a79e-f054db2043d5.c000.snappy.parquet,part-00000-842f1770-08f5-49d1-a79e-f054db2043d5.c000.snappy.parquet,12803957
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00001-4b22bab6-ae0a-4a59-a3ee-d5e64b267c2b.c000.snappy.parquet,part-00001-4b22bab6-ae0a-4a59-a3ee-d5e64b267c2b.c000.snappy.parquet,12847990
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00002-e0e1ee5e-c3f0-4c7f-b2a8-30853b9b5b29.c000.snappy.parquet,part-00002-e0e1ee5e-c3f0-4c7f-b2a8-30853b9b5b29.c000.snappy.parquet,12824231
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00003-22bbc25f-cf5b-41a9-86e6-1f9deb45140d.c000.snappy.parquet,part-00003-22bbc25f-cf5b-41a9-86e6-1f9deb45140d.c000.snappy.parquet,12834506


* Read and Update your Delta table

In [0]:
# Load the current contents of the Delta table stored at deltaPath.
df = (
    spark.read
    .format("delta")
    .load(deltaPath)
)

# Keep only the Android and iOS events.
# Note: the filter is applied to stateEventsDF (defined earlier in the notebook),
# not to the `df` that was just loaded above.
kept_devices = ["Android", "iOS"]
df_update = stateEventsDF.filter(col("device").isin(kept_devices))

# Overwrite the Delta table at deltaPath with the filtered rows.
(
    df_update.write
    .format("delta")
    .mode("overwrite")
    .save(deltaPath)
)

In [0]:
# List the state=CA partition directory again, after the overwrite above.
ca_partition_dir = deltaPath + "/state=CA/"
display(dbutils.fs.ls(ca_partition_dir))

path,name,size
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00000-842f1770-08f5-49d1-a79e-f054db2043d5.c000.snappy.parquet,part-00000-842f1770-08f5-49d1-a79e-f054db2043d5.c000.snappy.parquet,12803957
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00000-f2598abd-91c6-4f54-affc-e8b89b5300ac.c000.snappy.parquet,part-00000-f2598abd-91c6-4f54-affc-e8b89b5300ac.c000.snappy.parquet,5738470
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00001-4b22bab6-ae0a-4a59-a3ee-d5e64b267c2b.c000.snappy.parquet,part-00001-4b22bab6-ae0a-4a59-a3ee-d5e64b267c2b.c000.snappy.parquet,12847990
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00001-f52f6f6e-9cb3-4f57-9849-1b6a559975f4.c000.snappy.parquet,part-00001-f52f6f6e-9cb3-4f57-9849-1b6a559975f4.c000.snappy.parquet,5757076
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00002-3884c576-452c-4110-a7cd-99c64f49456e.c000.snappy.parquet,part-00002-3884c576-452c-4110-a7cd-99c64f49456e.c000.snappy.parquet,5736201
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00002-e0e1ee5e-c3f0-4c7f-b2a8-30853b9b5b29.c000.snappy.parquet,part-00002-e0e1ee5e-c3f0-4c7f-b2a8-30853b9b5b29.c000.snappy.parquet,12824231
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00003-22bbc25f-cf5b-41a9-86e6-1f9deb45140d.c000.snappy.parquet,part-00003-22bbc25f-cf5b-41a9-86e6-1f9deb45140d.c000.snappy.parquet,12834506
dbfs:/user/zhicheng.l@worldlink-us.com/spark_programming/spark_ch4/p/delta-events/state=CA/part-00003-9eabb777-c500-4ee3-9f51-edb695b5d54e.c000.snappy.parquet,part-00003-9eabb777-c500-4ee3-9f51-edb695b5d54e.c000.snappy.parquet,5731766


* Access previous versions of table using Time Travel

In [0]:
# Drop any existing train_delta table, then re-create it on top of the
# Delta files stored at deltaPath so it can be queried with SQL.
spark.sql("DROP TABLE IF EXISTS train_delta")

create_train_delta = f"CREATE TABLE train_delta USING DELTA LOCATION '{deltaPath}'"
spark.sql(create_train_delta)

Out[28]: DataFrame[]

In [0]:
%sql
-- Show the commit history of train_delta: one row per table version, with its
-- timestamp, operation, operation parameters and operation metrics.
DESCRIBE HISTORY train_delta

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata
2,2022-04-26T03:28:57.000+0000,8983615237679342,zhicheng.l@worldlink-us.com,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(1338300120007481),0426-013321-7nf4oz3u,1.0,WriteSerializable,False,"Map(numFiles -> 200, numOutputRows -> 4361535, numOutputBytes -> 132262981)",
1,2022-04-26T03:25:41.000+0000,8983615237679342,zhicheng.l@worldlink-us.com,WRITE,"Map(mode -> Overwrite, partitionBy -> [""state""])",,List(1338300120007481),0426-013321-7nf4oz3u,0.0,WriteSerializable,False,"Map(numFiles -> 200, numOutputRows -> 9697750, numOutputBytes -> 291969219)",
0,2022-04-26T03:14:55.000+0000,8983615237679342,zhicheng.l@worldlink-us.com,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(1338300120007481),0426-013321-7nf4oz3u,,WriteSerializable,False,"Map(numFiles -> 4, numOutputRows -> 9697750, numOutputBytes -> 298347796)",


In [0]:
# Time travel: the versionAsOf reader option selects an earlier version of the
# Delta table. Here version 0 of the table at deltaPath is loaded.
df = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load(deltaPath)
)
display(df)

device,ecommerce,event_name,event_previous_timestamp,event_timestamp,geo,items,traffic_source,user_first_touch_timestamp,user_id
macOS,"List(null, null, null)",warranty,1593878899217692.0,1593878946592107,"List(Montrose, MI)",List(),google,1593878899217692,UA000000107379500
Windows,"List(null, null, null)",press,1593876662175340.0,1593877011756535,"List(Northampton, MA)",List(),google,1593876662175340,UA000000107359357
macOS,"List(null, null, null)",add_item,1593878792892652.0,1593878815459100,"List(Salinas, CA)","List(List(null, M_STAN_T, Standard Twin Mattress, 595.0, 595.0, 1))",youtube,1593878455472030,UA000000107375547
iOS,"List(null, null, null)",mattresses,1593878178791663.0,1593878809276923,"List(Everett, MA)",List(),facebook,1593877903116176,UA000000107370581
Windows,"List(null, null, null)",mattresses,,1593878628143633,"List(Cottage Grove, MN)",List(),google,1593878628143633,UA000000107377108
Windows,"List(null, null, null)",main,,1593878634344194,"List(Medina, MN)",List(),youtube,1593878634344194,UA000000107377161
iOS,"List(null, null, null)",main,,1593877936171803,"List(Mount Pleasant, UT)",List(),direct,1593877936171803,UA000000107370851
macOS,"List(null, null, null)",main,,1593876843215329,"List(Piedmont, AL)",List(),instagram,1593876843215329,UA000000107360961
Android,"List(null, null, null)",warranty,1593878529774474.0,1593879213196400,"List(Rancho Santa Margarita, CA)",List(),instagram,1593878529774474,UA000000107376205
Windows,"List(null, null, null)",main,,1593876713246514,"List(Elyria, OH)",List(),facebook,1593876713246514,UA000000107359805


In [0]:
# Write salesDF in Delta format to deltaSalesPath.
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'salesDF' is not defined" -- salesDF is not created anywhere
# in the visible part of this notebook. It must be loaded in an earlier cell
# before this one can run from a fresh kernel -- TODO confirm its source.
salesDF.write.format("delta").save(deltaSalesPath)

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-2288738603029960>[0m in [0;36m<module>[0;34m[0m
[0;32m----> 1[0;31m [0msalesDF[0m[0;34m.[0m[0mwrite[0m[0;34m.[0m[0mformat[0m[0;34m([0m[0;34m"delta"[0m[0;34m)[0m[0;34m.[0m[0msave[0m[0;34m([0m[0mdeltaSalesPath[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
[0;31mNameError[0m: name 'salesDF' is not defined

`size()`: returns the number of elements in an array column

In [0]:
from pyspark.sql.functions import size

# Replace the `items` column with the result of size() applied to it,
# leaving every other column of salesDF unchanged.
updatedSalesDF = salesDF.withColumn("items", size("items"))

display(updatedSalesDF)



In [0]:
# Overwrite the Delta table at deltaSalesPath with updatedSalesDF.
# The overwriteSchema option is set because the `items` column was replaced
# by the output of size() in updatedSalesDF.
(
    updatedSalesDF.write
    .format('delta')
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .save(deltaSalesPath)
)



In [0]:
# Drop any existing sales_delta table, then re-create it on top of the
# Delta files stored at deltaSalesPath.
spark.sql("DROP TABLE IF EXISTS sales_delta")

create_sales_delta = "CREATE TABLE sales_delta USING DELTA LOCATION '{}'".format(deltaSalesPath)
spark.sql(create_sales_delta)



__SQL table -> DataFrame: three ways to query the `sales_delta` table__

* 1
%sql
select *
from sales_delta

* 2
display(spark.sql("DESCRIBE HISTORY sales_delta"))

* 3
display(spark.table("sales_delta"))

Time travel to read a previous version: read the Delta table at `deltaSalesPath` as of version 0.

In [0]:
# Time travel: load version 0 of the Delta table stored at deltaSalesPath.
oldSalesDF = (
    spark.read
    .format("delta")
    .option("versionAsOf", 0)
    .load(deltaSalesPath)
)
display(oldSalesDF)

