
# Prerequisites

In [0]:
# Load the users CSV: the first row supplies the column names and Spark
# infers each column's type from the data instead of reading all strings.
user_df = (
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("dbfs:/FileStore/synechron/user_dataset/users_001.csv")
)
user_df.display()

id,name,dob,email,gender,country,region,city,asset,marital_status
1,Heather Gibbs,2024-10-31,heathergibbs6243@gmail.com,Female,United States,Virginia,Virginia Beach,734388,Married
2,Herrod Petersen,2024-02-19,herrodpetersen@yahoomail.com,Male,United States,Arizona,Phoenix,113506,Single
3,Ocean Workman,2024-10-10,oceanworkman2328@ymail.com,Male,United States,Tennessee,Clarksville,139985,Married
4,Xaviera Maxwell,2025-03-09,xavieramaxwell@gmail.com,Transgender,United States,Ohio,Cleveland,511409,Married
5,Bo Underwood,2024-06-30,bounderwood@ymail.com,Male,India,Tamil Nadu,Madurai,366783,Married
6,Graiden Mcleod,2023-11-09,graidenmcleod1069@ymail.com,Male,United States,Oregon,Salem,638977,Divorced
7,Chantale Nixon,2023-09-06,chantalenixon404@ymail.com,Female,India,Andaman and Nicobar Islands,Port Blair,717994,Married
8,Ashton Willis,2025-02-22,ashtonwillis@gmail.com,Male,India,Dadra and Nagar Haveli,Silvassa,483841,Married
9,Mercedes Lawrence,2024-08-15,mercedeslawrence4115@ymail.com,Male,India,Punjab,Gujranwala,598378,Single
10,Norman Patton,2024-02-25,normanpatton6042@yahoomail.com,Transgender,India,Chhattisgarh,Durg,412938,Married


# Transaction 01 - Write Data as Delta Format

In [0]:
# Persist the full user DataFrame as a Delta table, replacing whatever
# is already stored at the target path.
user_df.write.save(
    path="dbfs:/FileStore/synechron/user_dataset/output_delta",
    format="delta",
    mode="overwrite",
)

# Transaction 02 - Overwrite with Only Bangalore Users

In [0]:
from pyspark.sql.functions import col
# Keep only the users whose city is Bangalore, then overwrite the same
# Delta path with that subset (this becomes the table's next commit).
bangalore_users_df = user_df.where(col("city") == "Bangalore")
(
    bangalore_users_df.write
    .format("delta")
    .mode("overwrite")
    .save("dbfs:/FileStore/synechron/user_dataset/output_delta")
)

# Read the latest transaction data

In [0]:
# Loading the Delta path without a version option returns the current
# (most recent) snapshot of the table.
latest_snapshot_df = spark.read.format("delta").load(
    "dbfs:/FileStore/synechron/user_dataset/output_delta"
)
latest_snapshot_df.display()

id,name,dob,email,gender,country,region,city,asset,marital_status
263,Nelle Blake,2024-09-17,nelleblake5744@ymail.com,Transgender,India,Karnataka,Bangalore,908573,Married


# Read Transaction Log

## Approach 01

In [0]:
# Read the first commit file of the Delta transaction log as plain text,
# one row per line, exposed in a single `value` column.
first_commit_df = spark.read.text(
    "dbfs:/FileStore/synechron/user_dataset/output_delta/_delta_log/00000000000000000000.json"
)
first_commit_df.display()

value
"{""commitInfo"":{""timestamp"":1733381150018,""userId"":""6836536383695527"",""userName"":""naveenpn.trainer@gmail.com"",""operation"":""WRITE"",""operationParameters"":{""mode"":""Overwrite"",""partitionBy"":""[]""},""notebook"":{""notebookId"":""2354002827294210""},""clusterId"":""1205-063609-7pn0cn52"",""isolationLevel"":""WriteSerializable"",""isBlindAppend"":false,""operationMetrics"":{""numFiles"":""1"",""numOutputRows"":""500"",""numOutputBytes"":""28541""},""engineInfo"":""Databricks-Runtime/12.2.x-scala2.12"",""txnId"":""d2bf26b4-3fd8-4b46-9044-957e108d8a6f""}}"
"{""protocol"":{""minReaderVersion"":1,""minWriterVersion"":2}}"
"{""metaData"":{""id"":""90272fbc-7c63-4699-bfb8-ea89a86b5e1e"",""format"":{""provider"":""parquet"",""options"":{}},""schemaString"":""{\""type\"":\""struct\"",\""fields\"":[{\""name\"":\""id\"",\""type\"":\""integer\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""name\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""dob\"",\""type\"":\""date\"",\""nullable\"":true,\""metadata\"":{\""__detected_date_formats\"":\""yyyy-M-d\""}},{\""name\"":\""email\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""gender\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""country\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""region\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""city\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""asset\"",\""type\"":\""integer\"",\""nullable\"":true,\""metadata\"":{}},{\""name\"":\""marital_status\"",\""type\"":\""string\"",\""nullable\"":true,\""metadata\"":{}}]}"",""partitionColumns"":[],""configuration"":{},""createdTime"":1733381139228}}"
"{""add"":{""path"":""part-00000-55037dc2-243f-4722-a0b2-e29e1dabbb0c-c000.snappy.parquet"",""partitionValues"":{},""size"":28541,""modificationTime"":1733381146000,""dataChange"":true,""stats"":""{\""numRecords\"":500,\""minValues\"":{\""id\"":1,\""name\"":\""Abbot Parker\"",\""dob\"":\""2023-03-28\"",\""email\"":\""abbotparker@ymail.com\"",\""gender\"":\""Female\"",\""country\"":\""India\"",\""region\"":\""Alabama\"",\""city\"":\""Agartala\"",\""asset\"":101599,\""marital_status\"":\""Common Law\""},\""maxValues\"":{\""id\"":500,\""name\"":\""Zorita Oliver\"",\""dob\"":\""2025-03-28\"",\""email\"":\""zoritaoliver6755@ymail.com\"",\""gender\"":\""Transgender\"",\""country\"":\""United States\"",\""region\"":\""Wyoming\"",\""city\"":\""Yamuna Nagar\"",\""asset\"":997997,\""marital_status\"":\""Single\""},\""nullCount\"":{\""id\"":0,\""name\"":0,\""dob\"":0,\""email\"":0,\""gender\"":0,\""country\"":0,\""region\"":0,\""city\"":0,\""asset\"":0,\""marital_status\"":0}}"",""tags"":{""INSERTION_TIME"":""1733381146000000"",""MIN_INSERTION_TIME"":""1733381146000000"",""MAX_INSERTION_TIME"":""1733381146000000"",""OPTIMIZE_TARGET_SIZE"":""268435456""}}}"


## Fetch all versions

In [0]:
from delta.tables import DeltaTable

# Bind a DeltaTable handle to the table's storage path.
delta_table = DeltaTable.forPath(
    spark,
    "dbfs:/FileStore/synechron/user_dataset/output_delta/",
)

# history() returns one row per commit (version, timestamp, operation, ...).
version_history_df = delta_table.history()
version_history_df.display()

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2024-12-05T06:54:30.000+0000,6836536383695527,naveenpn.trainer@gmail.com,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(2354002827294210),1205-063609-7pn0cn52,0.0,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 1, numOutputBytes -> 3084)",,Databricks-Runtime/12.2.x-scala2.12
0,2024-12-05T06:45:51.000+0000,6836536383695527,naveenpn.trainer@gmail.com,WRITE,"Map(mode -> Overwrite, partitionBy -> [])",,List(2354002827294210),1205-063609-7pn0cn52,,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 500, numOutputBytes -> 28541)",,Databricks-Runtime/12.2.x-scala2.12


# Time Travel - Read a Specific Version

In [0]:
# Time travel: read the table as of a specific commit version.
# The option key is spelled "versionAsOf" as in the Delta Lake docs; the
# previous lowercase-"of" spelling only worked because Spark option keys
# are matched case-insensitively.
# Per the table history, version 1 is the single-row Bangalore overwrite
# (also the latest version); version 0 is the original 500-row write.
spark.read.format("delta").option("versionAsOf", 1).load(
    "dbfs:/FileStore/synechron/user_dataset/output_delta"
).display()

id,name,dob,email,gender,country,region,city,asset,marital_status
263,Nelle Blake,2024-09-17,nelleblake5744@ymail.com,Transgender,India,Karnataka,Bangalore,908573,Married
