1. Delta automatically versions every operation that you perform on a table. You can time travel to historical versions.
2. This versioning makes it easy to audit data changes, roll back data in case of accidental bad writes or deletes, and reproduce experiments and reports.

In [0]:
from pyspark.sql.types import *

In [0]:
# Source CSV on DBFS.
path = 'dbfs:/mnt/DeltaLake/Test/SchemaManagementDelta.csv'

# Read the CSV with the first row as column names and let Spark infer the column types.
df = (
    spark.read
    .format('csv')
    .option('Header', True)
    .option('inferSchema', True)
    .load(path)
)

In [0]:
# Print the column names and inferred types to confirm how the CSV was read.
df.printSchema()

root
 |-- Education_Level: string (nullable = true)
 |-- Line_Number: integer (nullable = true)
 |-- Employed: integer (nullable = true)
 |-- Unemployed: integer (nullable = true)
 |-- Industry: string (nullable = true)
 |-- Gender: string (nullable = true)
 |-- Date_Inserted: string (nullable = true)
 |-- dense_rank: integer (nullable = true)



In [0]:
# Save the DataFrame as a Delta table, overwriting any existing contents.
# This write becomes the first entry in the table's history.
(
    df.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('`delta`.VersionTable')
)

In [0]:
%sql 
-- Preview the rows of the Delta table that was just written.

select * from `delta`.versiontable

Education_Level,Line_Number,Employed,Unemployed,Industry,Gender,Date_Inserted,dense_rank
Bachelor's Degree,326,118000,2000,Agriculture,Male,6/1/1999,1
High School Diploma,195,11134000,240000,Agriculture,Female,9/1/1996,2
High School Diploma,129,675000,15000,Retail,Female,5/1/1995,3
Master's Degree,406,131500,3500,Agriculture,Male,2/1/2001,4
High School,1463,4031000,115000,Retail Trade,Male,12/1/2000,5
Bachelor's Degree,426,136900,4100,Manufacturing,Male,7/1/2001,6
Bachelor's Degree,458,136900,4100,Agriculture,Male,3/1/2002,6
Bachelor's Degree,410,136900,4100,Agriculture,Male,3/1/2001,6
High School,1462,4018000,123000,Retail Trade,Female,11/1/2000,7
Associate's Degree,507,53350,1650,Education,Male,3/1/2003,8


In [0]:
%sql
-- Append four sample 'Networking' rows; the commit is recorded as a new table version.
-- Columns are listed explicitly so the statement does not silently depend on the
-- table's column order.
-- NOTE(review): Date_Inserted is a string column and the rows loaded from the CSV use
-- M/D/YYYY text, while these rows use YYYY-MM-DD. Confirm the mixed format is intended.
INSERT INTO `delta`.versiontable
    (Education_Level, Line_Number, Employed, Unemployed, Industry, Gender, Date_Inserted, `dense_rank`)
VALUES
    ('Bachelor', 1, 4500, 500, 'Networking', 'Male', '2023-07-12',  1),
    ('Master', 2, 6500, 500, 'Networking', 'Female', '2023-07-12', 2),
    ('High School', 3, 3500, 500, 'Networking', 'Male', '2023-07-12', 3),
    ('PhD', 4, 5500, 500, 'Networking', 'Female', '2023-07-12', 4);


num_affected_rows,num_inserted_rows
4,4


In [0]:
%sql
-- Show the table's commit history: one row per version with its timestamp and operation.

DESCRIBE HISTORY `delta`.VersionTable

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2024-02-27T13:53:12Z,4700893722167418,lokeswar.valluru@xyenta.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(4089641095547163),1227-053858-m1tsqg19,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 4, numOutputBytes -> 2158)",,Databricks-Runtime/13.3.x-photon-scala2.12
0,2024-02-27T13:50:19Z,4700893722167418,lokeswar.valluru@xyenta.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], description -> null, isManaged -> true, properties -> {}, statsOnLoad -> false)",,List(4089641095547163),1227-053858-m1tsqg19,,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 1524, numOutputBytes -> 26014)",,Databricks-Runtime/13.3.x-photon-scala2.12


In [0]:
%sql
-- Set Education_Level to 'PhD' on every 'Networking' row.
-- The change is committed as a new table version.
UPDATE `delta`.versiontable
   SET Education_Level = 'PhD'
 WHERE Industry = 'Networking'

num_affected_rows
4


In [0]:
%sql
-- Re-check the history: the UPDATE should now appear as the newest version.

DESCRIBE HISTORY `delta`.VersionTable

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2024-02-27T13:57:26Z,4700893722167418,lokeswar.valluru@xyenta.com,UPDATE,"Map(predicate -> [""(Industry#4935 = Networking)""])",,List(4089641095547163),1227-053858-m1tsqg19,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 2158, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 1627, scanTimeMs -> 968, numAddedFiles -> 1, numUpdatedRows -> 4, numAddedBytes -> 2186, rewriteTimeMs -> 635)",,Databricks-Runtime/13.3.x-photon-scala2.12
1,2024-02-27T13:53:12Z,4700893722167418,lokeswar.valluru@xyenta.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(4089641095547163),1227-053858-m1tsqg19,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 4, numOutputBytes -> 2158)",,Databricks-Runtime/13.3.x-photon-scala2.12
0,2024-02-27T13:50:19Z,4700893722167418,lokeswar.valluru@xyenta.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], description -> null, isManaged -> true, properties -> {}, statsOnLoad -> false)",,List(4089641095547163),1227-053858-m1tsqg19,,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 1524, numOutputBytes -> 26014)",,Databricks-Runtime/13.3.x-photon-scala2.12


## Read the previous version (the state before the UPDATE) using `versionAsOf` in PySpark

In [0]:
# Time travel by version: read the table as it was at version 1
# (after the INSERT, before the UPDATE).
# The table is resolved by name rather than through a hard-coded warehouse path,
# so the read does not depend on where the metastore physically stores the table.
df_1 = spark.read.option('versionAsOf', '1').table('`delta`.versiontable')

In [0]:
# Show the 'Networking' rows from the version-1 snapshot to confirm the
# pre-UPDATE Education_Level values are returned.
(
    df_1
    .where(df_1['Industry'] == 'Networking')
    .select('Education_level', 'Industry')
    .show()
)

+---------------+----------+
|Education_level|  Industry|
+---------------+----------+
|       Bachelor|Networking|
|         Master|Networking|
|    High School|Networking|
|            PhD|Networking|
+---------------+----------+



## Read the previous version (the state before the UPDATE) using `VERSION AS OF` in SQL

In [0]:
%sql
-- Query the 'Networking' rows as they were at table version 1.
SELECT Education_Level, Industry
FROM `delta`.versiontable VERSION AS OF 1
WHERE Industry = 'Networking'

Education_Level,Industry
Bachelor,Networking
Master,Networking
High School,Networking
PhD,Networking


## Time travel using `timestampAsOf` in PySpark: take the value from the `timestamp` column of DESCRIBE HISTORY

In [0]:
%sql 
-- Look up the commit timestamp of each version; these are the values to use for timestamp-based time travel.
DESCRIBE HISTORY `delta`.versiontable

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2024-02-27T13:57:26Z,4700893722167418,lokeswar.valluru@xyenta.com,UPDATE,"Map(predicate -> [""(Industry#4935 = Networking)""])",,List(4089641095547163),1227-053858-m1tsqg19,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 2158, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 1627, scanTimeMs -> 968, numAddedFiles -> 1, numUpdatedRows -> 4, numAddedBytes -> 2186, rewriteTimeMs -> 635)",,Databricks-Runtime/13.3.x-photon-scala2.12
1,2024-02-27T13:53:12Z,4700893722167418,lokeswar.valluru@xyenta.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(4089641095547163),1227-053858-m1tsqg19,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 4, numOutputBytes -> 2158)",,Databricks-Runtime/13.3.x-photon-scala2.12
0,2024-02-27T13:50:19Z,4700893722167418,lokeswar.valluru@xyenta.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], description -> null, isManaged -> true, properties -> {}, statsOnLoad -> false)",,List(4089641095547163),1227-053858-m1tsqg19,,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 1524, numOutputBytes -> 26014)",,Databricks-Runtime/13.3.x-photon-scala2.12


In [0]:
# Time travel by timestamp. In DESCRIBE HISTORY, version 1 was committed at 13:53:12Z
# and version 2 at 13:57:26Z, so 13:54:13Z selects the version-1 snapshot.
# Prefer a value slightly later than the wanted version's commit timestamp
# instead of the exact value, to avoid boundary/latency issues.
# The table is resolved by name rather than through a hard-coded warehouse path,
# so the read does not depend on where the metastore physically stores the table.
df_t_1 = spark.read.option('timestampAsOf', '2024-02-27T13:54:13Z').table('`delta`.versiontable')

In [0]:
# Show the 'Networking' rows from the timestamp-based snapshot.
(
    df_t_1
    .where(df_t_1['Industry'] == "Networking")
    .show()
)

+---------------+-----------+--------+----------+----------+------+-------------+----------+
|Education_Level|Line_Number|Employed|Unemployed|  Industry|Gender|Date_Inserted|dense_rank|
+---------------+-----------+--------+----------+----------+------+-------------+----------+
|       Bachelor|          1|    4500|       500|Networking|  Male|   2023-07-12|         1|
|         Master|          2|    6500|       500|Networking|Female|   2023-07-12|         2|
|    High School|          3|    3500|       500|Networking|  Male|   2023-07-12|         3|
|            PhD|          4|    5500|       500|Networking|Female|   2023-07-12|         4|
+---------------+-----------+--------+----------+----------+------+-------------+----------+



## Time travel using `TIMESTAMP AS OF` in SQL: take the value from the `timestamp` column of DESCRIBE HISTORY

In [0]:
%sql
-- Query the 'Networking' rows as they were at the given point in time.
SELECT *
FROM `delta`.versiontable TIMESTAMP AS OF "2024-02-27T13:54:13Z"
WHERE Industry = 'Networking'

Education_Level,Line_Number,Employed,Unemployed,Industry,Gender,Date_Inserted,dense_rank
Bachelor,1,4500,500,Networking,Male,2023-07-12,1
Master,2,6500,500,Networking,Female,2023-07-12,2
High School,3,3500,500,Networking,Male,2023-07-12,3
PhD,4,5500,500,Networking,Female,2023-07-12,4


In [0]:
%sql
-- A date alone can also be used for time travel, e.g. a previous day, if the table has history going back that far.
-- Running it here raises an error, because this table has no version from before that date.

select * from `delta`.versiontable timestamp as of '2024-02-27' where Industry = 'Networking'


## Restore the entire table. So far we have only queried older versions; to roll the table itself back, use `RESTORE`

In [0]:
%sql
-- Roll the table itself back to version 1 (the state before the UPDATE).
RESTORE TABLE `delta`.versiontable TO VERSION AS OF 1

table_size_after_restore,num_of_files_after_restore,num_removed_files,num_restored_files,removed_files_size,restored_files_size
28172,2,1,1,2186,2158


In [0]:
%sql
-- Verify the restore: the current table should show the pre-UPDATE Education_Level values again.
SELECT *
FROM `delta`.versiontable
WHERE Industry = 'Networking'


Education_Level,Line_Number,Employed,Unemployed,Industry,Gender,Date_Inserted,dense_rank
Bachelor,1,4500,500,Networking,Male,2023-07-12,1
Master,2,6500,500,Networking,Female,2023-07-12,2
High School,3,3500,500,Networking,Male,2023-07-12,3
PhD,4,5500,500,Networking,Female,2023-07-12,4


In [0]:
%sql
-- Confirm that the RESTORE itself was recorded as a new entry in the table history.
DESCRIBE HISTORY `delta`.versiontable

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
3,2024-02-27T14:18:13Z,4700893722167418,lokeswar.valluru@xyenta.com,RESTORE,"Map(version -> 1, timestamp -> null)",,List(4089641095547163),1227-053858-m1tsqg19,2.0,Serializable,False,"Map(numRestoredFiles -> 1, removedFilesSize -> 2186, numRemovedFiles -> 1, restoredFilesSize -> 2158, numOfFilesAfterRestore -> 2, tableSizeAfterRestore -> 28172)",,Databricks-Runtime/13.3.x-photon-scala2.12
2,2024-02-27T13:57:26Z,4700893722167418,lokeswar.valluru@xyenta.com,UPDATE,"Map(predicate -> [""(Industry#4935 = Networking)""])",,List(4089641095547163),1227-053858-m1tsqg19,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 2158, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 1627, scanTimeMs -> 968, numAddedFiles -> 1, numUpdatedRows -> 4, numAddedBytes -> 2186, rewriteTimeMs -> 635)",,Databricks-Runtime/13.3.x-photon-scala2.12
1,2024-02-27T13:53:12Z,4700893722167418,lokeswar.valluru@xyenta.com,WRITE,"Map(mode -> Append, statsOnLoad -> false, partitionBy -> [])",,List(4089641095547163),1227-053858-m1tsqg19,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 4, numOutputBytes -> 2158)",,Databricks-Runtime/13.3.x-photon-scala2.12
0,2024-02-27T13:50:19Z,4700893722167418,lokeswar.valluru@xyenta.com,CREATE OR REPLACE TABLE AS SELECT,"Map(partitionBy -> [], description -> null, isManaged -> true, properties -> {}, statsOnLoad -> false)",,List(4089641095547163),1227-053858-m1tsqg19,,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 1524, numOutputBytes -> 26014)",,Databricks-Runtime/13.3.x-photon-scala2.12
