#### TIME TRAVELLING AND VERSIONING IN DELTA TABLES

* Creating a database

In [0]:
%sql
-- Create the demo database. IF NOT EXISTS keeps this cell re-runnable:
-- a plain CREATE DATABASE raises an error when my_db is already present.
create database if not exists my_db;

* Creating a table in the above database

In [0]:
%sql
-- Delta table used throughout this notebook to demonstrate versioning and time travel.
CREATE TABLE my_db.customers (
  custId   INT,
  custName STRING,
  salary   INT,
  city     STRING
)
USING DELTA;

In [0]:
%sql
-- Seed the table with three customers in a single append.
INSERT INTO my_db.customers (custId, custname, salary, city)
VALUES
  (1, 'Ram', 10000, 'Chennai'),
  (2, 'Sai', 20000, 'Hyderabad'),
  (3, 'Naveen', 30000, 'Mumbai');

num_affected_rows,num_inserted_rows
3,3


In [0]:
%sql
-- List every commit recorded for the table (newest first), with its version number and timestamp.
DESCRIBE HISTORY my_db.customers

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2025-01-28T10:00:31.000+0000,6821501072026142,rockyrams1998@gmail.com,WRITE,"Map(mode -> Append, partitionBy -> [])",,List(1023446289508587),0128-095149-ehsvrvpo,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 3, numOutputBytes -> 1343)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-01-28T09:56:28.000+0000,6821501072026142,rockyrams1998@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1023446289508587),0128-095149-ehsvrvpo,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


* The above result shows the versions of the table, each identified by 2 indicators (Version Number and Timestamp).
* Version 0 or Timestamp at '2025-01-28T09:56:28.000+00:00' => Table was empty (we just created).
* Version 1 or Timestamp at '2025-01-28T10:00:31.000+00:00' => Table has a few records (after inserting the new records).

* Let's create another version of the table by deleting a record

In [0]:
%sql
-- Remove a single customer so that the table gets a new version.
DELETE FROM my_db.customers
WHERE custID = 2;

num_affected_rows
1


In [0]:
%sql
-- Re-check the commit log; the DELETE should appear as the newest entry.
DESCRIBE HISTORY my_db.customers

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2025-01-28T10:05:47.000+0000,6821501072026142,rockyrams1998@gmail.com,DELETE,"Map(predicate -> [""(custID#1279 = 2)""])",,List(1023446289508587),0128-095149-ehsvrvpo,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1343, numCopiedRows -> 2, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 8683, numDeletedRows -> 1, scanTimeMs -> 7053, numAddedFiles -> 1, numAddedBytes -> 1314, rewriteTimeMs -> 1610)",,Databricks-Runtime/12.2.x-scala2.12
1,2025-01-28T10:00:31.000+0000,6821501072026142,rockyrams1998@gmail.com,WRITE,"Map(mode -> Append, partitionBy -> [])",,List(1023446289508587),0128-095149-ehsvrvpo,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 3, numOutputBytes -> 1343)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-01-28T09:56:28.000+0000,6821501072026142,rockyrams1998@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1023446289508587),0128-095149-ehsvrvpo,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


* Version 2 contains all the records that remain after the record with custID = 2 was deleted.

##### KEY : 
After every transaction, the data in the table is versioned for future reference.

#### Travelling back to older versions of data

* By default, querying a Delta table always returns the data from the most recent version.  
* However, we can still query an older version of the table data using either its Version Number or its Timestamp value.

In [0]:
%sql
-- Look up the version numbers and timestamps that the time-travel queries below refer to.
DESCRIBE HISTORY my_db.customers

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2025-01-28T10:05:47.000+0000,6821501072026142,rockyrams1998@gmail.com,DELETE,"Map(predicate -> [""(custID#1279 = 2)""])",,List(1023446289508587),0128-095149-ehsvrvpo,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1343, numCopiedRows -> 2, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 8683, numDeletedRows -> 1, scanTimeMs -> 7053, numAddedFiles -> 1, numAddedBytes -> 1314, rewriteTimeMs -> 1610)",,Databricks-Runtime/12.2.x-scala2.12
1,2025-01-28T10:00:31.000+0000,6821501072026142,rockyrams1998@gmail.com,WRITE,"Map(mode -> Append, partitionBy -> [])",,List(1023446289508587),0128-095149-ehsvrvpo,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 3, numOutputBytes -> 1343)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-01-28T09:56:28.000+0000,6821501072026142,rockyrams1998@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1023446289508587),0128-095149-ehsvrvpo,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


In [0]:
%sql
select * from my_db.customers version as of 1 -- Querying the data from version 1 (after the insert, before the delete) using Version Number

custId,custName,salary,city
1,Ram,10000,Chennai
2,Sai,20000,Hyderabad
3,Naveen,30000,Mumbai


In [0]:
%sql
select * from my_db.customers timestamp as of '2025-01-28T09:56:28.000+00:00' -- Querying the data using Timestamp (this is the version 0 commit time, i.e. the just-created empty table)

custId,custName,salary,city


In [0]:
%sql
-- Show table-level metadata; the `location` column gives the storage path of the Delta files.
DESCRIBE DETAIL my_db.customers

format,id,name,description,location,createdAt,lastModified,partitionColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics
delta,525c1a9d-6744-4c06-b716-0ce4ddc05c9d,spark_catalog.my_db.customers,,dbfs:/user/hive/warehouse/my_db.db/customers,2025-01-28T09:56:25.428+0000,2025-01-28T10:05:47.000+0000,List(),1,1314,Map(),1,2,"List(appendOnly, invariants)",Map()


* Using the Spark DataFrame approach

In [0]:
# Time travel by version number: configure a Delta reader pinned to version 1,
# then load the table files directly from their storage path.
version_reader = spark.read.format('delta').option('versionAsOf', 1)
df = version_reader.load('dbfs:/user/hive/warehouse/my_db.db/customers')

In [0]:
# Print the version-1 snapshot without truncating column values (n=20 is show()'s default row limit).
df.show(n=20, truncate=False)

+------+--------+------+---------+
|custId|custName|salary|city     |
+------+--------+------+---------+
|1     |Ram     |10000 |Chennai  |
|2     |Sai     |20000 |Hyderabad|
|3     |Naveen  |30000 |Mumbai   |
+------+--------+------+---------+



In [0]:
# Time travel by timestamp: pin the reader to a point in time,
# then resolve the table through its catalog name instead of a storage path.
timestamp_reader = spark.read.option('timestampAsOf', '2025-01-28T10:00:31.000+00:00')
df_ts = timestamp_reader.table('my_db.customers')

In [0]:
# Print the timestamp-based snapshot without truncating column values.
df_ts.show(truncate=False)

+------+--------+------+---------+
|custId|custName|salary|city     |
+------+--------+------+---------+
|1     |Ram     |10000 |Chennai  |
|2     |Sai     |20000 |Hyderabad|
|3     |Naveen  |30000 |Mumbai   |
+------+--------+------+---------+



* Suppose we want to restore our table to one of its previous versions; for that we can use the RESTORE command.

* We will delete all the records and then restore the table back to version 1.

In [0]:
%sql
-- Wipe every row (the predicate is always true) to set up the RESTORE demonstration.
DELETE FROM my_db.customers
WHERE 1=1

num_affected_rows
2


In [0]:
%sql
-- Read the current (latest) version of the table.
SELECT *
FROM my_db.customers

custId,custName,salary,city


In [0]:
%sql
-- Inspect the commit log again to pick the version to restore to.
DESCRIBE HISTORY my_db.customers

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
3,2025-01-28T10:17:58.000+0000,6821501072026142,rockyrams1998@gmail.com,DELETE,"Map(predicate -> [""true""])",,List(1023446289508587),0128-095149-ehsvrvpo,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1314, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 431, numDeletedRows -> 2, scanTimeMs -> 429, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 0)",,Databricks-Runtime/12.2.x-scala2.12
2,2025-01-28T10:05:47.000+0000,6821501072026142,rockyrams1998@gmail.com,DELETE,"Map(predicate -> [""(custID#1279 = 2)""])",,List(1023446289508587),0128-095149-ehsvrvpo,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1343, numCopiedRows -> 2, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 8683, numDeletedRows -> 1, scanTimeMs -> 7053, numAddedFiles -> 1, numAddedBytes -> 1314, rewriteTimeMs -> 1610)",,Databricks-Runtime/12.2.x-scala2.12
1,2025-01-28T10:00:31.000+0000,6821501072026142,rockyrams1998@gmail.com,WRITE,"Map(mode -> Append, partitionBy -> [])",,List(1023446289508587),0128-095149-ehsvrvpo,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 3, numOutputBytes -> 1343)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-01-28T09:56:28.000+0000,6821501072026142,rockyrams1998@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1023446289508587),0128-095149-ehsvrvpo,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


In [0]:
%sql
-- Roll the table back to the state it had at version 1.
RESTORE TABLE my_db.customers TO VERSION AS OF 1;

table_size_after_restore,num_of_files_after_restore,num_removed_files,num_restored_files,removed_files_size,restored_files_size
1343,1,0,1,0,1343


In [0]:
%sql
-- Check how the restore itself is recorded in the commit log.
DESCRIBE HISTORY my_db.customers

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
4,2025-01-28T10:20:59.000+0000,6821501072026142,rockyrams1998@gmail.com,RESTORE,"Map(version -> 1, timestamp -> null)",,List(1023446289508587),0128-095149-ehsvrvpo,3.0,Serializable,False,"Map(numRestoredFiles -> 1, removedFilesSize -> 0, numRemovedFiles -> 0, restoredFilesSize -> 1343, numOfFilesAfterRestore -> 1, tableSizeAfterRestore -> 1343)",,Databricks-Runtime/12.2.x-scala2.12
3,2025-01-28T10:17:58.000+0000,6821501072026142,rockyrams1998@gmail.com,DELETE,"Map(predicate -> [""true""])",,List(1023446289508587),0128-095149-ehsvrvpo,2.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1314, numCopiedRows -> 0, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 431, numDeletedRows -> 2, scanTimeMs -> 429, numAddedFiles -> 0, numAddedBytes -> 0, rewriteTimeMs -> 0)",,Databricks-Runtime/12.2.x-scala2.12
2,2025-01-28T10:05:47.000+0000,6821501072026142,rockyrams1998@gmail.com,DELETE,"Map(predicate -> [""(custID#1279 = 2)""])",,List(1023446289508587),0128-095149-ehsvrvpo,1.0,WriteSerializable,False,"Map(numRemovedFiles -> 1, numRemovedBytes -> 1343, numCopiedRows -> 2, numDeletionVectorsAdded -> 0, numDeletionVectorsRemoved -> 0, numAddedChangeFiles -> 0, executionTimeMs -> 8683, numDeletedRows -> 1, scanTimeMs -> 7053, numAddedFiles -> 1, numAddedBytes -> 1314, rewriteTimeMs -> 1610)",,Databricks-Runtime/12.2.x-scala2.12
1,2025-01-28T10:00:31.000+0000,6821501072026142,rockyrams1998@gmail.com,WRITE,"Map(mode -> Append, partitionBy -> [])",,List(1023446289508587),0128-095149-ehsvrvpo,0.0,WriteSerializable,True,"Map(numFiles -> 1, numOutputRows -> 3, numOutputBytes -> 1343)",,Databricks-Runtime/12.2.x-scala2.12
0,2025-01-28T09:56:28.000+0000,6821501072026142,rockyrams1998@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1023446289508587),0128-095149-ehsvrvpo,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


In [0]:
%sql
-- Read the current version of the table once more to see the effect of the restore.
SELECT *
FROM my_db.customers

custId,custName,salary,city
1,Ram,10000,Chennai
2,Sai,20000,Hyderabad
3,Naveen,30000,Mumbai


- **END OF CONCEPT**