# Create Catalog, Schema / Database and Table

In [0]:
# Start from a clean slate: recursively delete the storage directory of the
# `quickstart_schema.users` table under the Hive warehouse.
dbutils.fs.rm(
    "dbfs:/user/hive/warehouse/quickstart_schema.db/users",
    recurse=True,
)

Out[3]: True

In [0]:
%sql
-- Create the schema (database) that holds every table used in this notebook;
-- IF NOT EXISTS keeps the cell safe to re-run.
CREATE SCHEMA IF NOT EXISTS quickstart_schema;

col_name,data_type,comment
id,int,
name,string,
dob,date,
email,string,
gender,string,
country,string,
region,string,
city,string,
asset,int,
marital_status,string,


## Transaction 00 - Create Table

## Internal Table

In [0]:
%sql
-- Managed ("internal") table: no LOCATION clause is given, so the table's
-- files live under the schema's warehouse directory.
CREATE TABLE IF NOT EXISTS quickstart_schema.users (
  id             INT,
  name           STRING,
  dob            DATE,
  email          STRING,
  gender         STRING,
  country        STRING,
  region         STRING,
  city           STRING,
  asset          INT,
  marital_status STRING
);

-- Show the column list together with the extended table metadata.
DESCRIBE EXTENDED quickstart_schema.users;

## External Table

In [0]:
%sql
-- External table: same columns as `users`, but declared as CSV and bound to
-- an explicit storage path through the LOCATION clause.
CREATE TABLE IF NOT EXISTS quickstart_schema.users_ext (
  id             INT,
  name           STRING,
  dob            DATE,
  email          STRING,
  gender         STRING,
  country        STRING,
  region         STRING,
  city           STRING,
  asset          INT,
  marital_status STRING
)
USING CSV
LOCATION "dbfs:/FileStore/synechron/users";

In [0]:
%sql
-- Inspect the columns and the formatted metadata of the external table.
DESCRIBE FORMATTED quickstart_schema.users_ext;

col_name,data_type,comment
id,int,
name,string,
dob,date,
email,string,
gender,string,
country,string,
region,string,
city,string,
asset,int,
marital_status,string,


# Transaction 01 - Load data into Delta Table

In [0]:
# First data load: read the CSV (column names come from the header row, column
# types are inferred) and overwrite `quickstart_schema.users` with its rows.
(
    spark.read
    .option("header", True)
    .option("inferSchema", True)
    .csv("dbfs:/FileStore/synechron/user_dataset/users_001.csv")
    .write
    .saveAsTable("quickstart_schema.users", mode="overwrite")
)

# Read Delta Table

In [0]:
# Preview the first four rows of the current table version.
spark.table("quickstart_schema.users").limit(4).display()

# List Transaction History



## Approach 01

In [0]:
%sql
-- List the table's commits (version, timestamp, operation, metrics).
DESCRIBE HISTORY quickstart_schema.users;

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
2,2024-12-06T04:41:29.000+0000,6836536383695527,naveenpn.trainer@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1565450580302794),1206-034735-h2b69thq,1.0,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 247, numOutputBytes -> 15871)",,Databricks-Runtime/12.2.x-scala2.12
1,2024-12-06T03:58:41.000+0000,6836536383695527,naveenpn.trainer@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1565450580302794),1206-034735-h2b69thq,0.0,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 500, numOutputBytes -> 28541)",,Databricks-Runtime/12.2.x-scala2.12
0,2024-12-06T03:53:06.000+0000,6836536383695527,naveenpn.trainer@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1565450580302794),1206-034735-h2b69thq,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


## Approach 02

In [0]:
from delta.tables import DeltaTable

# Same commit history as DESCRIBE HISTORY, obtained through the Delta Lake
# Python API instead of SQL.
table_name = "quickstart_schema.users"
delta_table = DeltaTable.forName(spark, table_name)
display(delta_table.history())

version,timestamp,userId,userName,operation,operationParameters,job,notebook,clusterId,readVersion,isolationLevel,isBlindAppend,operationMetrics,userMetadata,engineInfo
1,2024-12-06T03:58:41.000+0000,6836536383695527,naveenpn.trainer@gmail.com,CREATE OR REPLACE TABLE AS SELECT,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1565450580302794),1206-034735-h2b69thq,0.0,WriteSerializable,False,"Map(numFiles -> 1, numOutputRows -> 500, numOutputBytes -> 28541)",,Databricks-Runtime/12.2.x-scala2.12
0,2024-12-06T03:53:06.000+0000,6836536383695527,naveenpn.trainer@gmail.com,CREATE TABLE,"Map(isManaged -> true, description -> null, partitionBy -> [], properties -> {})",,List(1565450580302794),1206-034735-h2b69thq,,WriteSerializable,True,Map(),,Databricks-Runtime/12.2.x-scala2.12


# Read specific versions

In [0]:
%sql
-- Time travel by version number: read four rows of the table as of version 1.
SELECT *
FROM quickstart_schema.users VERSION AS OF 1
LIMIT 4

id,name,dob,email,gender,country,region,city,asset,marital_status
1,Heather Gibbs,2024-10-31,heathergibbs6243@gmail.com,Female,United States,Virginia,Virginia Beach,734388,Married
2,Herrod Petersen,2024-02-19,herrodpetersen@yahoomail.com,Male,United States,Arizona,Phoenix,113506,Single
3,Ocean Workman,2024-10-10,oceanworkman2328@ymail.com,Male,United States,Tennessee,Clarksville,139985,Married
4,Xaviera Maxwell,2025-03-09,xavieramaxwell@gmail.com,Transgender,United States,Ohio,Cleveland,511409,Married


# Partitioning DELTA Table

In [0]:
%sql
-- Same columns as `users`, but partitioned by the three geography columns.
CREATE TABLE IF NOT EXISTS quickstart_schema.users_partitioned (
  id             INT,
  name           STRING,
  dob            DATE,
  email          STRING,
  gender         STRING,
  country        STRING,
  region         STRING,
  city           STRING,
  asset          INT,
  marital_status STRING
)
PARTITIONED BY (country, region, city);

-- Show the column list together with the extended table metadata.
DESCRIBE EXTENDED quickstart_schema.users_partitioned;

col_name,data_type,comment
id,int,
name,string,
dob,date,
email,string,
gender,string,
country,string,
region,string,
city,string,
asset,int,
marital_status,string,


In [0]:

# Load the same CSV into the partitioned Delta table, overwriting its contents
# and partitioning the written data by country / region / city.
(
    spark.read
    .options(header=True, inferSchema=True)
    .csv("dbfs:/FileStore/synechron/user_dataset/users_001.csv")
    .write
    .saveAsTable(
        "quickstart_schema.users_partitioned",
        format="delta",
        mode="overwrite",
        partitionBy=["country", "region", "city"],
    )
)

In [0]:
from pyspark.sql.functions import col

# Show four rows from the partitioned table, restricted to country == "India"
# (`country` is one of the partition columns).
(
    spark.table("quickstart_schema.users_partitioned")
    .where(col("country") == "India")
    .limit(4)
    .display()
)

id,name,dob,email,gender,country,region,city,asset,marital_status
7,Chantale Nixon,2023-09-06,chantalenixon404@ymail.com,Female,India,Andaman and Nicobar Islands,Port Blair,717994,Married
146,Vladimir Pierce,2025-03-04,vladimirpierce@ymail.com,Female,India,Andaman and Nicobar Islands,Port Blair,312067,Married
208,Jonas Decker,2023-05-08,jonasdecker@gmail.com,Male,India,Andaman and Nicobar Islands,Port Blair,967919,Common Law
268,Cedric Spencer,2023-05-07,cedricspencer5169@ymail.com,Transgender,India,Andaman and Nicobar Islands,Port Blair,667070,Single


# Transaction 02 - Update (overwrite the table with India-only rows)

In [0]:
# Second write to `quickstart_schema.users`: re-read the CSV, keep only the
# rows whose country is "India", and overwrite the table with that subset.
# `col` is the pyspark.sql.functions helper imported in an earlier cell.
(
    spark.read
    .options(header=True, inferSchema=True)
    .csv("dbfs:/FileStore/synechron/user_dataset/users_001.csv")
    .where(col("country") == "India")
    .write
    .saveAsTable("quickstart_schema.users", format="delta", mode="overwrite")
)

In [0]:
# Display the table after the overwrite; only the India rows remain.
spark.table("quickstart_schema.users").display()

id,name,dob,email,gender,country,region,city,asset,marital_status
5,Bo Underwood,2024-06-30,bounderwood@ymail.com,Male,India,Tamil Nadu,Madurai,366783,Married
7,Chantale Nixon,2023-09-06,chantalenixon404@ymail.com,Female,India,Andaman and Nicobar Islands,Port Blair,717994,Married
8,Ashton Willis,2025-02-22,ashtonwillis@gmail.com,Male,India,Dadra and Nagar Haveli,Silvassa,483841,Married
9,Mercedes Lawrence,2024-08-15,mercedeslawrence4115@ymail.com,Male,India,Punjab,Gujranwala,598378,Single
10,Norman Patton,2024-02-25,normanpatton6042@yahoomail.com,Transgender,India,Chhattisgarh,Durg,412938,Married
11,Pamela Farmer,2023-09-13,pamelafarmer7734@gmail.com,Female,India,Pondicherry,Pondicherry,961667,Common Law
13,Marvin Joseph,2025-02-19,marvinjoseph@ymail.com,Transgender,India,Tamil Nadu,Chennai,458643,Divorced
17,Dominique Horton,2023-06-11,dominiquehorton@gmail.com,Transgender,India,Bihar,Muzaffarpur,813996,Married
19,Len Mooney,2023-04-30,lenmooney@yahoomail.com,Transgender,India,Tamil Nadu,Thanjavur,406887,Single
22,Blythe Bryan,2023-11-08,blythebryan@yahoomail.com,Male,India,Jammu and Kashmir,Jammu,283680,Divorced


# Time Travel

In [0]:
# Time travel by timestamp: read the table as it was right after the first
# data load (version 1 in the table history).
#
# The commit timestamp is looked up from the table history at run time rather
# than hard-coded. A literal timestamp is only valid for the run that produced
# it: this notebook deletes and rebuilds the table on every run, so a
# timestamp copied from an older run lies before the earliest available commit
# and the read fails.
first_load_commit = (
    spark.sql("DESCRIBE HISTORY quickstart_schema.users")
    .filter("version = 1")
    # Cast in Spark so the string is rendered in the session time zone, the
    # same zone used to interpret the `timestampAsOf` option.
    .selectExpr("CAST(timestamp AS STRING) AS commit_ts")
    .first()
)
if first_load_commit is None:
    raise ValueError(
        "quickstart_schema.users has no version 1 yet; run the "
        "'Transaction 01' load cell before time travelling."
    )

(
    spark.read
    .option("timestampAsOf", first_load_commit["commit_ts"])
    .table("quickstart_schema.users")
    .limit(4)
    .display()
)

id,name,dob,email,gender,country,region,city,asset,marital_status
1,Heather Gibbs,2024-10-31,heathergibbs6243@gmail.com,Female,United States,Virginia,Virginia Beach,734388,Married
2,Herrod Petersen,2024-02-19,herrodpetersen@yahoomail.com,Male,United States,Arizona,Phoenix,113506,Single
3,Ocean Workman,2024-10-10,oceanworkman2328@ymail.com,Male,United States,Tennessee,Clarksville,139985,Married
4,Xaviera Maxwell,2025-03-09,xavieramaxwell@gmail.com,Transgender,United States,Ohio,Cleveland,511409,Married


In [0]:
%sql
-- Show table-level details: format, location, partition columns, file count, size.
DESCRIBE DETAIL quickstart_schema.users;

format,id,name,description,location,createdAt,lastModified,partitionColumns,numFiles,sizeInBytes,properties,minReaderVersion,minWriterVersion,tableFeatures,statistics
delta,df196364-a89f-40cc-b95f-06bd8566c696,spark_catalog.quickstart_schema.users,,dbfs:/user/hive/warehouse/quickstart_schema.db/users,2024-12-06T03:53:03.452+0000,2024-12-06T04:41:29.000+0000,List(),1,15871,Map(),1,2,"List(appendOnly, invariants)",Map()
