## Creating Managed Delta table

In [0]:
%sql
-- Managed Delta table: no LOCATION clause is supplied for this table.
create table MAN_CATALOG.MAN_SCHEMA.ORDERS (
  ORDER_ID     int,
  ORDER_NAME   string,
  ORDER_AMOUNT double,
  PRODUCT_ID   int
)
using delta;

In [0]:
%sql
-- Seed the managed table with three sample orders.
INSERT INTO man_catalog.man_schema.orders
VALUES
  (101, 'Biscuits', 10, 201),
  (102, 'Noodles',  13, 202),
  (103, 'Biscuits', 25, 203);

In [0]:
%sql
-- Show the rows currently in the managed table.
SELECT *
FROM man_catalog.man_schema.orders;

## Creating External Delta table

**Scenario 1**

In [0]:
%sql
-- External Delta table: same columns as ORDERS, with the data stored at an explicit LOCATION.
create table MAN_CATALOG.MAN_SCHEMA.external_ORDERS (
  ORDER_ID     int,
  ORDER_NAME   string,
  ORDER_AMOUNT double,
  PRODUCT_ID   int
)
using delta
location 'abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/orders';

In [0]:
%sql
-- Seed the external table with the same three sample orders.
INSERT INTO man_catalog.man_schema.external_orders
VALUES
  (101, 'Biscuits', 10, 201),
  (102, 'Noodles',  13, 202),
  (103, 'Biscuits', 25, 203);

**Scenario 2**

In [0]:
%sql
-- CREATE TABLE ... AS SELECT into an explicit LOCATION: the new external table
-- takes its columns and rows from external_orders.
create table MAN_CATALOG.MAN_SCHEMA.EXTERNAL_ORDERS_CETAS
using delta
location 'abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/external_orders_cetas'
as
select *
from man_catalog.man_schema.external_orders;

**Scenario 3**

In [0]:
# Read the BigMart sales CSV: column names come from the header row and the
# column types are inferred by Spark.
df = (
    spark.read.format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .load('abfss://source@mystorageaccountrachit.dfs.core.windows.net/BigMart Sales.csv')
)

# Persist the DataFrame as a catalog table.
# NOTE(review): no path/LOCATION is passed here, so this looks like it creates a
# managed table even though this section is about external tables — confirm intent.
df.write.saveAsTable('man_catalog.man_schema.Sales')

In [0]:
%sql
-- Peek at a single row of the table written from the CSV.
select *
from man_catalog.man_schema.SALES
limit 1;

### Shallow Cloning & Deep Cloning

Shallow Cloning



In [0]:
%sql
-- Shallow clone of the managed ORDERS table.
create table MAN_CATALOG.MAN_SCHEMA.SHALLOW_CLONED_TABLE
  shallow clone MAN_CATALOG.MAN_SCHEMA.ORDERS;

In [0]:
%sql
-- Contents of the shallow clone right after it was created.
select *
from man_catalog.man_schema.SHALLOW_CLONED_TABLE;


As long as the source table is not deleted, there is no problem: even if we update records in the source table, the shallow cloned table will remain the same.

In [0]:
%sql
-- Change one row in the SOURCE table so it can be compared with the shallow clone.
-- ORDER_ID is declared INT, so it is compared with an integer literal
-- rather than a quoted string that would need an implicit cast.
UPDATE MAN_CATALOG.MAN_SCHEMA.ORDERS
SET ORDER_NAME = 'CHIPS'
WHERE ORDER_ID = 103;

In [0]:
%sql
-- Source table after the update.
select *
from MAN_CATALOG.MAN_SCHEMA.ORDERS;

In [0]:
%sql
-- Shallow clone after the source table was updated, for comparison with the source.
-- (The stray second statement terminator has been removed.)
SELECT * FROM man_catalog.man_schema.shallow_cloned_table;

Deep Cloning

In [0]:
%sql
-- Deep clone of the external orders table.
create table MAN_CATALOG.MAN_SCHEMA.DEEP_CLONED_ORDERS
  deep clone MAN_CATALOG.MAN_SCHEMA.EXTERNAL_ORDERS;

## Time Travel and Versioning

In [0]:
%sql
-- List the recorded versions (commit history) of the orders table.
describe history man_catalog.man_schema.orders;

In [0]:
%sql
-- Time travel: read the table as it was at version 1.
select *
from man_catalog.man_schema.orders version as of 1;

In [0]:
%sql
-- Roll the table back to the state it had at version 1.
restore table man_catalog.man_schema.orders
  to version as of 1;

In [0]:
%sql
-- Table contents after the restore.
select *
from man_catalog.man_schema.orders;


## Vacuum

In [0]:
%sql
-- Switch off the retention-duration safety check so that a 0-hour retention is accepted.
-- NOTE(review): this SET is not reverted in this cell — confirm it is acceptable
-- for the check to stay disabled for the rest of the session.
set spark.databricks.delta.retentionDurationCheck.enabled = false;

-- DRY RUN: report which files a 0-hour-retention VACUUM would remove.
vacuum man_catalog.man_schema.orders retain 0 hours dry run;

## OPTIMIZE AND ZORDER

In [0]:
%sql
-- Add five more rows to the external table before running OPTIMIZE.
INSERT INTO man_catalog.man_schema.external_orders
VALUES
  (104, 'Masala',       31, 205),
  (105, 'Bottle',       13, 206),
  (106, 'Nail cutter',   3, 207),
  (107, 'Mouse',         5, 209),
  (108, 'Mobile phone',  1, 210);

In [0]:
%sql
-- Run OPTIMIZE on the external table (no ZORDER).
optimize man_catalog.man_schema.external_orders;

In [0]:
%sql
-- Run OPTIMIZE again, this time Z-ordering the data by ORDER_ID.
optimize man_catalog.man_schema.external_orders
  zorder by (ORDER_ID);

### Liquid Clustering

In [0]:
%sql
-- External Delta table that uses liquid clustering on the id column (CLUSTER BY).
CREATE TABLE man_catalog.man_schema.liq_clus_table (
  id    INT,
  name  VARCHAR(20),
  price DECIMAL(10,4)
)
USING DELTA
LOCATION 'abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/liq_clus_table'
CLUSTER BY (id);

## Schema Evolution & Schema Enforcement

In [0]:
# Column layout shared by every sample DataFrame in this section.
my_schema = 'id INT, category STRING, sales INT'

# Four sample rows matching my_schema: (id, category, sales).
my_data = [
    (1, 'Drinks', 10),
    (2, 'Fruits', 20),
    (3, 'Vegetables', 30),
    (4, 'Meat', 40),
]

df = spark.createDataFrame(data=my_data, schema=my_schema)

In [0]:
# Render the sample DataFrame built in the previous cell.
df.display()

In [0]:
# Append the four sample rows to the Delta files at the DeltaSales path.
(
    df.write
    .format('delta')
    .mode('append')
    .save('abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/DeltaSales')
)

In [0]:
# df_new = the original rows of df plus two extra rows that use the same schema.
df_new = df.union(
    spark.createDataFrame(
        data=[(5, 'Bread', 50), (6, 'Bakery', 60)],
        schema=my_schema,
    )
)

In [0]:
# Append df_new to the same DeltaSales path; the schema is unchanged, so no
# schema options are needed.
# NOTE(review): df_new is built as df.union(...), and df was already appended
# to this path earlier, so its four rows are written a second time here —
# confirm the duplication is intended.
(
    df_new.write
    .format('delta')
    .mode('append')
    .save('abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/DeltaSales')
)

In [0]:
from pyspark.sql.functions import lit

# Give df_new an additional column 'Flag' holding the literal 1 on every row,
# so that its schema no longer matches what was written to DeltaSales.
df_new = df_new.withColumn(
    'Flag',
    lit(1),
)

In [0]:
# Render df_new, which now includes the added 'Flag' column.
df_new.display()

Trying to append the DataFrame, which now has the extra `Flag` column, to the same Delta location without enabling schema merging

In [0]:
from pyspark.sql.utils import AnalysisException

# Schema-enforcement demo: df_new now carries the extra 'Flag' column and
# mergeSchema is NOT set, so this append is expected to be rejected.
# The error is caught and printed (instead of left to abort the run) so that
# "Run All" can continue past this cell.
try:
    df_new.write.format('delta')\
            .mode('append')\
            .save('abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/DeltaSales')
except AnalysisException as schema_error:
    print(f"Append rejected by Delta schema enforcement:\n{schema_error}")
else:
    # Reaching this branch means the write went through, e.g. because schema
    # auto-merge is enabled in this session.
    print("Append succeeded: no schema-enforcement error was raised.")

As we can see, we got an error while writing the data to the Delta table.

To eliminate this, we need to use the functionality of schema evolution in Delta Lake to merge the schema.

In [0]:
# Same append as before, but with mergeSchema enabled so the new 'Flag'
# column is added to the table schema instead of being rejected.
(
    df_new.write
    .format('delta')
    .mode('append')
    .option("mergeSchema", True)
    .save('abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/DeltaSales')
)

In [0]:
# Read the DeltaSales data back to inspect the merged schema.
# The former .option('header', True) was dropped: 'header' is a CSV reader
# option and has no meaning for the Delta format, whose schema comes from
# the table's own metadata.
df_read = (
    spark.read
    .format('delta')
    .load('abfss://mycontainer@mystorageaccountrachit.dfs.core.windows.net/DeltaSales')
)

In [0]:
# Render the data read back from the DeltaSales path.
df_read.display()

## Explicit Schema Updates

1. Add a column

In [0]:
%sql
-- Add a new INT column named FLAG to the external table.
alter table man_catalog.man_schema.external_orders
  add column FLAG int;

In [0]:
%sql
-- Check the table after adding FLAG.
select *
from man_catalog.man_schema.external_orders;

2. Add a column after

In [0]:
%sql
-- Add a STRING column NEW_COLUMN, positioned directly after ORDER_ID.
alter table man_catalog.man_schema.external_orders
  add column NEW_COLUMN string after ORDER_ID;

In [0]:
%sql
-- Check the column order after adding NEW_COLUMN.
select *
from man_catalog.man_schema.external_orders;

3. Re-ordering columns

In [0]:
%sql
-- Move NEW_COLUMN so that it comes after FLAG.
alter table man_catalog.man_schema.external_orders
  alter column NEW_COLUMN after FLAG;

In [0]:
%sql
-- Check the column order after the move.
select *
from man_catalog.man_schema.external_orders;

4. Rename Columns

In [0]:
%sql
-- First rename attempt, made BEFORE column mapping is enabled on the table.
-- NOTE(review): presumably this is expected to fail, since the notebook
-- enables column mapping next and then retries the same rename — confirm.
alter table man_catalog.man_schema.external_orders
  rename column NEW_COLUMN to NEW_FLAG;

In [0]:
%sql
-- Turn on column mapping (mode 'name') together with the reader/writer protocol
-- versions set alongside it. Explicit schema changes such as a rename are then
-- applied to the table metadata rather than to the data files.
alter table man_catalog.man_schema.external_orders
set tblproperties (
  'delta.minReaderVersion'   = '2',
  'delta.minWriterVersion'   = '5',
  'delta.columnMapping.mode' = 'name'
);

In [0]:
%sql
-- Retry the rename now that column mapping has been enabled.
alter table man_catalog.man_schema.external_orders
  rename column NEW_COLUMN to NEW_FLAG;

In [0]:
%sql
-- Check the table after the rename.
select *
from man_catalog.man_schema.external_orders;

### REORG COMMAND

To apply the explicit schema changes to the underlying data files as well.

In [0]:
%sql
-- Run REORG with the PURGE option on the external table.
reorg table man_catalog.man_schema.external_orders
  apply (purge);