### Enable/Disable Automatic Liquid Clustering

In [None]:
%%sql
-- Start from an empty table whose clustering keys are selected automatically.
CREATE OR REPLACE TABLE table1 (
  column01 INT,
  column02 STRING
)
CLUSTER BY AUTO;

-- Turn automatic liquid clustering on for an existing table; this also
-- applies to tables whose keys were previously specified manually.
ALTER TABLE table1
CLUSTER BY AUTO;

-- Turn automatic liquid clustering off for an existing table.
ALTER TABLE table1
CLUSTER BY NONE;

-- Alternatively, turn it off by setting the clustering keys explicitly,
-- either to previously chosen clustering columns or to new columns.
ALTER TABLE table1
CLUSTER BY (column01, column02);

In [None]:
df = spark.read.table("table1")

# NOTE: `...` below marks where the target table name goes.

# DataFrameWriter: request automatic clustering-key selection.
(
    df.write
    .format("delta")
    .option("clusterByAuto", "true")
    .saveAsTable(...)
)

# Passing clustering columns together with auto serves as a hint for the
# initial key selection.
(
    df.write
    .format("delta")
    .clusterBy("clusteringColumn1", "clusteringColumn2")
    .option("clusterByAuto", "true")
    .saveAsTable(...)
)

# The same, using DataFrameWriterV2.
(
    df.writeTo(...)
    .using("delta")
    .option("clusterByAuto", "true")
    .create()
)

# DataFrameWriterV2 with clustering columns plus auto: the columns serve as
# a hint for the initial selection.
(
    df.writeTo(...)
    .using("delta")
    .clusterBy("clusteringColumn1", "clusteringColumn2")
    .option("clusterByAuto", "true")
    .create()
)

# Similar syntax sets clusterByAuto for streaming tables.
(
    spark.readStream.table("source_table")
    .writeStream
    .option("clusterByAuto", "true")
    .option("checkpointLocation", checkpointPath)
    .toTable("target_table")
)

# Or give a hint for the clustering columns by specifying auto and columns together.
(
    spark.readStream.table("source_table")
    .writeStream
    .clusterBy("column1", "column2")
    .option("clusterByAuto", "true")
    .option("checkpointLocation", checkpointPath)
    .toTable("target_table")
)

# Liquid Clustering Overview

This notebook demonstrates various ways to work with liquid clustering in Delta tables.

## 1. Table Creation Methods
The following sections show different approaches to create tables with clustering.

### 1.1 SQL Approach

In [None]:
%%sql
-- Create a table clustered on a single column.
CREATE TABLE table1 (
  col0 INT,
  col1 STRING
)
CLUSTER BY (col0);

-- CTAS: the clustering is declared right after the table name.
CREATE EXTERNAL TABLE table2
CLUSTER BY (col0)
LOCATION 'table_location'
AS
SELECT * FROM table1;

-- Create table3 with the same structure as table1.
CREATE TABLE table3 LIKE table1;

-- Describe a table (table_name is a placeholder for the table to inspect).
DESCRIBE TABLE table_name;

DESCRIBE DETAIL table_name;

### 1.2 Python API Approach

In [None]:
# NOTE(review): DeltaTable is provided by the Delta Lake Python API
# (`from delta.tables import DeltaTable`); the import is assumed to live in
# an earlier cell -- confirm before running on a fresh kernel.

# Create an empty table clustered by col0 using the DeltaTable builder API.
(
    DeltaTable.create()
    .tableName("table1")
    .addColumn("col0", dataType="INT")
    .addColumn("col1", dataType="STRING")
    .clusterBy("col0")
    .execute()
)

# Read the source table once; both CTAS variants below reuse this DataFrame.
df = spark.read.table("table1")

# CTAS using DataFrameWriter
df.write.clusterBy("col0").saveAsTable("table2")

# CTAS using DataFrameWriterV2
# The target must be a table that does not exist yet: create() fails when the
# table already exists, so writing back to the source "table1" cannot succeed.
df.writeTo("table3").using("delta").clusterBy("col0").create()

## 2. Table Alterations

In [None]:
%%sql
-- Template: set the clustering columns of an existing table.
-- <table_name> and <clustering_columns> are placeholders; replace them with
-- real identifiers before running this cell.
ALTER TABLE <table_name>
CLUSTER BY (<clustering_columns>)

## 3. Streaming with Clustering

In [None]:
%%sql
-- Create table1 with explicit clustering columns (col0, col1) and the
-- 'clusterByAuto' table property set to 'true'.
-- NOTE(review): presumably the explicit columns act as an initial hint for
-- automatic key selection -- confirm against the liquid clustering docs.
CREATE TABLE table1 (
  col0 STRING,
  col1 DATE,
  col2 BIGINT
)
CLUSTER BY (col0, col1)
TBLPROPERTIES (
  'clusterByAuto' = 'true'
);

In [None]:
# Stream source_table into target_table, clustering the sink by column_name.
clustered_stream_writer = (
    spark.readStream.table("source_table")
    .writeStream
    .clusterBy("column_name")
    .option("checkpointLocation", checkpointPath)
)
clustered_stream_writer.toTable("target_table")

## 4. Auto-Clustering Management

In [None]:
%%sql
-- Create (or replace) a table that uses automatic clustering.
CREATE OR REPLACE TABLE table1 (
  column01 INT,
  column02 STRING
)
CLUSTER BY AUTO;

-- Switch an existing table to automatic clustering.
ALTER TABLE table1
CLUSTER BY AUTO;

-- Turn clustering off.
ALTER TABLE table1
CLUSTER BY NONE;

-- Cluster by manually chosen columns.
ALTER TABLE table1
CLUSTER BY (column01, column02);

In [None]:
# Source DataFrame for the batch examples below.
df = spark.read.table("table1")

# DataFrameWriter: automatic clustering-key selection.
(
    df.write
    .format("delta")
    .option("clusterByAuto", "true")
    .saveAsTable("table_name")
)

# Hybrid: explicit clustering columns as hints, combined with auto.
(
    df.write
    .format("delta")
    .clusterBy("clusteringColumn1", "clusteringColumn2")
    .option("clusterByAuto", "true")
    .saveAsTable("table_name")
)

# DataFrameWriterV2: automatic clustering-key selection.
(
    df.writeTo("table_name")
    .using("delta")
    .option("clusterByAuto", "true")
    .create()
)

# Streaming write with automatic clustering.
(
    spark.readStream.table("source_table")
    .writeStream
    .option("clusterByAuto", "true")
    .option("checkpointLocation", checkpointPath)
    .toTable("target_table")
)

# Streaming write with column hints combined with auto.
(
    spark.readStream.table("source_table")
    .writeStream
    .clusterBy("column1", "column2")
    .option("clusterByAuto", "true")
    .option("checkpointLocation", checkpointPath)
    .toTable("target_table")
)


## 5. Triggering Clustering

In [None]:
%%sql
-- Trigger clustering on a table (table_name is a placeholder).
OPTIMIZE table_name;
-- Full variant. Original note: "16.0" (presumably requires Databricks Runtime
-- 16.0 or above -- confirm); on a large table that was not previously
-- clustered, this can take hours.
OPTIMIZE table_name FULL;