## **Configure Native Execution Engine**

In [None]:
%%configure
{
    "conf": {
        "spark.gluten.enabled": "true",
        "spark.shuffle.manager": "org.apache.spark.shuffle.sort.ColumnarShuffleManager"
    }
}

## **Optimize Delta Tables**

In [3]:
# Get the filtered subdirectory names for "Tables"
filtered_tables = [table.name for table in spark.catalog.listTables()]
partial_match = ['fact', 'dim'] # Partial match strings
# filtered_tables = [table for table in filtered_tables if any(keyword in table for keyword in partial_match)] # Filter for Conatains
filtered_tables = [table for table in filtered_tables if any(table.startswith(keyword) for keyword in partial_match)] # Filter for Starts With

# Iterate over the filtered tables and alter their properties
for table_name in filtered_tables:
    try:
        # Attempt to run the VACUUM command
        spark.sql(f'''VACUUM {table_name};''')
        print(f"[I] VACUUM completed for table {table_name}")
    except Exception as e:
        print(f"[E] Error during VACUUM for table {table_name}: {e}")

    try:
        # Attempt to run the OPTIMIZE command
        spark.sql(f'''OPTIMIZE {table_name};''')
        print(f"[I] OPTIMIZE completed for table {table_name}")
    except Exception as e:
        print(f"[E] Error during OPTIMIZE for table {table_name}: {e}")

    try:
        # Attempt to set table properties
        spark.sql(f'''ALTER TABLE {table_name} SET TBLPROPERTIES("delta.parquet.vorder.enabled" = "true");''')
        print(f"[I] ALTER TABLE properties set for table {table_name}")
    except Exception as e:
        print(f"[E] Error setting table properties for table {table_name}: {e}")

StatementMeta(, b4c1210d-3d25-4d9a-8329-6cfae8f9b33e, 4, Finished, Available, Finished)

[I] VACUUM completed for table dim_budget_2024
[I] OPTIMIZE completed for table dim_budget_2024
[I] ALTER TABLE properties set for table dim_budget_2024
[I] VACUUM completed for table dim_products_gold
[I] OPTIMIZE completed for table dim_products_gold
[I] ALTER TABLE properties set for table dim_products_gold
[I] VACUUM completed for table dim_customers_gold
[I] OPTIMIZE completed for table dim_customers_gold
[I] ALTER TABLE properties set for table dim_customers_gold
[I] VACUUM completed for table fact_sales_gold
[I] OPTIMIZE completed for table fact_sales_gold
[I] ALTER TABLE properties set for table fact_sales_gold
[I] VACUUM completed for table dim_unitofmeasure_gold
[I] OPTIMIZE completed for table dim_unitofmeasure_gold
[I] ALTER TABLE properties set for table dim_unitofmeasure_gold
[I] VACUUM completed for table dim_productcategory_gold
[I] OPTIMIZE completed for table dim_productcategory_gold
[I] ALTER TABLE properties set for table dim_productcategory_gold
[I] VACUUM complete

In [4]:
# List tables in the catalog and filter them in one step
filtered_tables = [table.name for table in spark.catalog.listTables()]

# Iterate over the filtered tables and alter their properties
for table_name in filtered_tables:
    display(spark.sql(f'''DESCRIBE HISTORY {table_name};'''))

StatementMeta(, b4c1210d-3d25-4d9a-8329-6cfae8f9b33e, 5, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 06817d3d-1e7b-4231-a57d-308b3e1e09f3)

SynapseWidget(Synapse.DataFrame, 36763584-0b7e-4822-9de4-1937008d71a1)

SynapseWidget(Synapse.DataFrame, cdeab591-9041-4b3d-97e2-d5e4bc1d7ba6)

SynapseWidget(Synapse.DataFrame, 923ac30c-e541-4154-a2f0-99f2a163a6bf)

SynapseWidget(Synapse.DataFrame, 1f10f8be-dce9-4af4-928c-9d283b1bd8ca)

SynapseWidget(Synapse.DataFrame, 482f8072-8d1b-4c59-aa31-d216cdf56353)

SynapseWidget(Synapse.DataFrame, f925e5b7-8549-4d5d-a17c-66d85e3f2b38)

SynapseWidget(Synapse.DataFrame, 919a275c-850a-49c4-a5c3-b5f9fcd8f618)

SynapseWidget(Synapse.DataFrame, 681aa14c-90cb-4b93-b3c6-190a2995de21)

SynapseWidget(Synapse.DataFrame, 00747014-f14b-47b4-a559-0a6b74ed87d1)

SynapseWidget(Synapse.DataFrame, 218d8ce6-8c32-40ed-8891-610b1583ebcd)

SynapseWidget(Synapse.DataFrame, d495652b-91aa-49cd-a0ce-ac463deb7e46)

SynapseWidget(Synapse.DataFrame, 5249b3e1-12aa-435f-ba85-05dd1db22a7c)

SynapseWidget(Synapse.DataFrame, 6e02d11c-1309-46f0-8c16-ad240795ddf1)

SynapseWidget(Synapse.DataFrame, c1d996d4-5704-4b5a-9021-300aba1c32b2)

#### **<u>V-Order</u>**

In [None]:
# # List tables in the catalog and filter them in one step
# filtered_tables = [table.name for table in spark.catalog.listTables() if table.name.startswith(("fact", "dim"))]

# # Iterate over the filtered tables and alter their properties
# for table_name in filtered_tables:
#     spark.sql(f'''ALTER TABLE {table_name} SET TBLPROPERTIES("delta.parquet.vorder.enabled" = "true");''')

StatementMeta(, , , Cancelled, )

#### **<u>VACUUM</u>**

In [None]:
# # List tables in the catalog and filter them in one step
# filtered_tables = [table.name for table in spark.catalog.listTables() if table.name.startswith(("fact", "dim"))]

# # Iterate over the filtered tables and alter their properties
# for table_name in filtered_tables:
#     spark.sql(f'''VACUUM {table_name};''')

StatementMeta(, , , Cancelled, )

#### **<u>OPTIMIZE</u>**

In [None]:
# # List tables in the catalog and filter them in one step
# filtered_tables = [table.name for table in spark.catalog.listTables() if table.name.startswith(("fact", "dim"))]

# # Iterate over the filtered tables and alter their properties
# for table_name in filtered_tables:
#     spark.sql(f'''OPTIMIZE {table_name};''')

StatementMeta(, , , Cancelled, )