## **Configure Native Execution Engine**

In [None]:
%%configure
{
    "conf": {
        "spark.native.enabled": "true",
        "spark.gluten.enabled": "true",
        "spark.shuffle.manager": "org.apache.spark.shuffle.sort.ColumnarShuffleManager"
    }
}

## **Optimize Delta Tables**

In [36]:
# Collect the names of the tables registered in the current catalog.
# Temporary and persistent views are skipped: they cannot be vacuumed or
# optimized, so including them would only produce an error line per command.
filtered_tables = [
    table.name
    for table in spark.catalog.listTables()
    if not table.isTemporary and table.tableType != "VIEW"
]
# Optional name filters: uncomment `partial_match` plus ONE of the two filters below.
# partial_match = ['fact', 'dim'] # Partial match strings
# filtered_tables = [table for table in filtered_tables if any(keyword in table for keyword in partial_match)] # Filter for Contains
# filtered_tables = [table for table in filtered_tables if any(table.startswith(keyword) for keyword in partial_match)] # Filter for Starts With


def _run_table_maintenance(command, table_name):
    """Run a single maintenance command against one table, best-effort.

    Args:
        command: SQL maintenance verb placed in front of the table name
            (this cell uses "VACUUM" and "OPTIMIZE").
        table_name: Unqualified table name as returned by the catalog.

    Returns:
        None. The outcome is reported with an "[I]" (success) or "[E]"
        (failure) line on stdout; failures are not re-raised so that one bad
        table does not stop the remaining tables from being processed.
    """
    # Backtick-quote the identifier so names containing spaces, dashes or
    # reserved words still parse; embedded backticks are escaped by doubling.
    quoted_name = "`" + table_name.replace("`", "``") + "`"
    try:
        spark.sql(f"{command} {quoted_name}")
        print(f"[I] {command} completed for table {table_name}")
    except Exception as e:
        print(f"[E] Error during {command} for table {table_name}: {e}")


# Run the maintenance commands table by table.
for table_name in filtered_tables:
    # VACUUM is issued without a RETAIN clause, so no explicit retention
    # period is passed from this notebook.
    _run_table_maintenance("VACUUM", table_name)

    # OPTIMIZE is expected to rewrite the files with V-Order applied.
    # NOTE(review): this presumably relies on V-Order being enabled for the
    # session or table -- confirm for the runtime in use.
    _run_table_maintenance("OPTIMIZE", table_name)

    # Apply V-Order explicitly (not necessary if OPTIMIZE is used)
    # try:
    #     # Enable column mapping mode 'name' for the table
    #     spark.sql(f'''
    #         ALTER TABLE {table_name} SET TBLPROPERTIES (
    #             'delta.columnMapping.mode' = 'name',
    #             'delta.minReaderVersion' = '3',
    #             'delta.minWriterVersion' = '7'
    #         );
    #     ''')
    #     print(f"[I] ALTER TABLE: Column mapping mode set for table {table_name}")
    # except Exception as e:
    #     print(f"[E] Error setting column mapping mode for table {table_name}: {e}")

    # try:
    #     # Set V-Order for Delta Tables
    #     spark.sql(f'''ALTER TABLE {table_name} SET TBLPROPERTIES("delta.parquet.vorder.enabled" = "true");''')
    #     print(f"[I] ALTER TABLE: Properties set for table {table_name}")
    # except Exception as e:
    #     print(f"[E] Error setting table properties for table {table_name}: {e}")

StatementMeta(, 5ef500ea-b029-4f0f-90a4-32be2526b078, 50, Finished, Available, Finished)

[I] VACUUM completed for table venture_funding_deals_delta
[I] OPTIMIZE completed for table venture_funding_deals_delta
[I] VACUUM completed for table local_venture_funding_deals_delta
[I] OPTIMIZE completed for table local_venture_funding_deals_delta
[I] VACUUM completed for table venture_funding_deals_delta_partitioned
[I] OPTIMIZE completed for table venture_funding_deals_delta_partitioned


In [34]:
# List every table registered in the current catalog (no name filter is applied)
filtered_tables = [table.name for table in spark.catalog.listTables()]

# Display the Delta transaction history of each table, one result per table
for table_name in filtered_tables:
    # Backtick-quote the identifier so unusual table names still parse;
    # embedded backticks are escaped by doubling.
    quoted_name = "`" + table_name.replace("`", "``") + "`"
    try:
        history = spark.sql(f"DESCRIBE HISTORY {quoted_name}")
    except Exception as e:
        # Best-effort: report the failure and continue with the next table
        # instead of aborting the whole loop on the first failing entry.
        print(f"[E] Error during DESCRIBE HISTORY for table {table_name}: {e}")
        continue

    # Label the output so each rendered history can be matched to its table.
    print(f"[I] History for table {table_name}")
    display(history)

StatementMeta(, 5ef500ea-b029-4f0f-90a4-32be2526b078, 48, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, 8a3033cf-d541-43e6-8418-2e4f7976d683)

SynapseWidget(Synapse.DataFrame, cd865a34-d326-4b8d-bcd4-f278e86b5400)

SynapseWidget(Synapse.DataFrame, 0a9663e4-5e84-4ad1-a717-8f558260b2f9)

#### **<u>V-Order</u>**

In [None]:
# # List tables in the catalog and filter them in one step
# filtered_tables = [table.name for table in spark.catalog.listTables() if table.name.startswith(("fact", "dim"))]

# # Iterate over the filtered tables and alter their properties
# for table_name in filtered_tables:
#     spark.sql(f'''ALTER TABLE {table_name} SET TBLPROPERTIES("delta.parquet.vorder.enabled" = "true");''')

StatementMeta(, , , Cancelled, )

#### **<u>VACUUM</u>**

In [None]:
# # List tables in the catalog and filter them in one step
# filtered_tables = [table.name for table in spark.catalog.listTables() if table.name.startswith(("fact", "dim"))]

# # Iterate over the filtered tables and run VACUUM on each one
# for table_name in filtered_tables:
#     spark.sql(f'''VACUUM {table_name};''')

StatementMeta(, , , Cancelled, )

#### **<u>OPTIMIZE</u>**

In [None]:
# # List tables in the catalog and filter them in one step
# filtered_tables = [table.name for table in spark.catalog.listTables() if table.name.startswith(("fact", "dim"))]

# # Iterate over the filtered tables and run OPTIMIZE on each one
# for table_name in filtered_tables:
#     spark.sql(f'''OPTIMIZE {table_name};''')

StatementMeta(, , , Cancelled, )