In [0]:
from pathlib import Path
from typing import Optional

import pyspark.sql.functions as F
from delta.tables import DeltaTable
from pyspark.sql import DataFrame

In [0]:
from pathlib import Path

class DataLakeConfig:
    """Resolve catalog, schema, table and user settings for one data-lake layer.

    The schema name and the workspace user are read from the Databricks
    notebook context, so an instance can only be constructed where the
    ``dbutils`` global is available.
    """

    # Maps a layer to the layer it reads from. Layers missing from the map
    # (bronze, or any unrecognised name) have no upstream.
    _UPSTREAM_LAYERS = {"silver": "bronze", "gold": "silver"}

    def __init__(self, layer: str):
        """Capture the notebook context and derive the layer settings.

        Args:
            layer: Layer name; it is stored lower-cased.
        """
        # The context must be captured first: the schema and user lookups below read it.
        self._notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
        self.layer = layer.lower()
        self.catalog_name = "data_intelligence"
        self.schema_name = self._extract_schema_from_path()
        self.table_name = "employees"
        self.workspace_user = self._get_workspace_user()

    def _extract_schema_from_path(self) -> str:
        """Return the lower-cased text after the last underscore of the notebook's name."""
        notebook_path = self._notebook_context.notebookPath().get()
        return Path(notebook_path).name.split("_")[-1].lower()

    def _get_workspace_user(self) -> str:
        """Return the user name reported by the notebook context."""
        return self._notebook_context.userName().get()

    def get_layer_config(self) -> dict:
        """Return the resolved settings as a dict.

        Returns:
            A dict with the keys ``layer``, ``catalog``, ``schema``, ``table``
            and ``user``.
        """
        return {
            "layer": self.layer,
            "catalog": self.catalog_name,
            "schema": self.schema_name,
            "table": self.table_name,
            "user": self.workspace_user
        }

    def get_upstream_layer(self) -> Optional[str]:
        """Return the layer this layer depends on, or ``None`` if there is none.

        Data flows bronze -> silver -> gold, so silver reads from bronze and
        gold reads from silver. Bronze is the first layer and has no upstream;
        an unrecognised layer name also yields ``None``.
        """
        return self._UPSTREAM_LAYERS.get(self.layer)
        
# Build the configuration for the bronze layer, then echo the resolved
# settings and the layer it depends on.
config = DataLakeConfig("bronze")
print(config.get_layer_config())
print("Upstream Layer:", config.get_upstream_layer())

In [0]:
# Notebook context, fetched once: it supplies both the notebook path and the user name.
_nb_ctx = dbutils.notebook.entry_point.getDbutils().notebook().getContext()

CONFIG_CATALOG_NAME: str = "data_intelligence"
# Schema = text after the last underscore of the notebook's file name, lower-cased.
CONFIG_CATALOG_SCHEMA: str = Path(_nb_ctx.notebookPath().get()).name.rsplit("_", 1)[-1].lower()
CONFIG_CATALOG_TABLE: str = "employees"
CONFIG_WORKSPACE_USER: str = _nb_ctx.userName().get()

In [0]:
# Fail fast unless the schema derived from the notebook name is one of the known layers.
if CONFIG_CATALOG_SCHEMA not in ("bronze", "silver", "gold"):
    raise Exception(f"Invalid schema name: {CONFIG_CATALOG_SCHEMA}")

In [0]:
def debug_info(msg:str, notice:str = "Ok", ljust_len:int = 70) -> None:
    """Print a status line of the form ``<msg padded with dots>: <notice>``.

    Args:
        msg: Text shown on the left; right-padded with ``.`` up to ``ljust_len``
            characters (left unchanged when it is already that long).
        notice: Text shown after the colon.
        ljust_len: Minimum width of the padded ``msg`` part.
    """
    dotted_label = msg.ljust(ljust_len, ".")
    print("{}: {}".format(dotted_label, notice))

def create_delta_table(df:DataFrame, table_name:str = None) -> None:
    """Persist ``df`` as a Delta table inside the configured schema.

    If the table does not exist yet it is created from ``df``. If it already
    exists, ``df`` is merged into it on ``id`` with only a not-matched insert
    clause, so rows whose ``id`` is not yet in the table are inserted and no
    update of existing rows is configured.

    Args:
        df: Rows to write; the merge condition references an ``id`` column.
        table_name: Unqualified table name. ``None`` or an empty string falls
            back to ``CONFIG_CATALOG_TABLE``.
    """
    if table_name is None or len(table_name) == 0:
        leaf_name = CONFIG_CATALOG_TABLE
    else:
        leaf_name = table_name
    # Schema-qualified only; presumably resolved against the current catalog — verify.
    qualified_name = ".".join((CONFIG_CATALOG_SCHEMA, leaf_name))

    # First write: nothing to merge into, so create the table directly.
    if not spark.catalog.tableExists(qualified_name):
        df.write.format("delta").mode("overwrite").saveAsTable(qualified_name)
        return

    target = DeltaTable.forName(spark, qualified_name).alias("target")
    insert_only_merge = target.merge(df.alias("source"), "target.id = source.id")
    insert_only_merge.whenNotMatchedInsertAll().execute()

In [0]:
# Echo the resolved configuration values.
for _label, _value in (
    ("var (CONFIG_CATALOG_NAME)", CONFIG_CATALOG_NAME),
    ("var (CONFIG_CATALOG_SCHEMA)", CONFIG_CATALOG_SCHEMA),
    ("var (CONFIG_CATALOG_TABLE)", CONFIG_CATALOG_TABLE),
    ("var (CONFIG_WORKSPACE_USER)", CONFIG_WORKSPACE_USER),
):
    debug_info(_label, _value)

# Ensure the catalog exists, switch to it, then ensure the schema exists
# (presumably so the unqualified schema is created in that catalog — verify).
# Each statement is logged only after it has run without raising.
for _statement in (
    f"CREATE CATALOG IF NOT EXISTS {CONFIG_CATALOG_NAME}",
    f"USE CATALOG {CONFIG_CATALOG_NAME}",
    f"CREATE SCHEMA IF NOT EXISTS {CONFIG_CATALOG_SCHEMA}",
):
    spark.sql(_statement)
    debug_info(_statement)

debug_info("Function -> debug_info")
debug_info("Function -> create_delta_table")

# Silver needs the bronze table and gold needs the silver table; stop early
# when the required table is missing.
if CONFIG_CATALOG_SCHEMA == "silver":
    if not spark.catalog.tableExists(f"bronze.{CONFIG_CATALOG_TABLE}"):
        raise Exception("Bronze table does not exist")
    debug_info("Bronze table exists")
elif CONFIG_CATALOG_SCHEMA == "gold":
    if not spark.catalog.tableExists(f"silver.{CONFIG_CATALOG_TABLE}"):
        raise Exception("Silver table does not exist")
    debug_info("Silver table exists")