In [0]:
# Echo the notebook-level processing date; the ETL class defined below
# interpolates this same `proc_date` global into its SQL.
print(proc_date)

In [0]:
class Common_Etl_Load:
    """Compute the incremental-load window and static values for one target table.

    The class builds and runs three Spark SQL statements through the
    notebook-provided ``spark`` session:

    * a temp view ``<TARGET_TABLE>_DEPENDENCIES`` derived from
      ``ETL_LOG_DEPENDENCIES`` and ``ETL_DELTA_TABLE``;
    * the SQL function ``<catalog>.default.fn_CLEAN_CALDAYINSTR``;
    * an aggregate query over ``ETL_DELTA_TABLE`` joined to that view.

    ``__main__`` runs the three steps in order and returns the aggregate row.
    """

    def __init__(
        self,
        target_dataset,
        catalog_name,
        target_table,
        metadata_schema,
        dependency_table,
        proc_date=None,
    ):
        """Store the identifiers used to build the SQL statements.

        Args:
            target_dataset: Dataset of the target table; stored upper-cased.
            catalog_name: Catalog that holds the metadata schema and the
                ``default`` schema where the SQL function is created.
            target_table: Target table name; stored upper-cased and used as
                the prefix of the ``*_DEPENDENCIES`` temp view.
            metadata_schema: Schema containing ``ETL_LOG_DEPENDENCIES`` and
                ``ETL_DELTA_TABLE``.
            dependency_table: Stored as-is; not read by any method of this
                class.
            proc_date: Optional processing date embedded in the dependency
                query. When omitted, the notebook-level ``proc_date`` global
                is read at query-build time (the original behaviour).
        """
        self.target_dataset = target_dataset.upper()
        self.catalog_name = catalog_name
        self.target_table = target_table.upper()
        self.metadata_schema = metadata_schema
        self.dependency_table = dependency_table
        self.proc_date = proc_date

    def _resolve_proc_date(self):
        """Return the explicit processing date, else the notebook global."""
        if self.proc_date is not None:
            return self.proc_date
        # Backward compatibility: callers that do not pass `proc_date` rely
        # on the notebook-level variable of the same name.
        return proc_date

    def _dependencies_view_query(self):
        """Return the CREATE TEMP VIEW statement for ``<TARGET_TABLE>_DEPENDENCIES``."""
        run_date = self._resolve_proc_date()
        # Both metadata tables are addressed through `self.metadata_schema`
        # so the constructor argument is honoured (previously a bare global
        # `metadata_schema` was interpolated here).
        # NOTE(review): with the INNER JOIN below, A1.DEPENDENCY_TABLE cannot
        # be NULL, so the `IS NULL` CASE branches look unreachable - confirm
        # whether a LEFT JOIN was intended.
        return f"""
        CREATE OR REPLACE TEMP VIEW {self.target_table}_DEPENDENCIES AS
        WITH ETL_LOG_DEPENDENCIES AS (
            SELECT
                DEPENDENCY_TABLE,
                MAX(ETL_TO_TS) AS LATEST_ETL_TS
            FROM
                {self.catalog_name}.{self.metadata_schema}.ETL_LOG_DEPENDENCIES
            WHERE
                concat_ws('.', DATASET_AFFECT, TABLE_AFFECT) = '{self.target_dataset}.{self.target_table}'
                AND proc_date = date('{run_date}')
            GROUP BY
                DEPENDENCY_TABLE
        )
        SELECT
           max(PROC_DATE) as proc_date,
            "{self.target_dataset}" AS DATASET_AFFECT,
            "{self.target_table}" AS TABLE_AFFECT,
            concat_ws('.', A.DATASET_AFFECT, A.TABLE_AFFECT) AS DEPENDENCY_TABLE,
            CASE
                WHEN A1.DEPENDENCY_TABLE IS NULL THEN MIN(A.PROC_DATE)
                ELSE MIN(IF(A.PROC_DATE > A1.LATEST_ETL_TS, A.PROC_DATE, NULL))
            END AS ETL_FROM_TS,
            CASE
                WHEN A1.DEPENDENCY_TABLE IS NULL THEN MAX(A.PROC_DATE)
                ELSE MAX(IF(A.PROC_DATE > A1.LATEST_ETL_TS, A.PROC_DATE, NULL))
            END AS ETL_TO_TS
        FROM
            {self.catalog_name}.{self.metadata_schema}.ETL_DELTA_TABLE A
            INNER JOIN ETL_LOG_DEPENDENCIES A1
            ON concat_ws('.', A.DATASET_AFFECT, A.TABLE_AFFECT) = (A1.DEPENDENCY_TABLE)
        WHERE A.PROC_DATE = date_add(DATE('{run_date}'), 1)
        GROUP BY
            A.DATASET_AFFECT,
            A.TABLE_AFFECT,
            A1.DEPENDENCY_TABLE
        """

    def create_vw_table_dependencies(self):
        """Create or replace the ``<TARGET_TABLE>_DEPENDENCIES`` temp view.

        The statement is printed before it is executed so that a failing
        query is still visible in the notebook output.
        """
        query = self._dependencies_view_query()

        print(f"Debug: {self.target_table}_dependencies")
        print(query)

        spark.sql(query)

    # NOTE(review): this print sits in the class body, so it runs once when
    # the class is defined, not when a method is called. Kept to preserve the
    # existing notebook output; it looks like leftover debugging.
    print("execution started not executed")

    def create_fn_clean_caldayinstr(self):
        """Create ``<catalog>.default.fn_CLEAN_CALDAYINSTR`` if it does not exist.

        The SQL function splits a comma-separated string, trims each element,
        keeps the distinct non-empty elements that (after stripping quotes and
        spaces) cast to a DATE between the start of the previous calendar year
        and the current date, and joins them back with commas.
        """
        spark.sql(
            f"""
        CREATE FUNCTION IF NOT EXISTS {self.catalog_name}.default.fn_CLEAN_CALDAYINSTR(CALDAYINSTR STRING)
            RETURNS STRING
            LANGUAGE SQL
            DETERMINISTIC
            RETURN
                (
                SELECT
                    array_join(
                    array_distinct(
                        filter(
                        transform(split(CALDAYINSTR, ','), x -> trim(x)),
                        x -> x IS NOT NULL
                        AND x != ''
                        AND try_cast(trim(replace(replace(replace(x, '"', ''), "'", ""), " ", "")) AS DATE) BETWEEN
                            date_trunc('year', add_months(current_date(), -12))
                        AND
                            current_date()
                        )
                    ),
                    ','
                    )
                )
        """
        )

    # NOTE(review): class-body print, executed at class-definition time (see
    # the note on the earlier one). Kept to preserve existing output.
    print("function not executed")

    def _static_values_query(self):
        """Return the aggregate SELECT over the delta table and the dependency view."""
        return f"""
        SELECT
            "'" || cast(min(START_DATE) as STRING) || "'" as START_DATE,
            "'" || cast(max(END_DATE) as STRING) || "'" as END_DATE,
            CASE
                WHEN
                MIN(a.START_DATE) < date_trunc('year', add_months(current_date(), -12))
                THEN
                date_trunc('year', add_months(current_date(), -12))
                ELSE MIN(a.START_DATE)
            END as START_DATE_D,
            MAX(a.END_DATE) as END_DATE_D,
            COUNT(DISTINCT a1.DEPENDENCY_TABLE) as TABLE_AFFECT_COUNT,
            array_join(collect_set(a1.DEPENDENCY_TABLE), ',') as TABLE_ETL,
            {self.catalog_name}.default.fn_CLEAN_CALDAYINSTR(array_join((collect_set(a.CALDAY_IN_STR)), ',')) as CALDAY_IN_STR
        FROM
            {self.catalog_name}.{self.metadata_schema}.ETL_DELTA_TABLE a
            JOIN {self.target_table}_DEPENDENCIES a1
            ON concat(a.DATASET_AFFECT, '.', a.TABLE_AFFECT) = a1.DEPENDENCY_TABLE
        WHERE
            a.PROC_DATE BETWEEN a1.ETL_FROM_TS AND a1.ETL_TO_TS
        """

    def get_all_static_values(self):
        """Run the aggregate query and return its single row as a 7-tuple.

        Requires the ``<TARGET_TABLE>_DEPENDENCIES`` temp view and the
        ``fn_CLEAN_CALDAYINSTR`` function to exist already.

        Returns:
            ``(START_DATE, END_DATE, START_DATE_D, END_DATE_D,
            TABLE_AFFECT_COUNT, TABLE_ETL, CALDAY_IN_STR)`` in the column
            order of the SELECT.
        """
        # Build the statement once so the debug output is exactly what runs;
        # the previous hand-copied debug text had drifted from the real query.
        query = self._static_values_query()

        print("\nDebug: static Datasets")
        print(query)

        row = spark.sql(query).head()

        # Unpacking enforces that exactly seven columns came back.
        (
            start_date,
            end_date,
            start_date_d,
            end_date_d,
            table_affect_count,
            table_etl,
            calday_in_str,
        ) = row

        return (
            start_date,
            end_date,
            start_date_d,
            end_date_d,
            table_affect_count,
            table_etl,
            calday_in_str,
        )

    def __main__(self):
        """Create the SQL function and the dependency view, then return the static values."""
        # An earlier note here said this call was being commented out because
        # creating the function multiple times caused access issues; the call
        # is currently active and uses CREATE FUNCTION IF NOT EXISTS.
        self.create_fn_clean_caldayinstr()
        self.create_vw_table_dependencies()
        return self.get_all_static_values()

In [0]:
# Echo the notebook parameters that configure the ETL run, one per line.
print(
    target_dataset,
    target_table,
    metadata_schema,
    dependency_table,
    sep="\n",
)

In [0]:
# Build the loader from the notebook parameters.
common_etl_load = Common_Etl_Load(
    target_dataset=target_dataset,
    catalog_name=catalog_name,
    target_table=target_table,
    metadata_schema=metadata_schema,
    dependency_table=dependency_table,
)

# Run all steps and bind the returned values as notebook-level variables.
(
    START_DATE,
    END_DATE,
    START_DATE_D,
    END_DATE_D,
    TABLE_AFFECT_COUNT,
    TABLE_ETL,
    CALDAY_IN_STR,
) = common_etl_load.__main__()

# Report every resolved value on its own line.
print(
    f"START_DATE: {START_DATE}",
    f"END_DATE: {END_DATE}",
    f"START_DATE_D: {START_DATE_D}",
    f"END_DATE_D: {END_DATE_D}",
    f"TABLE_AFFECT_COUNT: {TABLE_AFFECT_COUNT}",
    f"TABLE_ETL: {TABLE_ETL}",
    f"CALDAY_IN_STR: {CALDAY_IN_STR}",
    sep="\n",
)