In [0]:
# Standard library imports
import os
import logging
import sys

# Third-party library imports
from dotenv import load_dotenv

# Location of the audit table. The catalog and schema come from the process
# environment and are None when the corresponding variable is not set.
# NOTE(review): load_dotenv is imported above but is not called in the visible
# code, so a .env file is presumably loaded elsewhere -- confirm.
catalog_name = os.environ.get("DATABRICKS_CATALOG_NAME")
schema_name = os.environ.get("DATABRICKS_SCHEMA_NAME")
audit_table_name = "kdayno_audit_job_runs"

def audit_logger(
    job_id,
    job_name,
    input_params_at_runtime,
    job_start_date,
    job_start_datetime,
    task_run_id,
    task_name,
    source_table_name,
    target_table_name,
):
    """Append one audit row for a task run, then conditionally vacuum the table.

    A single-row DataFrame is built from the arguments and appended, in Delta
    format and partitioned by ``job_start_date``, to the table
    ``{catalog_name}.{schema_name}.{audit_table_name}``. Afterwards the table
    is queried for the difference between its newest and oldest
    ``job_start_date``; when that difference is greater than 7, a
    ``VACUUM ... LITE RETAIN 168 HOURS`` statement is executed on the table.

    Relies on the global ``spark`` session and on the module-level
    ``catalog_name``, ``schema_name`` and ``audit_table_name`` values.

    Args:
        job_id: Stored in the ``job_id`` column.
        job_name: Stored in the ``job_name`` column.
        input_params_at_runtime: Stored as ``str(...)`` in the
            ``input_params_at_runtime`` column.
        job_start_date: Stored in the ``job_start_date`` column, which is also
            the partition column.
        job_start_datetime: Stored in the ``job_start_datetime`` column.
        task_run_id: Stored in the ``task_run_id`` column.
        task_name: Stored in the ``task_name`` column.
        source_table_name: Stored as ``str(...)`` in the ``source_tables``
            column.
        target_table_name: Stored as ``str(...)`` in the ``target_tables``
            column.

    Returns:
        None.
    """
    qualified_table = f'{catalog_name}.{schema_name}.{audit_table_name}'

    # Insertion order of this mapping defines the column order of the row.
    record = {
        "job_id": job_id,
        "job_name": job_name,
        "input_params_at_runtime": str(input_params_at_runtime),
        "job_start_date": job_start_date,
        "job_start_datetime": job_start_datetime,
        "task_run_id": task_run_id,
        "task_name": task_name,
        "source_tables": str(source_table_name),
        "target_tables": str(target_table_name),
    }
    audit_df = spark.createDataFrame([tuple(record.values())], list(record))

    writer = audit_df.write.format("delta").mode("append").partitionBy('job_start_date')
    writer.saveAsTable(qualified_table)

    # The check compares the newest and oldest job_start_date in the table
    # (their difference must exceed 7); it does not compare against today.
    span_query = (
        "SELECT date_diff(MAX(job_start_date), MIN(job_start_date)) > 7 "
        f"FROM {qualified_table}"
    )
    span_rows = spark.sql(span_query).collect()
    if not span_rows[0][0]:
        return

    # NOTE(review): VACUUM is documented as removing data files that are no
    # longer referenced by the Delta table, not as deleting rows -- confirm
    # this is the intended clean-up for old audit entries.
    spark.sql(f"VACUUM {qualified_table} LITE RETAIN 168 HOURS")



def etl_logger():
    """Return this module's logger, configured to emit INFO+ records to stdout.

    The logger is looked up with ``logging.getLogger(__name__)``, so every
    call returns the same logger object. The stdout handler is attached only
    when the logger does not already have one, which makes the function safe
    to call repeatedly (for example when a notebook cell is re-run).

    Returns:
        logging.Logger: Logger at INFO level with a stream handler on
        ``sys.stdout`` using the format
        ``%(asctime)s - %(levelname)s - %(message)s``.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # getLogger returns the same object on each call, so adding a handler
    # unconditionally would print every message once per call made so far.
    has_stdout_handler = any(
        isinstance(existing, logging.StreamHandler)
        and getattr(existing, "stream", None) is sys.stdout
        for existing in logger.handlers
    )

    if not has_stdout_handler:
        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        handler = logging.StreamHandler(stream=sys.stdout)
        handler.setLevel(logging.INFO)
        handler.setFormatter(formatter)

        logger.addHandler(handler)

    return logger
