In [0]:
%sql
-- Create one schema per data layer (bronze, silver, gold).
-- Original rationale: separate databases are used for the layers because
-- Unity Catalog is not supported in CE.
-- NOTE(review): the names below are still qualified with an `edl_hc_mart.`
-- prefix; confirm that this prefix resolves in the target workspace.
-- Each statement is a no-op when the schema already exists.
CREATE SCHEMA IF NOT EXISTS edl_hc_mart.bronze;
CREATE SCHEMA IF NOT EXISTS edl_hc_mart.silver;
CREATE SCHEMA IF NOT EXISTS edl_hc_mart.gold;



In [0]:
%sql

-- Silver-layer employees table, stored as Delta.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.silver.employees (
  -- Employee attributes
  employee_id       INT,
  first_name        STRING,
  last_name         STRING,
  email             STRING,
  dept_code         STRING,
  date_of_birth     DATE,
  -- presumably a formatted text rendering of date_of_birth; confirm with the loader
  date_of_birth_fmt STRING,

  -- Row-tracking columns
  -- NOTE(review): looks like record-versioning metadata; confirm how the loader sets these
  is_active         BOOLEAN,
  effective_date    TIMESTAMP,
  end_date          TIMESTAMP
)
USING DELTA;

In [0]:
%sql
-- Silver-layer jobs table, stored as Delta.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.silver.jobs (
  -- Job assignment
  employee_id           INT,
  position_id           STRING,
  job_title             STRING,
  start_date            DATE,
  start_date_fmt        STRING,   -- presumably a formatted text form of start_date; confirm
  end_date              DATE,
  end_date_fmt          STRING,   -- presumably a formatted text form of end_date; confirm
  status                STRING,

  -- Salary
  -- NOTE(review): salary_amount is a whole-number type, so fractional amounts
  -- cannot be stored; confirm that is intended
  salary_amount         INT,
  salary_currency       STRING,
  salary_frequency      STRING,
  salary_effective_from DATE,
  salary_effective_to   DATE,

  -- Row-tracking columns
  -- NOTE(review): looks like record-versioning metadata; confirm how the loader sets these.
  -- The end marker is effective_end_date here because end_date is already
  -- taken by the DATE column above.
  is_active             BOOLEAN,
  effective_date        TIMESTAMP,
  effective_end_date    TIMESTAMP
)
USING DELTA;

In [0]:
%sql
-- Silver-layer departments table, stored as Delta.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.silver.departments (
  -- Department attributes
  dept_name      STRING,
  dept_code      STRING,

  -- Row-tracking columns
  -- NOTE(review): looks like record-versioning metadata; confirm how the loader sets these
  is_active      BOOLEAN,
  effective_date TIMESTAMP,
  end_date       TIMESTAMP
)
USING DELTA;

In [0]:
%sql
-- Silver-layer attendance table, stored as Delta.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.silver.attendance (
  -- Attendance record
  employee_id         INT,
  attendance_date     DATE,
  present             BOOLEAN,
  attendance_date_fmt STRING,   -- presumably a formatted text form of attendance_date; confirm

  -- Row-tracking columns
  -- NOTE(review): looks like record-versioning metadata; confirm how the loader sets these
  is_active           BOOLEAN,
  effective_date      TIMESTAMP,
  end_date            TIMESTAMP
)
USING DELTA;

In [0]:
%sql

-- Gold-layer employee dimension, stored as Delta.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.gold.dim_employees (
  -- NOTE(review): presumably the key that fact tables reference; confirm how it is populated
  employee_key      INT,
  first_name        STRING,
  last_name         STRING,
  email             STRING,
  dept_code         STRING,
  -- the date of birth is kept here only as a STRING column
  date_of_birth_fmt STRING
)
USING DELTA;

In [0]:
%sql
-- Gold-layer department dimension, stored as Delta.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.gold.dim_departments (
  dept_name STRING,
  dept_code STRING   -- NOTE(review): presumably the department join key; confirm
)
USING DELTA;

In [0]:
%sql
-- Gold-layer date dimension, stored as Delta.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.gold.dim_date (
  date_key   DATE,     -- the key is the calendar date itself (DATE-typed)
  date_label STRING    -- presumably a display form of date_key; confirm the format with the loader
)
USING DELTA;

In [0]:
%sql
-- Gold-layer job-history fact table.
-- IF NOT EXISTS makes this a no-op when the table is already present.
-- The storage format is declared explicitly (USING DELTA) and the statement is
-- terminated, matching every other table definition in this notebook rather
-- than relying on the workspace's default table format.
CREATE TABLE IF NOT EXISTS edl_hc_mart.gold.fact_job_history (
  -- Dimension references
  -- NOTE(review): presumably these join to the gold dim_* tables
  -- (employee_key, dept_code, date_key); confirm against the load logic
  employee_key   INT,
  dept_key       STRING,
  start_date_key DATE,
  end_date_key   DATE,

  -- Descriptive job attributes carried on the fact row
  position_id    STRING,
  job_title      STRING,
  status         STRING
)
USING DELTA;


In [0]:
%sql
-- Schema for audit objects; the audit_ingestion table in this notebook is
-- created under it. No-op when the schema already exists.
CREATE SCHEMA IF NOT EXISTS edl_hc_mart.audit;

In [0]:
%sql

-- Ingestion audit table, stored as Delta and partitioned by pipeline_name.
-- The catalog/schema qualifier is optional; adjust it to your environment.
-- IF NOT EXISTS makes this a no-op when the table is already present.
CREATE TABLE IF NOT EXISTS edl_hc_mart.audit.audit_ingestion (
  -- Pipeline and source identity (the three NOT NULL columns are mandatory)
  pipeline_name                    STRING NOT NULL,
  source_type                      STRING NOT NULL,  -- file|jdbc
  source_name                      STRING NOT NULL,
  bronze_table                     STRING,

  -- Run identity
  batch_id                         STRING,
  run_id                           STRING,
  trigger_type                     STRING,           -- schedule|manual|retry
  attempt                          INT,

  -- Run timing
  run_start_ts                     TIMESTAMP,
  run_end_ts                       TIMESTAMP,
  duration_ms                      BIGINT,

  -- Outcome and volumes
  last_status                      STRING,           -- Success|Failure|Skipped
  records_read                     BIGINT,
  records_written                  BIGINT,
  error_count                      BIGINT,
  error_message                    STRING,

  -- Watermarking / incremental loads
  watermark_col                    STRING,           -- e.g., updated_at / ingestion_ts
  last_success_watermark_value     STRING,           -- stored as string to cover ts/numeric
  current_run_high_watermark_value STRING,

  -- File/source state
  file_checkpoint_path             STRING,           -- autoloader/bookmark path if used
  schema_version_applied           STRING,           -- v1/v2...

  -- Provenance
  producer_system                  STRING,
  ingestion_user                   STRING,
  notes                            STRING,

  -- Convenience columns (for filters & audits)
  last_load_date                   DATE,
  created_at                       TIMESTAMP,
  updated_at                       TIMESTAMP
)
USING DELTA
-- Partitioning by pipeline makes sense for large orgs; adjust as needed
PARTITIONED BY (pipeline_name)
TBLPROPERTIES (
  delta.autoOptimize.optimizeWrite = true,
  delta.autoOptimize.autoCompact = true
);


In [0]:
%sql
-- Ingestion audit table (Delta, partitioned by pipeline_name).
-- The catalog/schema qualifier is optional; adjust it to your environment.
-- NOTE(review): this statement repeats the table name and column list of the
-- CREATE TABLE in the preceding cell. Because of IF NOT EXISTS it does nothing
-- once that cell has run; consider keeping only one copy.
CREATE TABLE IF NOT EXISTS edl_hc_mart.audit.audit_ingestion (
  -- Identity of the pipeline and its source; the NOT NULL columns are required
  pipeline_name STRING NOT NULL,
  -- file|jdbc
  source_type STRING NOT NULL,
  source_name STRING NOT NULL,
  bronze_table STRING,

  -- Identity of the individual run
  batch_id STRING,
  run_id STRING,
  -- schedule|manual|retry
  trigger_type STRING,
  attempt INT,

  -- Timing of the run
  run_start_ts TIMESTAMP,
  run_end_ts TIMESTAMP,
  duration_ms BIGINT,

  -- Result of the run
  -- Success|Failure|Skipped
  last_status STRING,
  records_read BIGINT,
  records_written BIGINT,
  error_count BIGINT,
  error_message STRING,

  -- Watermarking / Incremental
  -- e.g., updated_at / ingestion_ts
  watermark_col STRING,
  -- store as string to cover ts/numeric
  last_success_watermark_value STRING,
  current_run_high_watermark_value STRING,

  -- File/source state
  -- autoloader/bookmark path if used
  file_checkpoint_path STRING,
  -- v1/v2...
  schema_version_applied STRING,

  -- Provenance
  producer_system STRING,
  ingestion_user STRING,
  notes STRING,

  -- Convenience (for filters & audits)
  last_load_date DATE,
  created_at TIMESTAMP,
  updated_at TIMESTAMP
)
USING DELTA
-- Partition by pipeline makes sense for large orgs; adjust as needed
PARTITIONED BY (pipeline_name)
TBLPROPERTIES (
  delta.autoOptimize.optimizeWrite = true,
  delta.autoOptimize.autoCompact = true
);