In [0]:
%run ../../../utils/common

In [0]:
def _read_text_widget(widget_name):
    """Declare a text widget with an empty default and return its current value."""
    dbutils.widgets.text(widget_name, "")
    return dbutils.widgets.get(widget_name)


# Notebook parameters: each widget is declared and then read, in this order.
proc_date = _read_text_widget("proc_date")
environment = _read_text_widget("environment")

In [0]:
# `settings` is not defined in this notebook; presumably it is provided by the
# `%run ../../../utils/common` include at the top — TODO confirm.
# The lookup is keyed by the `environment` widget value.
environment_settings = settings[environment]
catalog_name = environment_settings['catalog_name']

In [0]:
from pyspark.sql import SparkSession
import re

# Obtain the Spark session under the app name "SnakeCaseView"; getOrCreate()
# returns the already-running session when one exists.
spark = (
    SparkSession.builder
    .appName("SnakeCaseView")
    .getOrCreate()
)

# Temp view that exposes the bronze rows with snake_case column names, and the
# bronze table it reads from.
# NOTE(review): the bronze catalog is fixed here, whereas the silver table in
# this notebook is addressed through `catalog_name` — confirm this is intended.
temp_view = "temp_supra_f_productivity"
bronze_table = "udp_wcm_pro.udp_wcm_bronze_pro.supra_f_productivity"

# Bronze column names, in the order they are selected into the temp view.
column_names = [
    "ClientCode",
    "Employee",
    "EmployeeName",
    "EndTime",
    "IsEven",
    "JobType",
    "ObjectCode",
    "ProcDate",
    "RefCode",
    "StartTime",
    "TotalSKU",
    "TotalSKUProcessed",
    "TotalUnit",
    "TotalUnitProcessed",
    "WarehouseCode",
    "WarehouseSiteId",
]

# Function to convert camel case to snake case
def to_snake_case(name):
    """Convert a CamelCase/PascalCase identifier to snake_case.

    Slashes are first turned into underscores. An underscore is then inserted
    before every capitalised word and between a lowercase letter or digit and
    a following uppercase letter, and the result is lower-cased.

    Args:
        name: Identifier to convert, e.g. ``"TotalSKUProcessed"``.

    Returns:
        The snake_case form, e.g. ``"total_sku_processed"``.
    """
    converted = name.replace('/', '_')
    # Pass 1 splits before "Xxxx" words; pass 2 splits lower/digit -> upper.
    for boundary_pattern in ('(.)([A-Z][a-z]+)', '([a-z0-9])([A-Z])'):
        converted = re.sub(boundary_pattern, r'\1_\2', converted)
    return converted.lower()

# Build the snake_case temp view through the DataFrame API instead of an
# interpolated SQL string. The `proc_date` widget value is bound as a literal,
# so quotes or other SQL syntax inside it cannot alter the statement, and no
# loop variables (previously `col` / `snake_col`) leak into the notebook's
# global namespace.
from pyspark.sql import functions as F

# One aliased column per bronze column; the backticks keep the source name
# quoted exactly as the SQL version did.
renamed_columns = [
    F.col(f"`{source_name}`").alias(to_snake_case(source_name))
    for source_name in column_names
]

# Same predicate as: to_date(ProcDate, 'yyyyMMddHHmmss') = DATE('<proc_date>')
proc_date_filter = (
    F.to_date(F.col("ProcDate"), "yyyyMMddHHmmss") == F.lit(proc_date).cast("date")
)

# Register (or replace) the session-scoped temp view over the filtered rows.
(
    spark.table(bronze_table)
    .where(proc_date_filter)
    .select(*renamed_columns)
    .createOrReplaceTempView(temp_view)
)


In [0]:
# DDL for the silver target table; it is only created when it does not exist
# yet, so an existing table is left as it is. The catalog comes from
# `catalog_name`; schema and table name are fixed.
create_silver_table_sql = f"""
CREATE TABLE IF NOT EXISTS {catalog_name}.udp_wcm_silver_realtime.sft_productivity  
 ( hash_id BIGINT,
  client_code STRING,
    employee_id STRING,
    employee_name STRING,
    end_time STRING,
    is_even BOOLEAN,
    job_type STRING,
    object_code STRING,
    proc_date DATE,
    ref_code STRING,
    start_time STRING,
    total_sku BIGINT,
    total_sku_processed BIGINT,
    total_unit BIGINT,
    total_unit_processed DOUBLE,
    warehouse_code STRING,
    warehouse_site_id STRING
)TBLPROPERTIES (
  'DELTA.AUTOOPTIMIZE.OPTIMIZEWRITE' = 'TRUE',
  'DELTA.AUTOOPTIMIZE.AUTOCOMPACT' = 'TRUE'
)
          """

spark.sql(create_silver_table_sql)

In [0]:
# Staging view over `temp_supra_f_productivity`: adds `hash_id`, renames
# `employee` to `employee_id`, and parses the time columns.
#
# `hash_id` is farm_fingerprint over the concatenation of the eleven key
# columns, each cast to STRING with NULL replaced by an empty string. The
# hash is taken over the raw column values, before the TO_TIMESTAMP parsing.
#
# NOTE(review): `farm_fingerprint` is not defined in this notebook; presumably
# it is registered by the `%run` include — confirm.
# NOTE(review): the key parts are concatenated without a delimiter, so
# different field splits can yield the same hash input. Changing the recipe
# would change every hash_id, so it is left as is.
# NOTE(review): start_time / end_time / proc_date are produced as timestamps
# here, while the silver DDL in this notebook declares them STRING / STRING /
# DATE — confirm the implicit cast on insert is intended.
#
# The statement has no placeholders, so a plain (non-f) string is used.
staging_view_sql = """
CREATE OR REPLACE TEMP VIEW supra_f_productivity AS
SELECT
  farm_fingerprint(CONCAT(
      IFNULL(CAST(client_code AS STRING), ""),
      IFNULL(CAST(warehouse_code AS STRING), ""),
      IFNULL(CAST(warehouse_site_id AS STRING), ""),
      IFNULL(CAST(object_code AS STRING), ""),
      IFNULL(CAST(ref_code AS STRING), ""),
      IFNULL(CAST(job_type AS STRING), ""),
      IFNULL(CAST(is_even AS STRING), ""),
      IFNULL(CAST(employee AS STRING), ""),
      IFNULL(CAST(start_time AS STRING), ""),
      IFNULL(CAST(end_time AS STRING), ""),
      IFNULL(CAST(proc_date AS STRING), "")
  )) AS hash_id,
  client_code,
  warehouse_code,
  warehouse_site_id,
  object_code,
  ref_code,
  job_type,
  is_even,
  employee AS employee_id,
  employee_name,
  TO_TIMESTAMP(start_time, 'yyyy-MM-dd HH:mm:ss') AS start_time,
  TO_TIMESTAMP(end_time, 'yyyy-MM-dd HH:mm:ss') AS end_time,
  total_sku,
  total_sku_processed,
  total_unit,
  total_unit_processed,
  TO_TIMESTAMP(proc_date, 'yyyyMMddHHmmss') AS proc_date
FROM temp_supra_f_productivity;

"""

spark.sql(staging_view_sql)

In [0]:
# Insert-only merge of the staged rows into the silver table, keyed on hash_id.
#
# The source is first reduced to one row per hash_id. An insert-only MERGE
# matches source rows against the target only, so a hash_id that occurs more
# than once in the same batch would otherwise be inserted once per occurrence.
# The ORDER BY over the non-key columns makes the surviving row deterministic.

# Columns written to the silver table; the INSERT and VALUES lists are both
# generated from this single list so they cannot drift apart.
silver_columns = [
    "hash_id", "client_code", "warehouse_code", "warehouse_site_id",
    "object_code", "ref_code", "job_type", "is_even", "employee_id",
    "employee_name", "start_time", "end_time", "total_sku",
    "total_sku_processed", "total_unit", "total_unit_processed", "proc_date",
]
insert_columns = ", ".join(silver_columns)
insert_values = ", ".join(f"temp_tbl.{column_name}" for column_name in silver_columns)

spark.sql(f"""
MERGE INTO {catalog_name}.udp_wcm_silver_realtime.sft_productivity AS main_tbl
USING (
  SELECT {insert_columns}
  FROM (
    SELECT
      src.*,
      ROW_NUMBER() OVER (
        PARTITION BY src.hash_id
        ORDER BY src.employee_name, src.total_sku, src.total_sku_processed,
                 src.total_unit, src.total_unit_processed
      ) AS dedup_rank
    FROM supra_f_productivity AS src
  ) AS ranked
  WHERE ranked.dedup_rank = 1
) AS temp_tbl
ON main_tbl.hash_id = temp_tbl.hash_id
WHEN NOT MATCHED THEN
  INSERT ({insert_columns})
  VALUES ({insert_values})
""")
