In [None]:
%sql

WITH Training_Data AS (
    -- STEP 1: Flag every training record as completed on time (1) or not (0).
    -- Output columns: DeptID (passed through unchanged) and Completed_On_Time.
    SELECT
        DeptID,

        -- Logic: 1 = On Time, 0 = Not On Time / Incomplete.
        -- Branches are evaluated top to bottom; the first match wins.
        CASE
            -- ERA Note: "If due date is blank, mark as non-completed"
            WHEN DueDate IS NULL OR TRIM(DueDate) = '' THEN 0

            -- A blank completion date means the training was never completed
            WHEN CompletionDateUTC IS NULL OR TRIM(CompletionDateUTC) = '' THEN 0

            -- ERA Note: "Completion date before Due Date" (implemented as on or before: <=).
            -- TRY_CAST yields NULL for a value that cannot be parsed as a date instead of
            -- raising an error (plain CAST fails the whole query under ANSI mode). A NULL
            -- comparison is not TRUE, so malformed dates fall through to ELSE 0, in line
            -- with how blank dates are treated above.
            -- NOTE(review): the column name suggests CompletionDateUTC is in UTC; if DueDate
            -- is a local calendar date the day boundary may be off by one - confirm.
            WHEN TRY_CAST(CompletionDateUTC AS DATE) <= TRY_CAST(DueDate AS DATE) THEN 1

            -- Missed the deadline (or a date could not be parsed)
            ELSE 0
        END AS Completed_On_Time

    FROM hive_metastore.ra_adido_2025.td_code_of_conduct_and_ethics_11012024_10312025
)

-- STEP 2: Roll up the data by Assessable Unit and calculate the percentage.
-- Produces one row per Assessable Unit, sorted by unit name so the output is stable.
SELECT
    m.`Assessable Unit ID and Name` AS `Assessable Unit`,

    -- Denominator: number of training rows matched to the unit.
    -- COUNT(t.DeptID) skips the all-NULL row a LEFT JOIN emits for an unmatched
    -- cost center, so a unit with no training data reports 0.
    -- NOTE(review): assumes one training row per employee - confirm.
    COUNT(t.DeptID) AS `Total Employees`,

    -- Numerator: number of matched rows flagged as completed on time.
    -- SUM over a group with no matched rows is NULL; COALESCE reports it as 0 so
    -- this column agrees with `Total Employees`.
    COALESCE(SUM(t.Completed_On_Time), 0) AS `Total Completed On Time`,

    -- Percentage: (Numerator / Denominator) * 100, rounded to 2 decimals.
    -- NULLIF turns a zero denominator into NULL, so a unit with 0 employees
    -- yields a NULL percentage instead of a divide-by-zero error.
    ROUND(
        (SUM(t.Completed_On_Time) * 100.0) / NULLIF(COUNT(t.DeptID), 0),
        2
    ) AS `Timely Completion Percentage`

FROM cost_center m

-- Join the Training DeptID to our Mapping Cost Center.
-- LEFT JOIN keeps Assessable Units that have no training rows.
-- NOTE(review): if the mapping lists the same cost center more than once for a
-- unit, its training rows are counted once per duplicate - confirm uniqueness.
LEFT JOIN Training_Data t
    -- Cost centers that are exactly 3 characters long get a leading '0' so they
    -- line up with 4-character DeptID values; all other lengths are compared as-is.
    ON CASE
        WHEN LENGTH(TRIM(CAST(m.`Cost Center` AS STRING))) = 3
        THEN CONCAT('0', TRIM(CAST(m.`Cost Center` AS STRING)))
        ELSE TRIM(CAST(m.`Cost Center` AS STRING))
       END = TRIM(t.DeptID)

-- Remove purely blank mapping rows: NULL as well as empty / whitespace-only names
WHERE m.`Assessable Unit ID and Name` IS NOT NULL
  AND TRIM(m.`Assessable Unit ID and Name`) <> ''

-- Crush down to one row per Assessable Unit
GROUP BY
    m.`Assessable Unit ID and Name`

-- Deterministic row order for the report
ORDER BY
    m.`Assessable Unit ID and Name`