In [None]:
/* ===================================================================================
   METRIC: EMP01 - Canadian Officer / PEP Presence (Unpivoted with Col E Logic)
=================================================================================== */

WITH Raw_Strings AS (
    -- STEP 1: Build one space-separated "master string" per mapping row.
    -- It always starts with the primary AU written as [C - B - D]
    -- (AssessableUnitID - AssessableUnitName - Segment), followed by
    -- Col E (AdditionalAssessableUnitIDandNameandSegment) when it holds real
    -- AUs, and finally Col F (AdditionalAUID).
    -- CONCAT_WS skips NULL arguments, so a NULL Col E / Col F simply drops out.
    SELECT
        `CostCenter`,
        CONCAT_WS(' ',
            CONCAT_WS(' - ', `AssessableUnitID`, `AssessableUnitName`, `Segment`),
            -- Col E is either the literal flag "Yes" (not AU data, so it is
            -- suppressed by turning it into NULL) or actual additional AUs.
            -- The flag is compared case-insensitively so that 'YES' / 'yes'
            -- are not appended to the text that STEP 2 parses.
            CASE
                WHEN LOWER(TRIM(`AdditionalAssessableUnitIDandNameandSegment`)) = 'yes' THEN NULL
                ELSE `AdditionalAssessableUnitIDandNameandSegment`
            END,
            `AdditionalAUID`
        ) AS Raw_Text_To_Parse
    FROM fy25_cost_center_mapping
),

Parsed_Mapping AS (
    -- STEP 2: Split the master string into an array with one element per AU
    -- block. A block starts at a run of 6 digits and extends up to (but not
    -- including) the next run of 6 digits, or the end of the text.
    --
    -- Pattern notes:
    --   (?s)                 lets '.' match line breaks; without it a block that
    --                        is followed by an embedded line break before the
    --                        next ID can never reach its terminator and is
    --                        dropped from the result.
    --   (\d{6}.*?)           group 1 = the block itself (lazy, so it stops at the
    --                        first valid terminator).
    --   (?=\s*(?:\d{6}|$))   terminator: optional whitespace followed by the next
    --                        6-digit run or end of text. Because the whitespace
    --                        sits in the lookahead, it is not part of the block.
    SELECT
        `CostCenter`,
        regexp_extract_all(
            Raw_Text_To_Parse,
            '(?s)(\\d{6}.*?)(?=\\s*(?:\\d{6}|$))',
            1
        ) AS AU_Array
    FROM Raw_Strings
),

Expanded_Mapping AS (
    -- STEP 3: Unpivot the array into one row per (CostCenter, AU block).
    -- EXPLODE is a generator and may not be nested inside another expression
    -- such as TRIM(...), so it is applied through LATERAL VIEW and the
    -- generated column is trimmed afterwards.
    -- A plain (non-OUTER) LATERAL VIEW produces no rows for an empty or NULL
    -- array, so no separate SIZE(AU_Array) > 0 filter is required.
    SELECT
        pm.`CostCenter`,
        TRIM(au.AU_Block) AS `Assessable_Unit_Full_String`
    FROM Parsed_Mapping AS pm
    LATERAL VIEW EXPLODE(pm.AU_Array) au AS AU_Block
),

Canadian_PEP AS (
    -- STEP 4: Reduce each Canadian PEP record to its cost center code.
    -- The inner query keeps only rows whose trimmed Region is exactly 'Canada'
    -- and trims the raw Costcenter value; the outer query keeps the text that
    -- precedes the first space (the whole value when there is no space).
    SELECT
        SUBSTRING_INDEX(canada_rows.trimmed_cc, ' ', 1) AS Extracted_CC
    FROM (
        SELECT
            TRIM(pep.Costcenter) AS trimmed_cc
        FROM hive_metastore.ra_adido_2025.employee_pep_list_as_of_oct312025 AS pep
        WHERE TRIM(pep.Region) = 'Canada'
    ) AS canada_rows
)

-- STEP 5: Join and roll up to one row per Assessable Unit.
-- An AU is flagged 'Yes' when at least one of its cost centers matches a
-- Canadian PEP cost center; otherwise it is flagged 'No'.
SELECT
    au.`Assessable_Unit_Full_String` AS `Assessable Unit`,
    -- COUNT ignores the NULLs produced by unmatched LEFT JOIN rows, so a count
    -- of zero means no Canadian PEP cost center matched this AU.
    IF(COUNT(pep.Extracted_CC) = 0, 'No', 'Yes') AS `Has Canadian Officer?`
FROM (
    -- Drop blank AU strings and normalise the cost center to trimmed text once,
    -- so the join condition below does not repeat the cast/trim.
    SELECT
        `Assessable_Unit_Full_String`,
        TRIM(CAST(`CostCenter` AS STRING)) AS cc_text
    FROM Expanded_Mapping
    WHERE `Assessable_Unit_Full_String` != ''
) AS au
LEFT JOIN Canadian_PEP AS pep
    -- A 3-character cost center gets a leading '0' before comparison; every
    -- other length is compared as-is.
    -- NOTE(review): presumably cost centers are 4 characters and lost a leading
    -- zero upstream - confirm that 1- or 2-character values cannot occur.
    ON pep.Extracted_CC = IF(LENGTH(au.cc_text) = 3, CONCAT('0', au.cc_text), au.cc_text)
GROUP BY
    au.`Assessable_Unit_Full_String`