In [None]:
%sql
CREATE OR REPLACE TEMP VIEW vw_flattened_mapping AS

-- ==============================================================================
-- vw_flattened_mapping
--
-- Flattens the FY25 cost-center mapping into one row per
-- (cost center, assessable unit) pair.
--
-- Output columns (all strings):
--   Raw_CC   : CostCenterID cast to STRING and trimmed
--   AU_ID    : primary AssessableUnitID (Part 1), or a 6-digit ID parsed from
--              one semicolon-separated item of AdditionalAUID (Part 2)
--   AU_Name  : AssessableUnitName of the source row (additional AUs inherit the
--              primary name)
--   Segment  : primary Segment (Part 1), or the parsed/fallback segment (Part 2)
--
-- The two parts are combined with UNION (not UNION ALL) so the view never
-- returns the same (Raw_CC, AU_ID, AU_Name, Segment) row twice.
-- ==============================================================================

-- STEP 1: Deduplicate the raw mapping file
WITH Deduped_Mapping AS (
    SELECT DISTINCT
        CostCenterID,
        AssessableUnitName,
        AssessableUnitID,
        Segment,
        AdditionalAssessableUnitIDandNameandSegment,
        AdditionalAUID
    FROM hive_metastore.ra_adido_2025.fy25_cost_center_mapping
),

-- STEP 2: Explode the semicolon-separated AdditionalAUID (Column F) into one row
-- per item and parse the AU ID once, so the SELECT and the filter below share it.
Exploded_Additional AS (
    SELECT
        CostCenterID,
        AssessableUnitName,
        Segment,
        AdditionalAssessableUnitIDandNameandSegment,
        TRIM(F_Item) AS Raw_F_Item,
        -- Exactly six digits: the lookarounds reject a 6-digit slice of a longer
        -- digit run (e.g. '1234567' must not yield '123456').
        -- REGEXP_EXTRACT returns '' when nothing matches.
        REGEXP_EXTRACT(TRIM(F_Item), '(?<!\\d)(\\d{6})(?!\\d)', 1) AS Additional_AU_ID
    FROM Deduped_Mapping
    LATERAL VIEW EXPLODE(SPLIT(AdditionalAUID, ';')) AS F_Item
    WHERE AdditionalAUID IS NOT NULL AND TRIM(AdditionalAUID) != ''
)

-- STEP 3: Flatten and Parse
-- ==============================================================================
-- PART 1: Primary Assessable Unit (Columns B, C, D)
-- ==============================================================================
SELECT
    TRIM(CAST(CostCenterID AS STRING)) AS Raw_CC,
    TRIM(AssessableUnitID) AS AU_ID,
    TRIM(AssessableUnitName) AS AU_Name,
    TRIM(Segment) AS Segment
FROM Deduped_Mapping
WHERE AssessableUnitID IS NOT NULL

-- UNION (distinct): Deduped_Mapping is only unique across all six source columns,
-- so projecting four of them can repeat rows; an additional AU may also repeat
-- the primary one. Duplicates here would fan out any downstream join.
UNION

-- ==============================================================================
-- PART 2: Additional Assessable Unit (Smart Parsing)
-- ==============================================================================
SELECT
    TRIM(CAST(CostCenterID AS STRING)) AS Raw_CC,

    -- AU ID: the 6-digit ID parsed from the Col F item in STEP 2
    Additional_AU_ID AS AU_ID,

    -- AU Name: Inherit from Primary since E/F only contain ID and Segment
    TRIM(AssessableUnitName) AS AU_Name,

    -- Smart Segment Parsing (Waterfall Logic).
    -- Each priority yields NULL when it does not apply OR when the text after the
    -- hyphen is blank, so COALESCE falls through to the next priority instead of
    -- returning an empty segment.
    COALESCE(
        -- Priority 1: If Col F item has a hyphen, grab everything after it
        CASE
            WHEN Raw_F_Item LIKE '%-%'
            THEN NULLIF(TRIM(SUBSTRING(Raw_F_Item, LOCATE('-', Raw_F_Item) + 1)), '')
        END,

        -- Priority 2: If Col E is NOT 'Yes' (and has a hyphen), grab everything after it
        CASE
            WHEN TRIM(AdditionalAssessableUnitIDandNameandSegment) != 'Yes'
                 AND AdditionalAssessableUnitIDandNameandSegment LIKE '%-%'
            THEN NULLIF(
                     TRIM(SUBSTRING(
                         AdditionalAssessableUnitIDandNameandSegment,
                         LOCATE('-', AdditionalAssessableUnitIDandNameandSegment) + 1
                     )),
                     ''
                 )
        END,

        -- Priority 3: Fallback to Primary Segment (Col D)
        TRIM(Segment)
    ) AS Segment

FROM Exploded_Additional

-- Only keep rows where we successfully found a 6-digit ID
WHERE Additional_AU_ID != '';