In [None]:
%sql
CREATE OR REPLACE TEMP VIEW vw_flattened_mapping AS

-- ==============================================================================
-- vw_flattened_mapping
-- Flattens the FY25 cost-center mapping into one row per
-- (cost center, assessable unit) pair.
--
-- Output columns:
--   Raw_CC   : CostCenterID cast to STRING and trimmed
--   AU_ID    : assessable unit ID (primary ID, or a 6-digit ID parsed out of
--              AdditionalAUID)
--   AU_Name  : trimmed AssessableUnitName of the source row
--   Segment  : trimmed segment (see the waterfall in PART 2)
--
-- NOTE(review): the two halves are combined with UNION ALL, so an AU that is
-- listed both as primary and as additional for the same cost center appears
-- twice. Confirm that downstream consumers expect this.
-- ==============================================================================

-- STEP 1: Deduplicate the raw mapping file.
-- DISTINCT compares the untrimmed values, so rows that differ only in
-- surrounding whitespace are still treated as different rows here.
WITH Deduped_Mapping AS (
    SELECT DISTINCT
        CostCenterID,
        AssessableUnitName,
        AssessableUnitID,
        Segment,
        AdditionalAssessableUnitIDandNameandSegment,
        AdditionalAUID
    FROM hive_metastore.ra_adido_2025.fy25_cost_center_mapping
),

-- STEP 2: The "force delimiter" explode.
-- AdditionalAUID can hold several IDs that are not reliably delimited, so the
-- text is normalised before it is split:
--   1. every ';' becomes '|'
--   2. a '|' is injected in front of every run of six digits
--   3. the result is split on '|' and exploded to one row per chunk
-- Example: '111111 - East 222222'  ->  '|111111 - East |222222'
--          ->  chunks '', '111111 - East ', '222222'
-- Every chunk that contains an ID therefore starts with that ID.
--
-- NOTE(review): '(\\d{6})' matches ANY six consecutive digits, so a longer
-- digit run (e.g. 12 digits) is cut into 6-digit pieces. Confirm that
-- AdditionalAUID never carries longer numbers.
Exploded_Additional AS (
    SELECT
        -- Explicit column list: only what PART 2 actually consumes.
        CostCenterID,
        AssessableUnitName,
        Segment,
        AdditionalAssessableUnitIDandNameandSegment,
        TRIM(F_Item) AS Raw_F_Item,
        -- Parsed once here so the projection and the filter in PART 2 cannot
        -- drift apart. REGEXP_EXTRACT yields '' when the chunk has no ID.
        REGEXP_EXTRACT(TRIM(F_Item), '(\\d{6})', 1) AS Parsed_AU_ID
    FROM Deduped_Mapping

    LATERAL VIEW EXPLODE(
        SPLIT(
            REGEXP_REPLACE(REPLACE(AdditionalAUID, ';', '|'), '(\\d{6})', '|$1'),
            '\\|'
        )
    ) AS F_Item

    -- Rows without any additional-AU text have nothing to explode.
    WHERE AdditionalAUID IS NOT NULL AND TRIM(AdditionalAUID) != ''
)

-- STEP 3: Flatten and Parse
-- ==============================================================================
-- PART 1: Primary Assessable Unit
-- ==============================================================================
SELECT
    TRIM(CAST(CostCenterID AS STRING)) AS Raw_CC,
    TRIM(AssessableUnitID) AS AU_ID,
    TRIM(AssessableUnitName) AS AU_Name,
    TRIM(Segment) AS Segment
FROM Deduped_Mapping
-- Skip rows with no usable primary ID. Blank / whitespace-only IDs are
-- excluded as well, mirroring PART 2, which drops chunks without an ID.
WHERE AssessableUnitID IS NOT NULL
  AND TRIM(AssessableUnitID) != ''

UNION ALL

-- ==============================================================================
-- PART 2: Additional Assessable Units (one row per exploded chunk with an ID)
-- ==============================================================================
SELECT
    TRIM(CAST(CostCenterID AS STRING)) AS Raw_CC,

    -- The 6-digit ID parsed from the chunk in STEP 2.
    Parsed_AU_ID AS AU_ID,

    -- No name is parsed from the chunk; the source row's AssessableUnitName
    -- is reused for every additional AU.
    TRIM(AssessableUnitName) AS AU_Name,

    -- Segment waterfall: the first source that yields a non-empty value wins.
    -- NULLIF(..., '') turns an empty extraction (e.g. a chunk such as
    -- '123456-' with nothing after the hyphen) into NULL so that COALESCE
    -- moves on to the next source instead of returning ''.
    COALESCE(
        -- 1. Text after the first hyphen inside this chunk.
        NULLIF(
            CASE
                WHEN Raw_F_Item LIKE '%-%'
                THEN TRIM(SUBSTRING(Raw_F_Item, LOCATE('-', Raw_F_Item) + 1))
            END,
            ''
        ),
        -- 2. Text after the first hyphen of the combined ID/name/segment
        --    column. The literal 'Yes' is skipped explicitly (presumably a
        --    flag value rather than real content -- confirm).
        NULLIF(
            CASE
                WHEN TRIM(AdditionalAssessableUnitIDandNameandSegment) != 'Yes'
                     AND AdditionalAssessableUnitIDandNameandSegment LIKE '%-%'
                THEN TRIM(SUBSTRING(
                         AdditionalAssessableUnitIDandNameandSegment,
                         LOCATE('-', AdditionalAssessableUnitIDandNameandSegment) + 1
                     ))
            END,
            ''
        ),
        -- 3. Fall back to the source row's own Segment.
        TRIM(Segment)
    ) AS Segment

FROM Exploded_Additional

-- Drop empty chunks and free text that did not contain an ID.
WHERE Parsed_AU_ID != '';