In [0]:
-- clean_date(d): returns d unchanged when it looks like a plausible calendar
-- date, otherwise NULL.
--
-- A date is kept only if ALL of the following hold:
--   * it is on or after 1901-01-01,
--   * it is no more than three years past the current date,
--   * it is not one of the listed placeholder / sentinel dates.
-- A NULL input makes every comparison NULL, so it falls through to ELSE NULL.
CREATE OR REPLACE TEMPORARY FUNCTION clean_date(d DATE)
RETURNS DATE
RETURN CASE
  WHEN d >= DATE '1901-01-01'
   AND d <= current_date() + INTERVAL 3 YEARS
   AND d NOT IN (
     DATE '1900-01-01', DATE '1901-01-01',
     DATE '1969-12-31', DATE '1970-01-01',
     DATE '2099-12-31', DATE '2999-12-31', DATE '9999-12-31'
   )
    THEN d
  ELSE NULL
END;

-- Rebuilds teamconnect.pca_prism.10_tbl_subinf_cur from it_bronze_prod.dpi.subinf,
-- keeping one row per (btn, dpi_environment): the record with the most recent
-- connect/disconnect activity.
--
-- To (re)create the table from scratch instead of overwriting its rows, replace
-- the INSERT OVERWRITE line with:
--   CREATE OR REPLACE TABLE teamconnect.pca_prism.10_tbl_subinf_cur AS
--
-- NOTE(review): columns are matched to the target table by position; keep the
-- final SELECT list in the same order as the table definition.
INSERT OVERWRITE TABLE teamconnect.pca_prism.10_tbl_subinf_cur
WITH subinf_raw AS (
  -- Normalise the raw source columns: digits-only key parts, trimmed text,
  -- and parsed + sanity-checked dates.
  SELECT
    REGEXP_REPLACE(CAST(subexc AS STRING), '[^0-9]', '') AS exc_digits,
    REGEXP_REPLACE(CAST(subln_ AS STRING), '[^0-9]', '') AS line_digits,
    UPPER(TRIM(subeva)) AS dpi_environment,
    -- CONCAT() yields NULL as soon as one part is NULL, which would discard a
    -- name that has only one part. CONCAT_WS skips NULL parts; blank parts are
    -- first turned into NULL, and an all-blank name becomes 'Unknown'.
    COALESCE(
      NULLIF(
        CONCAT_WS(' ',
          NULLIF(TRIM(subfrn), ''),
          NULLIF(TRIM(sublsn), '')
        ),
        ''
      ),
      'Unknown'
    ) AS customer_name,
    -- CONCAT_WS returns '' (not NULL) when every part is NULL, so the result
    -- is passed through NULLIF(..., '') to let the 'Unknown' fallback apply.
    COALESCE(
      NULLIF(
        CONCAT_WS(', ',
          NULLIF(TRIM(subad1), ''),
          NULLIF(TRIM(subad2), ''),
          NULLIF(TRIM(subad3), ''),
          NULLIF(TRIM(subcty), ''),
          NULLIF(TRIM(subsab), ''),
          NULLIF(LEFT(TRIM(subzcd), 5), '')  -- first 5 characters of the trimmed ZIP
        ),
        ''
      ),
      'Unknown'
    ) AS service_address,
    -- Dates are tried in three layouts; the first successful parse wins and is
    -- then passed through clean_date() to null out implausible values.
    clean_date(COALESCE(
      TRY_TO_DATE(CAST(subcdt AS STRING), 'yyyy-MM-dd'),
      TRY_TO_DATE(CAST(subcdt AS STRING), 'yyyyMMdd'),
      TRY_TO_DATE(CAST(subcdt AS STRING), 'MM/dd/yyyy')
    )) AS connect_date,
    clean_date(COALESCE(
      TRY_TO_DATE(CAST(subddt AS STRING), 'yyyy-MM-dd'),
      TRY_TO_DATE(CAST(subddt AS STRING), 'yyyyMMdd'),
      TRY_TO_DATE(CAST(subddt AS STRING), 'MM/dd/yyyy')
    )) AS disconnect_date
  FROM it_bronze_prod.dpi.subinf
  WHERE subeva IS NOT NULL
),
subinf_keyed AS (
  -- Build the 10-digit btn once: 6 exchange digits + line digits left-padded
  -- to 4. Rows whose parts do not have the required lengths (including NULL
  -- parts) cannot form a btn and are dropped here, before the window runs.
  SELECT
    CONCAT(exc_digits, LPAD(line_digits, 4, '0')) AS btn,
    dpi_environment,
    customer_name,
    service_address,
    connect_date,
    disconnect_date
  FROM subinf_raw
  WHERE LENGTH(exc_digits) = 6
    AND LENGTH(line_digits) BETWEEN 1 AND 4
),
subinf_ranked AS (
  -- Rank the records of each (btn, dpi_environment) by latest activity date.
  SELECT
    btn,
    dpi_environment,
    customer_name,
    service_address,
    connect_date,
    disconnect_date,
    ROW_NUMBER() OVER (
      PARTITION BY btn, dpi_environment
      ORDER BY
        -- Primary key: the later of connect / disconnect (missing = 1900-01-01).
        GREATEST(
          COALESCE(disconnect_date, DATE '1900-01-01'),
          COALESCE(connect_date,    DATE '1900-01-01')
        ) DESC,
        -- Tie-breakers so the chosen row is the same on every run.
        -- NOTE(review): on a tie the record without a disconnect date is
        -- preferred; confirm this matches the intended business rule.
        CASE WHEN disconnect_date IS NULL THEN 0 ELSE 1 END,
        connect_date DESC NULLS LAST,
        disconnect_date DESC NULLS LAST,
        customer_name,
        service_address
    ) AS rn
  FROM subinf_keyed
)
SELECT
  btn,
  dpi_environment,
  customer_name,
  service_address,
  connect_date,
  disconnect_date,
  -- Days between connect and disconnect; for records with no disconnect date
  -- the current date is used as the end. NULL when connect_date is NULL.
  DATEDIFF(COALESCE(disconnect_date, current_date()), connect_date) AS days_in_service
FROM subinf_ranked
WHERE rn = 1;

-- Run OPTIMIZE on the target table, Z-ordering its data by the two key columns
-- (btn, dpi_environment).
OPTIMIZE teamconnect.pca_prism.10_tbl_subinf_cur ZORDER BY (btn, dpi_environment);



