### Load new affiliation strings into `affiliation_strings_lookup` for ML job

In [0]:
-- Session variable holding the watermark for the incremental load.
-- It is (re)created on every run with a 1900-01-01 placeholder and then
-- overwritten below.
DECLARE OR REPLACE VARIABLE max_processed_date TIMESTAMP DEFAULT to_timestamp('1900-01-01');

-- Watermark = latest `updated_datetime` found in
-- openalex<:env_suffix>.works.authors_and_affiliations.
-- MAX() yields NULL when there is no non-NULL value, in which case the
-- 1900-01-01 fallback is used instead.
SET VAR max_processed_date = (
    SELECT COALESCE(MAX(updated_datetime), to_timestamp('1900-01-01'))
    FROM identifier('openalex' || :env_suffix || '.works.authors_and_affiliations')
);

-- Show the resolved watermark in the cell output.
SELECT max_processed_date;

In [0]:
-- Insert raw affiliation strings that are not yet present in the lookup table.
--
-- Source rows: every entry of authorships[].raw_affiliation_strings in
-- openalex<:env_suffix>.works.openalex_works_base that is non-NULL, non-empty,
-- and belongs to a work whose updated_date is later than the session variable
-- max_processed_date (declared and set earlier in this notebook).
--
-- Inserted rows carry NULL institution_ids / institution_ids_override /
-- countries and created_datetime = CURRENT_TIMESTAMP(). There is no
-- WHEN MATCHED clause, so existing lookup rows are never modified.
--
-- NOTE(review): the target table is hard-coded to the `openalex` catalog while
-- the source honours :env_suffix — confirm this asymmetry is intentional.
-- NOTE(review): if updated_date is a DATE rather than a TIMESTAMP, works updated
-- later on the same calendar day as the watermark would not satisfy the `>`
-- filter — confirm the column type.
MERGE INTO openalex.institutions.affiliation_strings_lookup AS target
USING (
    WITH new_affiliation_strings AS (
        -- One row per distinct affiliation string seen on recently updated works.
        SELECT DISTINCT
            affiliation_string AS raw_affiliation_string
        FROM identifier('openalex' || :env_suffix || '.works.openalex_works_base')
        LATERAL VIEW EXPLODE(authorships) AS authorship
        LATERAL VIEW EXPLODE(authorship.raw_affiliation_strings) AS affiliation_string
        WHERE affiliation_string IS NOT NULL
          -- Single-quoted literal: "" would be parsed as an identifier when
          -- spark.sql.ansi.doubleQuotedIdentifiers is enabled.
          AND affiliation_string <> ''
          -- Qualify the session variable so a same-named column can never shadow it.
          AND updated_date > session.max_processed_date
    )
    SELECT
        nas.raw_affiliation_string,
        CAST(NULL AS ARRAY<BIGINT>) AS institution_ids,
        CAST(NULL AS ARRAY<BIGINT>) AS institution_ids_override,
        CAST(NULL AS ARRAY<STRING>) AS countries,
        CURRENT_TIMESTAMP() AS created_datetime
    FROM new_affiliation_strings nas
    -- Drop strings already in the lookup before the merge; the ON condition
    -- below applies the same key match again.
    LEFT ANTI JOIN openalex.institutions.affiliation_strings_lookup existing
        ON nas.raw_affiliation_string = existing.raw_affiliation_string
) AS source
ON target.raw_affiliation_string = source.raw_affiliation_string
WHEN NOT MATCHED THEN
    INSERT (raw_affiliation_string, institution_ids, institution_ids_override, countries, created_datetime)
    VALUES (source.raw_affiliation_string, source.institution_ids, source.institution_ids_override, source.countries, source.created_datetime);