### Creates `openalex.works.authors_and_affiliations` in Walden End to End workflow

In [0]:
-- Builds <catalog>.works.authors_and_affiliations: one row per work_id holding an
-- ordered `authorships` array (author_position, raw_author_name, is_corresponding,
-- raw_affiliation_strings).
--
-- Strategy, as implemented below:
--   1. The author list of the best-ranked source record (lowest `priority`) fixes
--      the authors of a work and their order.
--   2. Affiliations and corresponding-author flags are looked up per
--      (work_id, author_key) across ALL source records of the work, so a lower
--      ranked record can fill in what the best record lacks.
--
-- `:env_suffix` is a notebook parameter appended to the catalog name.
-- NOTE(review): openalex.system.priority_table is read without the suffix;
-- presumably it is shared across environments -- confirm.
CREATE OR REPLACE TABLE identifier('openalex' || :env_suffix || '.works.authors_and_affiliations')
CLUSTER BY (work_id) AS (

-- Source records that carry an author list, ranked per work.
WITH base AS (
    SELECT
        work_id,
        authors,
        priority,
        -- Only inspects element 0 of the extracted affiliation names.
        -- NOTE(review): assumes that is representative of the record -- confirm.
        get(authors.affiliations.name, 0) IS NOT NULL AS affiliations_exist,
        EXISTS(authors.is_corresponding, x -> x = TRUE) AS is_corresponding_exists,
        ROW_NUMBER() OVER (
            PARTITION BY work_id
            -- `priority` comes from a LEFT JOIN and may be NULL. Spark sorts NULLs
            -- first for ASC, so without NULLS LAST an unprioritised provenance
            -- would beat every prioritised one. The hash is a tie-breaker that
            -- keeps the choice stable between runs.
            ORDER BY priority ASC NULLS LAST, hash(to_json(authors)) ASC
        ) AS r
    FROM identifier('openalex' || :env_suffix || '.works.locations_mapped')
    LEFT JOIN openalex.system.priority_table USING (provenance)
    WHERE authors_exist
),

-- Authors of the best record of each work, one row per author, keeping the
-- zero-based position in the original list.
best_authors_exploded AS (
    SELECT
        work_id,
        array_size(authors) AS best_author_list_len,
        posexplode(authors) AS (original_author_order, best_author_list_exploded),
        best_author_list_exploded.author_key AS author_key
    FROM base
    WHERE r = 1
),

-- Every author of every record flagged as having affiliations.
affiliations_base AS (
    SELECT
        work_id,
        explode(authors) AS authors_exploded,
        authors_exploded.affiliations,
        authors_exploded.author_key AS author_key,
        priority
    FROM base
    WHERE affiliations_exist
),

-- Rank the candidate affiliations of each (work_id, author_key).
affiliations_staging AS (
    SELECT
        work_id,
        author_key,
        affiliations,
        -- ROW_NUMBER (not RANK) guarantees exactly one winner: with RANK, records
        -- sharing a priority all got r = 1, which multiplied rows in the join
        -- below and could emit the same author twice with different affiliations.
        ROW_NUMBER() OVER (
            PARTITION BY work_id, author_key
            ORDER BY priority ASC NULLS LAST, hash(to_json(affiliations)) ASC
        ) AS r
    FROM affiliations_base
),

-- Exactly one affiliations value per (work_id, author_key).
affiliations AS (
    SELECT
        work_id,
        author_key,
        affiliations
    FROM affiliations_staging
    WHERE r = 1
),

-- Authors explicitly flagged as corresponding, from any record of the work.
is_corresponding_base AS (
    SELECT
        work_id,
        explode(filter(authors, x -> x.is_corresponding = TRUE)) AS corresponding_author,
        corresponding_author.author_key AS author_key,
        corresponding_author.is_corresponding AS is_corresponding_landing_page
    FROM base
    WHERE is_corresponding_exists
),

-- DISTINCT: several records may flag the same author; keep a single row per key
-- so the join below cannot multiply authorship rows.
is_corresponding AS (
    SELECT DISTINCT
        work_id,
        author_key,
        is_corresponding_landing_page
    FROM is_corresponding_base
),

-- Works with at least one explicitly flagged corresponding author. Every row of
-- `is_corresponding` passed the `= TRUE` filter above, so mere presence of a
-- work_id here means TRUE.
work_has_corresponding_author AS (
    SELECT DISTINCT
        work_id,
        TRUE AS work_has_corresponding_author
    FROM is_corresponding
),

-- Best author list enriched with affiliations and corresponding-author flags.
authors_and_affiliations_base AS (
    SELECT
        work_id,
        original_author_order,
        best_author_list_len,
        best_author_list_exploded,
        affiliations,
        is_corresponding_landing_page,
        work_has_corresponding_author
    FROM best_authors_exploded
    LEFT JOIN affiliations USING (work_id, author_key)
    LEFT JOIN is_corresponding USING (work_id, author_key)
    LEFT JOIN work_has_corresponding_author USING (work_id)
),

-- One authorship struct per author row.
authors_and_affiliations_staging AS (
    SELECT
        work_id,
        original_author_order,
        STRUCT(
            -- A single-author work is labelled 'first' (the first branch wins).
            CASE
                WHEN original_author_order = 0 THEN 'first'
                WHEN original_author_order + 1 = best_author_list_len THEN 'last'
                ELSE 'additional'
            END AS author_position,
            TRIM(best_author_list_exploded.name) AS raw_author_name,
            -- An explicit flag wins; if the work has an explicit flag on some
            -- other author this one is FALSE; otherwise fall back to treating
            -- the first author as corresponding.
            CASE
                WHEN is_corresponding_landing_page THEN TRUE
                WHEN work_has_corresponding_author THEN FALSE
                WHEN original_author_order = 0 THEN TRUE
                ELSE FALSE
            END AS is_corresponding,
            affiliations.name AS raw_affiliation_strings,
            -- Carried inside the struct only so the array can be sorted after
            -- aggregation; dropped again in the final projection.
            original_author_order
        ) AS authorships
    FROM authors_and_affiliations_base
)

-- Collapse to one row per work: de-duplicate, restore the original author order,
-- then strip newlines and surrounding whitespace from the raw strings.
SELECT
    work_id,
    TRANSFORM(
        ARRAY_SORT(
            COLLECT_SET(authorships),
            (lhs, rhs) -> CASE
                WHEN lhs.original_author_order < rhs.original_author_order THEN -1
                WHEN lhs.original_author_order > rhs.original_author_order THEN 1
                ELSE 0
            END
        ),
        x -> STRUCT(
            x.author_position,
            TRIM(REPLACE(x.raw_author_name, '\n', '')) AS raw_author_name,
            x.is_corresponding,
            TRANSFORM(x.raw_affiliation_strings, y -> TRIM(REPLACE(y, '\n', ''))) AS raw_affiliation_strings
        )
    ) AS authorships
FROM authors_and_affiliations_staging
GROUP BY work_id

);

In [0]:
-- Preview the table built by the previous cell. Uses the same env-suffixed
-- catalog expression as the CREATE statement so the preview always shows the
-- table that was just written rather than the un-suffixed catalog.
SELECT * FROM identifier('openalex' || :env_suffix || '.works.authors_and_affiliations');