### Creates `openalex.works.crossref_super_authorships` in Walden End to End workflow

In [0]:
-- Builds the env-specific `works.crossref_super_authorships` table: one row per DOI
-- holding an ordered `authorships` array.
--   * The author list comes from the single best-priority source record for the DOI
--     (lowest `priority` value wins).
--   * Affiliations are merged in per (doi, author_key) from the best-priority source
--     that has a named first affiliation for that author.
--   * The corresponding-author flag is merged in per (doi, author_key) from any source
--     that supplies a non-null flag.
-- Only records with authors and provenance in ('crossref', 'pdf', 'landing_page') are used.
CREATE OR REPLACE TABLE identifier('openalex' || :env_suffix || '.works.crossref_super_authorships') AS
WITH candidate_locations AS (
  -- Resolve the DOI once: crossref records carry it in native_id, other provenances
  -- carry it as the first id whose namespace is 'doi' (NULL when there is none).
  -- NOTE(review): the priority table is read from the un-suffixed `openalex` catalog
  -- while the other tables use :env_suffix — confirm this is intentional.
  SELECT
    CASE
      WHEN provenance = 'crossref' THEN native_id
      ELSE get(filter(ids, x -> x.namespace = 'doi').id, 0)
    END AS doi,
    provenance,
    native_id,
    authors,
    priority,
    affiliations_exist,
    is_corresponding_exists
  FROM identifier('openalex' || :env_suffix || '.works.locations_parsed')
    LEFT JOIN openalex.system.priority_table USING (provenance)
  WHERE authors_exist
    AND provenance IN ('crossref', 'pdf', 'landing_page')
),

crossref_base AS (
  -- Rank the candidate records of each DOI; r = 1 is the preferred author source.
  SELECT
    doi,
    authors,
    priority,
    affiliations_exist,
    is_corresponding_exists,
    ROW_NUMBER() OVER (
      PARTITION BY doi
      -- NULLS LAST: a provenance missing from the priority table must not outrank a
      -- known one (ASC sorts NULLs first by default). provenance/native_id only break
      -- ties so that the selected record is stable between runs.
      ORDER BY priority ASC NULLS LAST, provenance ASC, native_id ASC
    ) AS r
  FROM candidate_locations
  -- Records without a DOI cannot be keyed; keeping them would pool unrelated works
  -- into a single NULL-DOI group.
  WHERE doi IS NOT NULL
),

crossref_best_authors_exploded AS (
  -- One row per author of the preferred record, keeping the author's list position.
  SELECT
    doi,
    posexplode(authors) AS (original_author_order, best_author_list_exploded),
    best_author_list_exploded.author_key AS author_key
  FROM crossref_base
  WHERE r = 1
),

crossref_affiliations_base AS (
  -- One row per author for every candidate record flagged as having affiliations.
  SELECT
    doi,
    explode(authors) AS authors_exploded,
    authors_exploded.affiliations AS affiliations,
    authors_exploded.author_key AS author_key,
    priority
  FROM crossref_base
  WHERE affiliations_exist
),

crossref_affiliations_staging AS (
  -- One row per affiliation. RANK (not ROW_NUMBER) so that every affiliation row coming
  -- from the best-priority source of a (doi, author_key) shares rank 1.
  SELECT
    doi,
    author_key,
    priority,
    affiliations,
    explode(affiliations) AS exploded_affiliations,
    RANK() OVER (PARTITION BY doi, author_key ORDER BY priority ASC NULLS LAST) AS r
  FROM crossref_affiliations_base
  -- Only the first affiliation's name is inspected here.
  WHERE get(affiliations.name, 0) IS NOT NULL
),

crossref_affiliations AS (
  -- Re-assemble the rank-1 affiliations into one array per (doi, author_key).
  SELECT
    doi,
    author_key,
    collect_list(exploded_affiliations) AS affiliations
  FROM crossref_affiliations_staging
  WHERE r = 1
  GROUP BY doi, author_key
),

crossref_is_corresponding_base AS (
  -- One row per author that has a non-null is_corresponding flag, across all candidates.
  SELECT
    doi,
    explode(filter(authors, x -> x.is_corresponding IS NOT NULL)) AS corresponding_author,
    corresponding_author.author_key AS author_key,
    corresponding_author.is_corresponding AS is_corresponding_landing_page
  FROM crossref_base
  WHERE is_corresponding_exists
),

crossref_is_corresponding AS (
  -- Collapse to exactly one flag per (doi, author_key). Without this, sources that
  -- disagree fan out the join below and the same author is emitted twice.
  -- MAX keeps the column's original type; for booleans TRUE sorts above FALSE, so an
  -- author is corresponding if any source says so.
  -- NOTE(review): assumes "any source" is the desired rule — confirm vs. best-priority.
  SELECT
    doi,
    author_key,
    MAX(is_corresponding_landing_page) AS is_corresponding_landing_page
  FROM crossref_is_corresponding_base
  GROUP BY doi, author_key
),

crossref_authors_and_affiliations_base AS (
  -- Both joined CTEs are unique on (doi, author_key), so the author row count is preserved.
  SELECT
    doi,
    author_key,
    original_author_order,
    best_author_list_exploded,
    affiliations,
    is_corresponding_landing_page
  FROM crossref_best_authors_exploded
    LEFT JOIN crossref_affiliations USING (doi, author_key)
    LEFT JOIN crossref_is_corresponding USING (doi, author_key)
),

crossref_authors_and_affiliations_staging AS (
  -- Pack each author into a struct; original_author_order is kept only for sorting.
  SELECT
    doi,
    STRUCT(
      original_author_order AS original_author_order,
      best_author_list_exploded.given AS given,
      best_author_list_exploded.family AS family,
      best_author_list_exploded.name AS name,
      best_author_list_exploded.orcid AS orcid,
      affiliations AS affiliations,
      is_corresponding_landing_page AS is_corresponding,
      best_author_list_exploded.author_key AS author_key
    ) AS authorships
  FROM crossref_authors_and_affiliations_base
)

-- Aggregate per DOI, restore the source author order, trim name strings and drop the
-- helper ordering field from the final structs.
SELECT
  doi,
  TRANSFORM(
    ARRAY_SORT(
      COLLECT_SET(authorships),
      (a, b) -> CASE
        WHEN a.original_author_order < b.original_author_order THEN -1
        WHEN a.original_author_order > b.original_author_order THEN 1
        ELSE 0
      END
    ),
    x -> STRUCT(
      TRIM(x.given) AS given,
      TRIM(x.family) AS family,
      TRIM(x.name) AS name,
      x.orcid AS orcid,
      TRANSFORM(
        x.affiliations,
        y -> STRUCT(TRIM(y.name) AS name, y.department AS department, y.ror_id AS ror_id)
      ) AS affiliations,
      x.is_corresponding AS is_corresponding,
      x.author_key AS author_key
    )
  ) AS authorships
FROM crossref_authors_and_affiliations_staging
GROUP BY doi;


In [0]:
-- Preview the table built by the CREATE OR REPLACE TABLE cell above. The name is derived
-- from :env_suffix exactly as in that cell, so the preview always targets the table that
-- was just written instead of a hard-coded dev catalog.
SELECT *
FROM identifier('openalex' || :env_suffix || '.works.crossref_super_authorships')