### Creates `openalex.works.repo_super_authorships` in the Walden end-to-end workflow

In [0]:
-- Builds works.repo_super_authorships: one row per PMH record id (pmh_id) holding
-- a single merged, ordered `authorships` array assembled from the repo,
-- repo_backfill, pdf and landing_page rows of works.locations_parsed.
CREATE OR REPLACE TABLE identifier('openalex' || :env_suffix || '.works.repo_super_authorships') AS
WITH pmh_base AS (
  -- Candidate author lists keyed by pmh_id. `r` ranks the candidates of each
  -- pmh_id by source priority (lowest value first); r = 1 marks the row whose
  -- author list is treated as the canonical one further down.
  SELECT
    pmh_id,
    authors,
    priority,
    affiliations_exist,
    is_corresponding_exists,
    ROW_NUMBER() OVER (
      PARTITION BY pmh_id
      -- NULLS LAST: Spark puts NULLs first under ASC, which would let a provenance
      -- that is missing from the priority table outrank every known source.
      -- native_id is a best-effort tie-break so equal-priority picks are repeatable.
      ORDER BY priority ASC NULLS LAST, native_id ASC
    ) AS r
  FROM (
    -- Derive pmh_id once: repo-style rows carry it as native_id, the other
    -- provenances carry it as the first entry of `ids` in the 'pmh' namespace.
    SELECT
      CASE
        WHEN provenance IN ('repo', 'repo_backfill') THEN native_id
        ELSE get(filter(ids, x -> x.namespace = 'pmh').id, 0)
      END AS pmh_id,
      native_id,
      authors,
      priority,
      affiliations_exist,
      is_corresponding_exists
    FROM identifier('openalex' || :env_suffix || '.works.locations_parsed')
      -- NOTE(review): the priority table is read from the un-suffixed `openalex`
      -- catalog even when :env_suffix is set -- confirm this is intentional.
      LEFT JOIN openalex.system.priority_table USING (provenance)
    WHERE authors_exist
      AND provenance IN ('repo', 'repo_backfill', 'pdf', 'landing_page')
  ) AS keyed_locations
  -- Rows without a PMH id cannot be attributed to a record; keeping them would
  -- pool them all into one NULL partition and emit a single NULL-keyed row.
  WHERE pmh_id IS NOT NULL
),

-- Canonical author list per pmh_id (the r = 1 row of pmh_base), flattened to one
-- row per author. original_author_order is the 0-based position in the array.
pmh_best_authors_exploded AS (
  SELECT
    best.pmh_id,
    original_author_order,
    best_author_list_exploded,
    best_author_list_exploded.author_key AS author_key
  FROM pmh_base AS best
    LATERAL VIEW posexplode(best.authors) author_rows
      AS original_author_order, best_author_list_exploded
  WHERE best.r = 1
),

-- Every author of every candidate row flagged affiliations_exist, one row per
-- author, carrying that author's affiliations array and the source priority.
pmh_affiliations_base AS (
  SELECT
    src.pmh_id,
    authors_exploded,
    authors_exploded.affiliations AS affiliations,
    authors_exploded.author_key AS author_key,
    src.priority
  FROM pmh_base AS src
    LATERAL VIEW explode(src.authors) author_rows AS authors_exploded
  WHERE src.affiliations_exist
),

pmh_affiliations_staging AS (
  -- One row per individual affiliation of each (pmh_id, author_key), ranked by
  -- the priority of the source the affiliation came from.
  SELECT
    *,
    explode(affiliations) AS exploded_affiliations,
    -- RANK rather than ROW_NUMBER: all rows from the best-priority source tie at 1
    -- and are kept together by the r = 1 filter downstream.
    -- NULLS LAST: Spark puts NULLs first under ASC, which would let a source with
    -- no priority outrank every known one.
    RANK() OVER (PARTITION BY pmh_id, author_key ORDER BY priority ASC NULLS LAST) AS r
  FROM pmh_affiliations_base
  -- Only the FIRST affiliation's name is inspected here; an author whose first
  -- affiliation has a NULL name is dropped from this source entirely.
  WHERE get(affiliations.name, 0) IS NOT NULL
),

-- Re-assemble the top-ranked affiliations into a single array per author.
pmh_affiliations AS (
  SELECT
    ranked.pmh_id,
    ranked.author_key,
    COLLECT_LIST(ranked.exploded_affiliations) AS affiliations
  FROM pmh_affiliations_staging AS ranked
  WHERE ranked.r = 1
  GROUP BY
    ranked.pmh_id,
    ranked.author_key
),

-- Authors that carry a non-NULL is_corresponding flag, taken from every candidate
-- row marked is_corresponding_exists (not only the r = 1 row).
pmh_is_corresponding_base AS (
  SELECT
    src.pmh_id,
    src.authors,
    corresponding_author,
    corresponding_author.author_key AS author_key,
    corresponding_author.is_corresponding AS is_corresponding_landing_page
  FROM pmh_base AS src
    LATERAL VIEW explode(
      filter(src.authors, author -> author.is_corresponding IS NOT NULL)
    ) flagged_authors AS corresponding_author
  WHERE src.is_corresponding_exists
),

pmh_is_corresponding AS (
  -- Exactly one flag per (pmh_id, author_key).
  -- pmh_is_corresponding_base reads every candidate row of pmh_base, so the same
  -- author can show up once per source. Left un-aggregated, the LEFT JOIN on
  -- (pmh_id, author_key) fans out, and when sources disagree the author ends up
  -- twice in the final authorships array (COLLECT_SET only removes identical
  -- structs).
  SELECT
    pmh_id,
    author_key,
    -- MAX resolves disagreement in favour of TRUE: the author is corresponding if
    -- any source says so. Assumes is_corresponding is BOOLEAN -- TODO confirm.
    MAX(is_corresponding_landing_page) AS is_corresponding_landing_page
  FROM pmh_is_corresponding_base
  GROUP BY pmh_id, author_key
),

-- Canonical authors enriched with affiliations and the corresponding-author
-- flag; authors with no match keep NULL in the enrichment columns.
pmh_authors_and_affiliations_base AS (
  SELECT
    au.pmh_id,
    au.author_key,
    au.original_author_order,
    au.best_author_list_exploded,
    aff.affiliations,
    corr.is_corresponding_landing_page
  FROM pmh_best_authors_exploded AS au
    LEFT JOIN pmh_affiliations AS aff
      ON aff.pmh_id = au.pmh_id
      AND aff.author_key = au.author_key
    LEFT JOIN pmh_is_corresponding AS corr
      ON corr.pmh_id = au.pmh_id
      AND corr.author_key = au.author_key
),

-- Pack each author row into one struct. original_author_order is kept as the
-- first field so the final step can sort on it.
pmh_authors_and_affiliations_staging AS (
  SELECT
    joined.pmh_id,
    STRUCT(
      joined.original_author_order AS original_author_order,
      joined.best_author_list_exploded.given AS given,
      joined.best_author_list_exploded.family AS family,
      joined.best_author_list_exploded.name AS name,
      joined.best_author_list_exploded.orcid AS orcid,
      joined.affiliations AS affiliations,
      joined.is_corresponding_landing_page AS is_corresponding,
      joined.best_author_list_exploded.author_key AS author_key
    ) AS authorships
  FROM pmh_authors_and_affiliations_base AS joined
)

-- Final shape: one row per pmh_id. Author structs are de-duplicated, sorted by
-- their position in the canonical list, then re-projected without the position
-- field and with given / family / name / affiliation name whitespace-trimmed.
SELECT
  staged.pmh_id,
  TRANSFORM(
    ARRAY_SORT(
      COLLECT_SET(staged.authorships),
      (a, b) -> CASE
        WHEN a.original_author_order > b.original_author_order THEN 1
        WHEN a.original_author_order < b.original_author_order THEN -1
        ELSE 0
      END
    ),
    author -> STRUCT(
      TRIM(author.given) AS given,
      TRIM(author.family) AS family,
      TRIM(author.name) AS name,
      author.orcid,
      TRANSFORM(
        author.affiliations,
        aff -> STRUCT(TRIM(aff.name) AS name, aff.department, aff.ror_id)
      ) AS affiliations,
      author.is_corresponding,
      author.author_key
    )
  ) AS authorships
FROM pmh_authors_and_affiliations_staging AS staged
GROUP BY staged.pmh_id;


In [0]:
-- Preview the rows of the environment-specific repo_super_authorships table.
SELECT *
FROM identifier('openalex' || :env_suffix || '.works.repo_super_authorships')