### Creates `openalex.works.crossref_super_authorships` as part of the Walden end-to-end workflow

In [0]:
-- crossref_base: one row per location record (provenance crossref, pdf or
-- landing_page) that has authors, keyed by DOI.
--   doi : native_id for crossref rows; for the other provenances, the id of the
--         first `ids` entry whose namespace is 'doi'.
--   r   : position of the record within its DOI, 1 = best (lowest) priority.
-- Changes compared with the previous version:
--   * The DOI expression is computed once in the inner query instead of being
--     repeated inside the window's partition clause.
--   * Records without a resolvable DOI are dropped. Before, they all shared a
--     single NULL partition, which skewed the window and produced a meaningless
--     doi = NULL work downstream.
--   * `nulls last`: Spark sorts NULLs first for ASC, so a provenance missing
--     from the left-joined priority table used to outrank every real priority.
--   * native_id is a tie-breaker so the pick between equal-priority records no
--     longer depends on execution order (records that also share a native_id
--     remain interchangeable).
--   * 'doi' is single-quoted so it stays a string literal even when
--     double-quoted identifiers are enabled.
create or replace temp view crossref_base as
(
  select
    doi,
    authors,
    priority,
    affiliations_exist,
    is_corresponding_exists,
    row_number() over (
      partition by doi
      order by priority asc nulls last, native_id asc
    ) as r
  from
    (
      select
        case
          when provenance = 'crossref' then native_id
          else get(filter(ids, x -> x.namespace = 'doi').id, 0)
        end as doi,
        native_id, -- kept only as the tie-breaker for the ranking above
        authors,
        priority,
        affiliations_exist,
        is_corresponding_exists
      from
        openalex.works.locations left join openalex.system.priority_table using (provenance)
      where
        authors_exist
        and provenance in ('crossref', 'pdf', 'landing_page')
    ) as candidate_locations
  where
    doi is not null
);

-- crossref_best_authors_exploded: the author list of the best-ranked record
-- (r = 1) for each DOI, one row per author.
--   original_author_order     : 0-based position of the author in the array
--   best_author_list_exploded : the author struct itself
--   author_key                : copied out of the struct so it can be joined on
create or replace temp view crossref_best_authors_exploded as
(
  select
    best_record.doi,
    original_author_order,
    best_author_list_exploded,
    best_author_list_exploded.author_key as author_key
  from
    crossref_base as best_record
    lateral view posexplode(best_record.authors) author_positions
      as original_author_order, best_author_list_exploded
  where
    best_record.r = 1
);

-- crossref_affiliations_base: one row per author for every crossref_base record
-- flagged with affiliations_exist (all records, not only the r = 1 one), with
-- the author's affiliations array, author_key and the record's priority.
create or replace temp view crossref_affiliations_base as
(
  select
    src.doi,
    authors_exploded,
    authors_exploded.affiliations as affiliations,
    authors_exploded.author_key as author_key,
    src.priority
  from
    crossref_base as src
    lateral view explode(src.authors) author_rows as authors_exploded
  where
    src.affiliations_exist
);

-- crossref_affiliations_staging: one row per (author, affiliation).
-- Only authors whose FIRST affiliation has a name are kept.
-- r uses rank() rather than row_number() so that every affiliation coming from
-- the best-priority source of a (doi, author_key) pair shares r = 1.
create or replace temp view crossref_affiliations_staging as
(
  select
    doi,
    authors_exploded,
    affiliations,
    author_key,
    priority,
    explode(affiliations) as exploded_affiliations,
    rank() over (
      partition by doi, author_key
      order by priority asc
    ) as r
  from
    crossref_affiliations_base
  where
    get(affiliations, 0).name is not null
);

-- crossref_affiliations: for each (doi, author_key), the affiliations taken
-- from the best-priority source (r = 1), gathered back into a single array.
create or replace temp view crossref_affiliations as
(
  select
    top_ranked.doi,
    top_ranked.author_key,
    collect_list(top_ranked.exploded_affiliations) as affiliations
  from
    (
      select
        doi,
        author_key,
        exploded_affiliations
      from
        crossref_affiliations_staging
      where
        r = 1
    ) as top_ranked
  group by
    -- an author_key can carry several affiliations; they collapse into one row
    top_ranked.doi,
    top_ranked.author_key
);

-- crossref_is_corresponding_base: one row per author that carries a non-null
-- is_corresponding flag, taken from every crossref_base record marked
-- is_corresponding_exists. These records are mainly, if not exclusively, from
-- landing_page_works.
create or replace temp view crossref_is_corresponding_base as
(
  select
    src.doi,
    src.authors,
    corresponding_author,
    corresponding_author.author_key as author_key,
    corresponding_author.is_corresponding as is_corresponding_landing_page
  from
    crossref_base as src
    lateral view explode(
      filter(src.authors, a -> a.is_corresponding is not null)
    ) flagged_authors as corresponding_author
  where
    src.is_corresponding_exists
);

-- crossref_is_corresponding: exactly one corresponding-author flag per
-- (doi, author_key), ready to be left-joined onto the best author list.
--
-- crossref_is_corresponding_base is built from every location record of a DOI,
-- so the same (doi, author_key) can appear several times. Projecting those rows
-- as-is made the downstream left join fan out, and when the flags disagreed the
-- author ended up listed twice in the final authorships array. Aggregating here
-- keeps the join one-to-one.
--
-- max() over the flag returns true as soon as any source marks the author as
-- corresponding (true sorts above false).
-- NOTE(review): assumes is_corresponding is a boolean -- confirm.
create or replace temp view crossref_is_corresponding as
(
  select
    doi,
    author_key,
    max(is_corresponding_landing_page) as is_corresponding_landing_page
  from
    crossref_is_corresponding_base
  group by
    doi,
    author_key
);

-- crossref_authors_and_affiliations_base: the best author list, enriched with
-- each author's affiliations and corresponding-author flag where available.
-- Both joins are left joins, so authors without a match keep NULLs.
create or replace temp view crossref_authors_and_affiliations_base as
(
  select
    best.doi,
    best.author_key,
    best.original_author_order,
    best.best_author_list_exploded,
    aff.affiliations,
    corr.is_corresponding_landing_page
  from
    crossref_best_authors_exploded as best
      left join crossref_affiliations as aff
        on best.doi = aff.doi
        and best.author_key = aff.author_key
      left join crossref_is_corresponding as corr
        on best.doi = corr.doi
        and best.author_key = corr.author_key
);

-- crossref_authors_and_affiliations_staging: packs each author row into a
-- single `authorships` struct. original_author_order is carried inside the
-- struct so the final aggregation can restore the author order.
create or replace temp view crossref_authors_and_affiliations_staging as
(
  select
    doi,
    struct(
      original_author_order as original_author_order,
      best_author_list_exploded.given as given,
      best_author_list_exploded.family as family,
      best_author_list_exploded.name as name,
      best_author_list_exploded.orcid as orcid,
      affiliations as affiliations,
      is_corresponding_landing_page as is_corresponding,
      best_author_list_exploded.author_key as author_key
    ) as authorships
  from
    crossref_authors_and_affiliations_base
);

-- openalex.works.crossref_super_authorships: one row per DOI with its ordered
-- array of authorships.
--   1. collect_set gathers the distinct authorship structs of the DOI.
--   2. array_sort restores the author order using original_author_order.
--   3. transform rebuilds each struct WITHOUT original_author_order (it was
--      only needed for sorting and should not reach the API) and trims the
--      name fields and the affiliation names.
-- NOTE(review): an earlier comment here said authors with a null author_key
-- are removed; nothing in this statement does that -- confirm whether such a
-- filter is still wanted.
-- TODO: bad "authors" could also be filtered here, e.g. doi =
-- 10.32870/rmip.v10i1.453 has author names like "Universidad de Colima";
-- presumably a list exists somewhere to filter these out programmatically.
-- For debugging, replace the first line with:
--   create or replace temp view crossref_super_authorships as (
create or replace table openalex.works.crossref_super_authorships as (
  select
    doi,
    transform(
      array_sort(
        collect_set(authorships),
        (a, b) ->
          case
            when a.original_author_order < b.original_author_order then -1
            when a.original_author_order > b.original_author_order then 1
            else 0
          end
      ),
      entry -> struct(
        trim(entry.given) as given,
        trim(entry.family) as family,
        trim(entry.name) as name,
        entry.orcid,
        transform(
          entry.affiliations,
          aff -> struct(
            trim(aff.name) as name,
            aff.department as department,
            aff.ror_id as ror_id
          )
        ) as affiliations,
        entry.is_corresponding,
        entry.author_key
      )
    ) as authorships
  from
    crossref_authors_and_affiliations_staging
  group by
    doi
);
-- select * from crossref_super_authorships

In [0]:
-- Preview of the table written by the cell above.
select
  doi,
  authorships
from
  openalex.works.crossref_super_authorships