In [0]:
-- Front-fill openalex.works.work_topics_frontfill from
-- openalex.works.work_topics_lm_output.
--
-- For every work_id in the LM output, the entries of `lm_topics` are exploded,
-- enriched with topic / subfield / field / domain metadata, and the three
-- highest-scoring topics are kept. The merge is insert-only: a work_id that
-- already exists in the target is left untouched.
MERGE INTO openalex.works.work_topics_frontfill AS target
USING (
  WITH topics_metadata AS (
    -- Topic display name plus its subfield / field / domain, each packed into
    -- an {id, display_name} struct with a full openalex.org URL as the id.
    SELECT
      t.topic_id,
      t.display_name,
      NAMED_STRUCT(
        'id', concat('https://openalex.org/subfields/', s.subfield_id),
        'display_name', s.display_name
      ) AS subfield,
      NAMED_STRUCT(
        'id', concat('https://openalex.org/fields/', f.field_id),
        'display_name', f.display_name
      ) AS field,
      NAMED_STRUCT(
        'id', concat('https://openalex.org/domains/', d.domain_id),
        'display_name', d.display_name
      ) AS domain
    FROM openalex.common.topics t
    INNER JOIN openalex.common.subfields s USING (subfield_id)
    INNER JOIN openalex.common.fields f USING (field_id)
    INNER JOIN openalex.common.domains d USING (domain_id)
  ),

  lm_output_exploded AS (
    -- One row per (work, predicted topic). explode() drops rows whose
    -- lm_topics array is NULL or empty.
    SELECT
      work_id,
      explode(lm_topics) AS result,
      source,
      created_timestamp
    FROM openalex.works.work_topics_lm_output
  )

  SELECT
    wt.work_id,
    -- Keep the top three topics. The comparator is a total order so the result
    -- is reproducible: score descending, NULL scores last, ties broken by id.
    slice(array_sort(
      array_agg(
        NAMED_STRUCT(
          'id', concat('https://openalex.org/T', wt.result.topic_id),
          'display_name', tm.display_name,
          'score', wt.result.score,
          'subfield', tm.subfield,
          'field', tm.field,
          'domain', tm.domain
        )
      ),
      (left, right) -> CASE
        WHEN left.score > right.score THEN -1
        WHEN left.score < right.score THEN 1
        WHEN left.score IS NULL AND right.score IS NOT NULL THEN 1
        WHEN left.score IS NOT NULL AND right.score IS NULL THEN -1
        WHEN left.id < right.id THEN -1
        WHEN left.id > right.id THEN 1
        ELSE 0
      END
    ), 1, 3) AS topics,
    -- first() is non-deterministic after a shuffle; take the source of the
    -- most recent row instead, matching the max(created_timestamp) below.
    max_by(wt.source, wt.created_timestamp) AS source,
    max(wt.created_timestamp) AS created_datetime,
    max(wt.created_timestamp) AS updated_datetime
  FROM lm_output_exploded wt
  -- Inner join: predicted topics with no metadata row are discarded.
  INNER JOIN topics_metadata tm ON tm.topic_id = wt.result.topic_id
  GROUP BY wt.work_id
) AS source
ON target.work_id = source.work_id

-- Insert only if the work_id does not exist
WHEN NOT MATCHED THEN INSERT (
  work_id,
  topics,
  source,
  created_datetime,
  updated_datetime
) VALUES (
  source.work_id,
  source.topics,
  source.source,
  source.created_datetime,
  source.updated_datetime
);

In [0]:
-- Run table maintenance on the merge target after the insert.
-- NOTE(review): assumes this is a Delta table, where OPTIMIZE compacts small
-- data files — confirm.
OPTIMIZE openalex.works.work_topics_frontfill;