### Enrich `concepts` predicted with common metadata

In [0]:
-- Enrich predicted concepts with metadata from the common concepts table,
-- derive keyword entries from them, and write both arrays back to the
-- predictions table.
-- Only rows with a non-empty `concepts` array that are still missing
-- `concepts_enriched` or `keywords` feed the source side of the MERGE.
WITH common AS (
  -- Metadata columns attached to each predicted concept.
  SELECT
    concept_id,
    wikidata_id,
    display_name,
    level
  FROM openalex.common.concepts
),
exploded AS (
  -- One row per (concept_key, predicted concept) for rows not yet enriched.
  SELECT
    concept_key,
    explode(concepts) AS concept
  FROM openalex.works.openalex_works_concepts_predicted
  WHERE size(concepts) > 0
    AND (concepts_enriched IS NULL OR keywords IS NULL)
),
joined AS (
  -- Predicted concepts paired with their metadata; concepts with no match in
  -- `common` are dropped by the inner join.
  SELECT DISTINCT -- this should be ok for non-unique original records in the predictions table
    x.concept_key,
    x.concept.id AS concept_id,
    x.concept.score AS score,
    c.wikidata_id,
    c.display_name,
    c.level
  FROM exploded AS x
  INNER JOIN common AS c
    ON x.concept.id = c.concept_id
),
enriched AS (
  -- One row per concept_key carrying the two arrays written by the MERGE.
  SELECT
    concept_key,
    -- All matched concepts for the key, highest score first.
    array_sort(
      collect_set(
        NAMED_STRUCT(
          'id', concept_id,
          'wikidata', wikidata_id,
          'display_name', display_name,
          'level', level,
          'score', CAST(score AS FLOAT)
        )
      ),
      (left, right) -> CASE
        WHEN left.score > right.score THEN -1
        WHEN left.score < right.score THEN 1
        ELSE 0
      END
    ) AS concepts_enriched,
    -- Keywords are built only from concepts with level > 1; other concepts
    -- yield NULL and are removed by array_compact. Highest score first.
    -- Note: `score` is not cast here, unlike in `concepts_enriched`.
    array_sort(
      array_distinct(
        array_compact(
          collect_list(
            IF(level > 1, STRUCT(
                -- Keyword id = URL prefix + slug of the display name.
                -- Slug steps, innermost first: lowercase, drop single quotes,
                -- remove " ( ... )" groups, collapse every run of characters
                -- outside the kept set into "-", then trim "-" at both ends.
                concat('https://openalex.org/keywords/',
                  regexp_replace(
                    regexp_replace(
                      regexp_replace(replace(lower(display_name), '\'', ''), '\\s*\\([^)]*\\)', ''),  -- remove " ( ... )"
                      -- Kept set: letters, digits, ^ . / en dash * #
                      -- The en dash is written as the regex escape \u2013 so the
                      -- pattern survives encoding round-trips of this file (it had
                      -- previously been corrupted into a three-character mojibake
                      -- sequence).
                      -- NOTE(review): the second "^" keeps literal carets in the
                      -- slug; confirm this is intended and not a typo.
                      '[^^\\p{L}\\p{N}\./\\u2013\*#]+', '-' -- non-alnum -> "-"
                    ),
                    '(^-+|-+$)', '' -- trim leading/trailing "-"
                  )
                ) AS id,
                display_name,
                score
              ),
              NULL
            )
          )
        )
      ),
      (left, right) -> CASE
        WHEN left.score > right.score THEN -1
        WHEN left.score < right.score THEN 1
        ELSE 0
      END
    ) AS keywords
  FROM joined
  GROUP BY concept_key
)
-- Every target row sharing a concept_key with `enriched` receives the same
-- arrays; rows without a match in `enriched` are left untouched.
MERGE INTO openalex.works.openalex_works_concepts_predicted AS target
USING enriched AS source
ON target.concept_key = source.concept_key
WHEN MATCHED THEN UPDATE SET
  target.concepts_enriched = source.concepts_enriched,
  target.keywords = source.keywords;

### Update the `openalex_works` - no time like the present
Commented out due to concurrent table update exceptions - don't need the risk

In [0]:
-- %sql
-- MERGE INTO openalex.works.openalex_works AS target
-- USING (
--   SELECT concept_key,
--          FIRST(concepts_enriched) AS concepts
--   FROM openalex.works.openalex_works_concepts_predicted
--   WHERE size(concepts_enriched) > 0
--   GROUP BY concept_key
-- ) as source
-- ON target.concepts IS NULL AND
--    xxhash64(
--      -- sanitize later
--      concat_ws('|',
--        target.title,
--        target.abstract,
--        target.primary_location.source.display_name,
--        target.primary_location.source.type
--      )
--    ) = source.concept_key
-- WHEN MATCHED THEN
--   UPDATE SET target.concepts = source.concepts;  