In [None]:
-- Snapshot a per-row content fingerprint of the current awards_api table
-- before it is rebuilt, so the later compare step can tell which awards changed.
-- Output columns: id, updated_date (carried over unchanged), content_hash.
-- The fingerprint is xxhash64 over the '|'-joined field values, with NULLs
-- replaced by a per-field default. The field list and order must stay identical
-- to the one used by the compare/MERGE statement, otherwise every row will
-- look changed.
-- NOTE(review): funder_id, created_date and updated_date are not part of the
-- fingerprint; funder_id is presumably covered by the funder struct -- confirm.
CREATE OR REPLACE TABLE openalex.awards.awards_api_hash AS
WITH fingerprint_input AS (
  SELECT
    cur.id,
    cur.updated_date,
    CONCAT_WS(
      '|',
      CAST(cur.id AS STRING),
      COALESCE(cur.display_name, ''),
      COALESCE(cur.description, ''),
      COALESCE(cur.funder_award_id, ''),
      COALESCE(CAST(cur.amount AS STRING), ''),
      COALESCE(cur.currency, ''),
      COALESCE(TO_JSON(cur.funder), '{}'),
      COALESCE(cur.funding_type, ''),
      COALESCE(cur.funder_scheme, ''),
      COALESCE(cur.provenance, ''),
      COALESCE(CAST(cur.start_date AS STRING), ''),
      COALESCE(CAST(cur.end_date AS STRING), ''),
      COALESCE(CAST(cur.start_year AS STRING), ''),
      COALESCE(CAST(cur.end_year AS STRING), ''),
      COALESCE(TO_JSON(cur.lead_investigator), '{}'),
      COALESCE(TO_JSON(cur.co_lead_investigator), '{}'),
      COALESCE(TO_JSON(cur.investigators), '[]'),
      COALESCE(cur.landing_page_url, ''),
      COALESCE(cur.doi, ''),
      COALESCE(cur.works_api_url, ''),
      COALESCE(TO_JSON(cur.funded_outputs), '[]'),
      COALESCE(CAST(cur.funded_outputs_count AS STRING), '0')
    ) AS hash_payload
  FROM openalex.awards.awards_api AS cur
)
SELECT
  id,
  updated_date,
  xxhash64(hash_payload) AS content_hash
FROM fingerprint_input

In [None]:
-- Rebuild awards_api from openalex_awards, replacing any existing table.
-- created_date is cast to TIMESTAMP and truncated to whole seconds.
-- updated_date is emitted as a typed NULL placeholder; the hash-compare MERGE
-- that follows in this notebook fills it in.
CREATE OR REPLACE TABLE openalex.awards.awards_api AS
SELECT
  src.id,
  src.display_name,
  src.description,
  src.funder_id,
  src.funder_award_id,
  src.amount,
  src.currency,
  src.funder,
  src.funding_type,
  src.funder_scheme,
  src.provenance,
  src.start_date,
  src.end_date,
  src.start_year,
  src.end_year,
  src.lead_investigator,
  src.co_lead_investigator,
  src.investigators,
  src.landing_page_url,
  src.doi,
  src.works_api_url,
  DATE_TRUNC('SECOND', CAST(src.created_date AS TIMESTAMP)) AS created_date,
  CAST(NULL AS TIMESTAMP) AS updated_date,
  src.funded_outputs,
  src.funded_outputs_count
FROM openalex.awards.openalex_awards AS src

In [None]:
-- Awards: compare hashes and set updated_date only when content changed.
--
-- new_hashes fingerprints every row of the freshly rebuilt awards_api using the
-- same field list and order as the awards_api_hash snapshot; the two
-- expressions must stay in sync or every row will be treated as changed.
-- Each row is then stamped as follows:
--   * id absent from the snapshot (new award)  -> now, truncated to seconds
--   * fingerprint differs from the snapshot    -> now, truncated to seconds
--   * fingerprint unchanged                    -> the snapshot's updated_date,
--     falling back to now when the snapshot value is NULL (e.g. a previous run
--     stopped after the rebuild but before this MERGE), so a NULL is never
--     carried forward indefinitely.
WITH new_hashes AS (
  SELECT id,
    xxhash64(CONCAT_WS('|',
      CAST(id AS STRING),
      COALESCE(display_name, ''),
      COALESCE(description, ''),
      COALESCE(funder_award_id, ''),
      COALESCE(CAST(amount AS STRING), ''),
      COALESCE(currency, ''),
      COALESCE(TO_JSON(funder), '{}'),
      COALESCE(funding_type, ''),
      COALESCE(funder_scheme, ''),
      COALESCE(provenance, ''),
      COALESCE(CAST(start_date AS STRING), ''),
      COALESCE(CAST(end_date AS STRING), ''),
      COALESCE(CAST(start_year AS STRING), ''),
      COALESCE(CAST(end_year AS STRING), ''),
      COALESCE(TO_JSON(lead_investigator), '{}'),
      COALESCE(TO_JSON(co_lead_investigator), '{}'),
      COALESCE(TO_JSON(investigators), '[]'),
      COALESCE(landing_page_url, ''),
      COALESCE(doi, ''),
      COALESCE(works_api_url, ''),
      COALESCE(TO_JSON(funded_outputs), '[]'),
      COALESCE(CAST(funded_outputs_count AS STRING), '0')
    )) AS content_hash
  FROM openalex.awards.awards_api
)
MERGE INTO openalex.awards.awards_api AS target
USING (
  SELECT n.id,
    CASE
      -- New award, or content differs from the pre-rebuild snapshot.
      WHEN p.id IS NULL OR n.content_hash <> p.content_hash
        THEN DATE_TRUNC('SECOND', CURRENT_TIMESTAMP())
      -- Unchanged content: keep the previous stamp; if the snapshot had none,
      -- stamp now instead of propagating NULL.
      ELSE COALESCE(p.updated_date, DATE_TRUNC('SECOND', CURRENT_TIMESTAMP()))
    END AS new_updated_date
  FROM new_hashes n
  LEFT JOIN openalex.awards.awards_api_hash p ON n.id = p.id
) AS source
ON target.id = source.id
WHEN MATCHED THEN UPDATE SET target.updated_date = source.new_updated_date