### Combine Award Sources to Create Single Awards Table

**Priority Order (lower = higher priority):**
- 0: GTR Project Awards (authoritative for UK grants, full metadata)
- 1: Crossref Awards (rich metadata from Crossref)
- 2: Backfill Awards (extracted from publication funding acknowledgements)
- 3: NIH Awards (US NIH grants with full metadata)
- 3: NSF Awards (US NSF grants with full metadata)
- 3: NSERC Awards (Canadian NSERC grants with full metadata)
- 3: GTR Awards (legacy publication-based, for work linkage only)
- 4: Gates Foundation Awards (Bill & Melinda Gates Foundation committed grants)
- 5: SSHRC Awards (Social Sciences and Humanities Research Council of Canada)
- 6: ANR Awards (Agence Nationale de la Recherche - French National Research Agency)
- 7: CIHR Awards (Canadian Institutes of Health Research)
- 8: CFI Awards (Canada Foundation for Innovation)
- 9: Vinnova Awards (Sweden's Innovation Agency)

When the same award appears in more than one source, only the row from the highest-priority source (the one with the lowest priority number) is kept.

In [None]:
%sql
-- Rebuild openalex.awards.openalex_awards as a Delta table from the query below.
CREATE OR REPLACE TABLE openalex.awards.openalex_awards
USING DELTA
AS
WITH award_aggregates AS (
  -- One row per distinct award.id referenced by any work:
  --   award_id             numeric remainder of the award URL once the
  --                        'https://openalex.org/G' prefix is stripped
  --   funded_outputs       distinct work ids rendered as 'https://openalex.org/W<id>'
  --   funded_outputs_count number of distinct work ids
  -- No ordering is applied to the collected list in this CTE.
  SELECT
    CAST(REPLACE(award.id, 'https://openalex.org/G', '') AS BIGINT) AS award_id,
    TRANSFORM(
      COLLECT_LIST(DISTINCT id),
      wid -> CONCAT('https://openalex.org/W', wid)
    ) AS funded_outputs,
    COUNT(DISTINCT id) AS funded_outputs_count
  FROM openalex.works.openalex_works
  -- Fan each work out to one row per entry of its awards array.
  LATERAL VIEW EXPLODE(awards) AS award
  WHERE SIZE(awards) > 0
  GROUP BY award.id
),
-- Stack every award source into one common shape. Each branch emits the same
-- column list in the same order (UNION ALL matches columns by position) and
-- tags its rows with a numeric `priority` consumed by the deduplication step.
-- In every branch the output `id` is derived from the source `id` as
-- ABS(XXHASH64(id)) % 9000000000; all branches except the first build
-- `works_api_url` from that same derived value.
combined AS (
  -- [priority 0] GTR project awards: all columns, including provenance and
  -- works_api_url, are passed through from the source table.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    works_api_url,
    created_date,
    updated_date,
    0 AS priority  -- lowest priority number used by any branch
  FROM openalex.awards.gtr_project_awards

  UNION ALL

  -- [priority 1] Crossref awards: full column set; provenance is fixed to
  -- 'crossref_work'.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    'crossref_work' AS provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    1 AS priority
  FROM openalex.awards.crossref_awards

  UNION ALL

  -- [priority 2] Backfill awards: only funder identifiers, funder struct and
  -- dates of record are populated; descriptive columns are NULL.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    NULL AS display_name,
    NULL AS description,
    funder_id,
    funder_award_id,
    NULL AS amount,
    NULL AS currency,
    -- Rebuild the funder struct from exactly these four fields.
    STRUCT(
      funder.id,
      funder.display_name,
      funder.ror_id,
      funder.doi
    ) AS funder,
    NULL AS funding_type,
    NULL AS funder_scheme,
    'crossref_work.grants' AS provenance,
    NULL AS start_date,
    NULL AS end_date,
    NULL AS start_year,
    NULL AS end_year,
    NULL AS lead_investigator,
    NULL AS co_lead_investigator,
    NULL AS investigators,
    NULL AS landing_page_url,
    NULL AS doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    2 AS priority
  FROM openalex.awards.backfill_awards

  UNION ALL

  -- [priority 3] NIH awards: full column set, provenance taken from the source.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    3 AS priority
  FROM openalex.awards.nih_awards

  UNION ALL

  -- [priority 3] Legacy GTR awards: funder columns only, descriptive columns
  -- are NULL; provenance is fixed to 'gateway_to_research'.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    NULL AS display_name,
    NULL AS description,
    funder_id,
    funder_award_id,
    NULL AS amount,
    NULL AS currency,
    funder,
    NULL AS funding_type,
    NULL AS funder_scheme,
    'gateway_to_research' AS provenance,
    NULL AS start_date,
    NULL AS end_date,
    NULL AS start_year,
    NULL AS end_year,
    NULL AS lead_investigator,
    NULL AS co_lead_investigator,
    NULL AS investigators,
    NULL AS landing_page_url,
    NULL AS doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    3 AS priority
  FROM openalex.awards.gtr_awards

  UNION ALL

  -- [priority 3] NSF awards: full column set, provenance taken from the source.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    3 AS priority
  FROM openalex.awards.nsf_awards

  UNION ALL

  -- [priority 3] NSERC awards: full column set, provenance taken from the source.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    3 AS priority
  FROM openalex.awards.nserc_awards

  UNION ALL

  -- [priority 4] Gates Foundation awards: full column set.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    4 AS priority
  FROM openalex.awards.gates_awards

  UNION ALL

  -- [priority 5] SSHRC awards: full column set.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    5 AS priority
  FROM openalex.awards.sshrc_awards

  UNION ALL

  -- [priority 6] ANR awards: full column set.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    6 AS priority
  FROM openalex.awards.anr_awards

  UNION ALL

  -- [priority 7] CIHR awards: full column set.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    7 AS priority
  FROM openalex.awards.cihr_awards

  UNION ALL

  -- [priority 8] CFI awards: full column set.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    8 AS priority
  FROM openalex.awards.cfi_awards

  UNION ALL

  -- [priority 9] Vinnova awards: full column set.
  SELECT
    ABS(XXHASH64(id)) % 9000000000 AS id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    start_year,
    end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', ABS(XXHASH64(id)) % 9000000000) AS works_api_url,
    created_date,
    updated_date,
    9 AS priority
  FROM openalex.awards.vinnova_awards
),
-- Keep exactly one row per award id: the row from the best-ranked source.
-- Priority is "lower number = higher priority" (0 is the most authoritative),
-- so rows are ranked by priority ASCENDING and only rank 1 is kept.
deduplicated_awards AS (
  SELECT
    * EXCEPT(priority, row_num)  -- ranking helpers are not part of the output table
  FROM (
    SELECT
      *,
      ROW_NUMBER() OVER (
        PARTITION BY id
        ORDER BY
          priority ASC,
          -- Tie-breakers: several sources in `combined` share the same priority
          -- value, and ROW_NUMBER picks arbitrarily among equal sort keys.
          -- Prefer the most recently updated row, then order by provenance,
          -- so rebuilding the table gives a stable result.
          updated_date DESC NULLS LAST,
          provenance ASC
      ) AS row_num
    FROM combined
  ) AS ranked
  WHERE row_num = 1
)
-- Final projection: every deduplicated award column plus the linked-work rollup.
SELECT
  dedup.*,
  -- Awards with no matching rollup row get an empty array; otherwise only the
  -- first 100 entries of the list are stored (no sort is applied before slicing).
  SLICE(COALESCE(agg.funded_outputs, ARRAY()), 1, 100) AS funded_outputs,
  -- Count of linked works, 0 when the award has no matching rollup row.
  COALESCE(agg.funded_outputs_count, 0) AS funded_outputs_count
FROM deduplicated_awards AS dedup
LEFT JOIN award_aggregates AS agg
  ON agg.award_id = dedup.id