### Ingest awards from the "Gateway to Research" (GtR) parquet file

In [0]:
%sql
-- Rebuilds openalex.awards.gtr_awards from the Gateway to Research parquet
-- export: every grant reference attached to a DOI is split into an award id
-- and a funder label, restricted to the labels in target_funder_list, and
-- resolved to a funder_id via openalex.common.funder.alternate_titles.
CREATE OR REPLACE TABLE openalex.awards.gtr_awards
USING delta
AS
WITH target_funder_list AS (
    -- Funder labels to keep. They are compared verbatim (case-sensitive)
    -- against both the grant-reference suffix and the funder alternate titles.
    SELECT explode(array(
        'AHRC', 'BBSRC', 'EPSRC', 'ESRC',
        'MRC', 'NERC', 'STFC', 'INNOVATE UK'
    )) AS funder
),
exploded_awards AS (
    -- One output row per element of grant_reference for each source row.
    SELECT
        -- split() interprets its pattern as a Java regex, so the dot is
        -- matched literally via [.]; limit = 2 keeps everything after the
        -- first 'doi.org/' intact even if the separator reappears later.
        -- NOTE(review): values without a 'doi.org/' prefix have no element
        -- at index 1 -- confirm how such rows should be handled.
        split(doi, 'doi[.]org/', 2)[1] AS doi,
        explode(grant_reference) AS raw_grant
    FROM
        parquet.`s3a://openalex-ingest/awards/GatewayToResearch_2025-11-24.parquet`
    WHERE
        doi IS NOT NULL
        AND size(grant_reference) > 0
),
parsed_grants AS (
    -- raw_grant is split on ':' -- index 0 is used as the award id and
    -- index 1 as the funder label. Parsed once here so the filter below
    -- and the final join share the same expression.
    SELECT
        doi,
        split(raw_grant, ':')[0] AS funder_award_id,
        split(raw_grant, ':')[1] AS funder_name
    FROM
        exploded_awards
),
awards AS (
    -- Keep only grants whose funder label is in the target list.
    SELECT
        doi,
        funder_award_id,
        funder_name
    FROM
        parsed_grants
    WHERE
        funder_name IN (SELECT funder FROM target_funder_list)
),
funders AS (
    -- alternate_titles is parsed as a JSON array of strings and unnested so
    -- each alternate title can be matched against the target labels.
    SELECT DISTINCT
        funder_id,
        alternate_title
    FROM openalex.common.funder
    LATERAL VIEW explode(from_json(alternate_titles, 'array<string>')) AS alternate_title
    WHERE
        alternate_title IN (SELECT funder FROM target_funder_list)
        AND location = 'United Kingdom'
)
SELECT
    a.doi AS native_id,
    'doi' AS native_id_namespace,
    a.funder_award_id,
    f.funder_id
FROM awards AS a
INNER JOIN funders AS f
    ON a.funder_name = f.alternate_title
    