In [0]:
  -- Exploration: every award row attached to a work where the award's funder is
  -- https://openalex.org/F4320321001, with the work-level fields carried alongside.
  WITH awards_per_work AS (
    -- Unnest the awards array: one row per (work, award struct).
    -- Works whose awards array is empty or NULL are dropped by the SIZE filter.
    SELECT
      w.id,
      w.publication_year,
      w.publication_date,
      w.cited_by_count,
      w.open_access,
      EXPLODE_OUTER(w.awards) AS award
    FROM openalex.works.openalex_works AS w
    WHERE SIZE(w.awards) > 0
  )
  SELECT
    apw.id AS work_id,
    -- fall back to the year of publication_date when publication_year is NULL
    COALESCE(apw.publication_year, YEAR(apw.publication_date)) AS pub_year,
    CAST(apw.cited_by_count AS INT) AS cited_by_count,
    CAST(apw.open_access.is_oa AS BOOLEAN) AS is_oa,
    apw.award.funder_id AS funder_url, -- e.g. 'https://openalex.org/F1234'
    apw.award.id AS award_id,
    apw.award.funder_award_id AS funder_award_id
  FROM awards_per_work AS apw
  WHERE apw.award.funder_id = 'https://openalex.org/F4320321001'

### Using `openalex_works.awards`
The awards data comes from Crossref. TODO: explore why there are so few (~175K here vs. roughly 100x that in `mid.work_funder`).

In [0]:
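-- NOTE: this entire cell is disabled (commented out). It builds
-- openalex.funders.funders_api from `openalex_works.awards`; the active build of
-- the same table is the `mid.work_funder` cell further down.
-- CREATE OR REPLACE TABLE openalex.funders.funders_api AS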
-- WITH
-- -- explode awards once (no lateral view), keep per-work context
-- awards_exploded AS (
--   SELECT
--     id AS work_id,
--     COALESCE(publication_year, YEAR(publication_date)) AS pub_year,
--     CAST(cited_by_count AS INT) AS cited_by_count,
--     CAST(open_access.is_oa AS BOOLEAN) AS is_oa,
--     a.funder_id AS funder_url, -- e.g. 'https://openalex.org/F1234'
--     a.id AS award_id,
--     a.funder_award_id
--   FROM (
--     SELECT
--       id, publication_year, publication_date, cited_by_count, open_access,
--       EXPLODE_OUTER(awards) AS a
--     FROM openalex.works.openalex_works
--     WHERE SIZE(awards) > 0
--   )
--   WHERE a.funder_id IS NOT NULL
-- ),

-- -- one row per (work, funder) to avoid double-counting a work
-- unique_work_funder_pairs AS (
--   SELECT DISTINCT
--     work_id,
--     funder_url,
--     pub_year,
--     cited_by_count,
--     is_oa
--   FROM awards_exploded
-- ),

-- funder_metrics AS (
--   SELECT
--     u.funder_url,
--     /* totals from unique (work,funder) */
--     CAST(COUNT(*) AS INT) AS works_count,
--     CAST(SUM(u.cited_by_count) AS INT) AS cited_by_count,
--     CAST(SUM(CASE WHEN u.is_oa THEN 1 ELSE 0 END) AS INT) AS oa_works_count,

--     /* summary_stats helpers */
--     CAST(AVG(CASE WHEN u.pub_year >= YEAR(current_date()) - 2
--                   THEN u.cited_by_count END) AS DOUBLE) AS two_year_mean,
--     CAST(COUNT_IF(u.cited_by_count >= 10) AS INT) AS i10_index,
--     SORT_ARRAY(
--       TRANSFORM(
--         FILTER(COLLECT_LIST(u.cited_by_count), x -> x IS NOT NULL),
--         x -> CAST(x AS INT)
--       ),
--       false
--     ) AS sorted_citations,

--     /* grants_count from awards grain (joined per funder) */
--     COALESCE(MAX(g.grants_count), 0) AS grants_count
--   FROM unique_work_funder_pairs u
--   LEFT JOIN (
--     SELECT funder_url, COUNT(*) AS grants_count
--     FROM awards_exploded
--     GROUP BY funder_url
--   ) g
--     ON g.funder_url = u.funder_url
--   GROUP BY u.funder_url
-- )

-- SELECT
--   -- identity
--   CONCAT('https://openalex.org/F', f.funder_id) AS id,
--   f.display_name,
--   from_json(f.alternate_titles, 'ARRAY<STRING>') as alternate_titles,
--   f.country_code,
--   f.description,
--   f.homepage_url,
--   f.image_url,
--   f.image_thumbnail_url,

--   -- ids map
--   struct(
--     CONCAT('https://openalex.org/F', f.funder_id) as openalex,
--     f.ror_id as ror,
--     f.wikidata_id as wikidata,
--     f.crossref_id as crossref,
--     f.doi as doi
--   ) as ids,

--   -- metrics
--   COALESCE(fm.works_count, 0) AS works_count,
--   COALESCE(fm.cited_by_count, 0) AS cited_by_count,

--   /* inline counts_by_year: correlated per-funder subquery (authors-style) */
--   (
--     SELECT
--       SORT_ARRAY(
--         COLLECT_LIST(
--           STRUCT(
--             year,
--             works_count,
--             oa_works_count,
--             cited_by_count
--           )
--         ),
--         true
--       )
--     FROM (
--       SELECT
--         CAST(uf.pub_year AS INT) AS year,
--         CAST(COUNT(*) AS INT) AS works_count,
--         CAST(SUM(CASE WHEN uf.is_oa THEN 1 ELSE 0 END) AS INT) AS oa_works_count,
--         CAST(SUM(uf.cited_by_count) AS INT) AS cited_by_count
--       FROM unique_work_funder_pairs uf
--       WHERE uf.funder_url = CONCAT('https://openalex.org/F', f.funder_id)
--         AND uf.pub_year IS NOT NULL
--       GROUP BY CAST(uf.pub_year AS INT)
--     )
--   ) AS counts_by_year,

--   COALESCE(fm.grants_count, 0) AS grants_count,

--   -- summary_stats (2yr_mean, h-index via ZIP_WITH, i10)
--   named_struct(
--     '2yr_mean_citedness', COALESCE(fm.two_year_mean, 0.0),
--     'h_index',
--       CAST(
--         ARRAY_MAX(
--           ZIP_WITH(
--             fm.sorted_citations,
--             SEQUENCE(1, SIZE(fm.sorted_citations)),
--             (citation, rank) -> IF(citation >= rank, rank, 0)
--           )
--         ) AS INT
--       ),
--     'i10_index', COALESCE(fm.i10_index, 0)
--   ) AS summary_stats,

--   -- dates
--   f.created_date,
--   f.updated_date

-- FROM openalex.mid.funder f
-- LEFT JOIN funder_metrics fm
--   ON fm.funder_url = CONCAT('https://openalex.org/F', f.funder_id);


### Using `mid.work_funder`

In [0]:
-- Builds openalex.funders.funders_api: one output row per row of openalex.mid.funder,
-- carrying the funder's identity fields, an ids struct, totals (works, citations,
-- grants), a per-year breakdown (counts_by_year) and summary_stats
-- (2yr_mean_citedness, h_index, i10_index).
-- Funders with no rows in mid.work_funder are kept by the LEFT JOIN and get 0 for
-- every numeric metric.
CREATE OR REPLACE TABLE openalex.funders.funders_api AS
WITH
-- Base from mid.work_funder (one row per (work,funder), award_ids may have >1 ids)
wf_base AS (
  SELECT
    CAST(funder_id AS BIGINT) AS funder_id,
    CAST(paper_id  AS BIGINT) AS work_id,
    -- SIZE(NULL) returns -1 under default (non-ANSI) Spark settings, which would
    -- subtract from SUM(award_count) below; count a missing array as zero awards.
    CAST(CASE WHEN award_ids IS NULL THEN 0 ELSE SIZE(award_ids) END AS INT) AS award_count
  FROM openalex.mid.work_funder
  WHERE funder_id IS NOT NULL AND paper_id IS NOT NULL
),
-- Enrich with work-level signals needed for metrics.
-- The inner join drops work_funder rows whose work is absent from openalex_works.
wf_enriched AS (
  SELECT
    b.funder_id,
    b.work_id,
    b.award_count,
    -- fall back to the year of publication_date when publication_year is NULL
    COALESCE(w.publication_year, YEAR(w.publication_date)) AS pub_year,
    CAST(w.cited_by_count AS INT) AS cited_by_count,
    CAST(w.open_access.is_oa AS BOOLEAN) AS is_oa
  FROM wf_base b
  INNER JOIN openalex.works.openalex_works w
    ON w.id = b.work_id
),
-- Per-funder metrics (works, citations, OA, grants; plus helpers for summary_stats)
funder_metrics AS (
  SELECT
    funder_id,
    /* counts over unique works */
    CAST(COUNT(DISTINCT work_id) AS INT) AS works_count,
    -- NOTE(review): the sums below are per input row, not per distinct work; they
    -- match works_count only if mid.work_funder really has one row per
    -- (work, funder) as the wf_base comment states -- confirm.
    -- NOTE(review): the INT cast can overflow for very large citation totals;
    -- widening would change the output column type, so it is left as is.
    CAST(SUM(cited_by_count) AS INT)     AS cited_by_count,
    CAST(SUM(CASE WHEN is_oa THEN 1 ELSE 0 END) AS INT) AS oa_works_count,
    /* grants_count from award_ids array sizes */
    CAST(SUM(award_count) AS INT) AS grants_count,
    /* summary_stats helpers */
    -- mean citations over works published in the current year or the two before it
    CAST(AVG(CASE WHEN pub_year >= YEAR(current_date()) - 2 THEN cited_by_count END) AS DOUBLE) AS two_year_mean,
    CAST(COUNT_IF(cited_by_count >= 10) AS INT) AS i10_index,
    -- non-NULL citation counts, sorted descending; input to the h-index below
    SORT_ARRAY(
      TRANSFORM(
        FILTER(COLLECT_LIST(cited_by_count), x -> x IS NOT NULL),
        x -> CAST(x AS INT)
      ),
      false
    ) AS sorted_citations
  FROM wf_enriched
  GROUP BY funder_id
)

SELECT
  CONCAT('https://openalex.org/F', f.funder_id) AS id,
  f.display_name,
  from_json(f.alternate_titles, 'ARRAY<STRING>') AS alternate_titles,
  f.country_code,
  f.description,
  f.homepage_url,
  f.image_url,
  f.image_thumbnail_url,
  STRUCT(
    CONCAT('https://openalex.org/F', f.funder_id) AS openalex,
    f.ror_id       AS ror,
    f.wikidata_id  AS wikidata,
    f.crossref_id  AS crossref,
    f.doi          AS doi
  ) AS ids,

  /* totals */
  COALESCE(m.works_count, 0)      AS works_count,
  COALESCE(m.cited_by_count, 0)   AS cited_by_count,
  COALESCE(m.grants_count, 0)     AS grants_count,

  /* counts_by_year (authors-style inline correlated subquery) */
  -- Array of (year, works_count, oa_works_count, cited_by_count) structs, sorted
  -- descending (struct ordering compares the first field, year, first).
  (
    SELECT
      SORT_ARRAY(
        COLLECT_LIST(
          STRUCT(
            year,
            works_count,
            oa_works_count,
            cited_by_count
          )
        ),
        false
      )
    FROM (
      SELECT
        CAST(e.pub_year AS INT) AS year,
        CAST(COUNT(DISTINCT e.work_id) AS INT) AS works_count,
        CAST(SUM(CASE WHEN e.is_oa THEN 1 ELSE 0 END) AS INT) AS oa_works_count,
        CAST(SUM(e.cited_by_count) AS INT) AS cited_by_count
      FROM wf_enriched e
      WHERE e.funder_id = f.funder_id
        AND e.pub_year IS NOT NULL
      GROUP BY CAST(e.pub_year AS INT)
    )
  ) AS counts_by_year,

  /* summary_stats */
  NAMED_STRUCT(
    '2yr_mean_citedness', COALESCE(m.two_year_mean, 0.0),
    'h_index',
      -- h-index: pair each citation count (descending) with its 1-based rank and
      -- keep the largest rank whose citation count is >= that rank.
      -- COALESCE makes funders with no metrics row report 0 instead of NULL,
      -- consistent with the other summary_stats fields.
      COALESCE(
        CAST(
          ARRAY_MAX(
            ZIP_WITH(
              m.sorted_citations,
              SEQUENCE(1, SIZE(m.sorted_citations)),
              (citation, rank) -> IF(citation >= rank, rank, 0)
            )
          ) AS INT
        ),
        0
      ),
    'i10_index', COALESCE(m.i10_index, 0)
  ) AS summary_stats,

  /* dates from funder dimension */
  f.created_date,
  f.updated_date

FROM openalex.mid.funder f
LEFT JOIN funder_metrics m
  ON m.funder_id = f.funder_id;
