# Create Forte Awards from SweCRIS

Creates Forte (Swedish Research Council for Health, Working Life and Welfare) awards from SweCRIS. ~2.7K grants.

**Prerequisites:**
- Run `scripts/local/forte_to_s3.py` to download and upload the data first.

**Data source:** https://swecris-api.vr.se (SweCRIS API)
**S3 location:** `s3a://openalex-ingest/awards/forte/forte_projects.parquet`

**Forte funder:**
- funder_id: 4320324004
- display_name: "Forskningsrådet om Hälsa, Arbetsliv och Välfärd"
- ROR: https://ror.org/02d290r06
- DOI: 10.13039/501100006636

**Focus Areas:**
- Health
- Working Life
- Welfare

## Step 1: Create Staging Table from S3

In [None]:
%sql
-- Stage the Forte project export from S3 as a Delta table.
-- Every source column is carried over unchanged; one extra column records
-- when this load ran. CREATE OR REPLACE makes the cell safe to re-run.
CREATE OR REPLACE TABLE openalex.awards.forte_raw
USING DELTA
AS
SELECT
    src.*,
    current_timestamp() AS databricks_ingested_at
FROM parquet.`s3a://openalex-ingest/awards/forte/forte_projects.parquet` AS src;

In [None]:
%sql
-- Sanity check: number of rows loaded into the staging table.
SELECT
    COUNT(*) AS total_projects
FROM openalex.awards.forte_raw;

In [None]:
%sql
-- Eyeball a handful of staged rows to confirm the column layout.
SELECT
    raw.*
FROM openalex.awards.forte_raw AS raw
LIMIT 5;

## Step 2: Create Forte Awards Table

In [None]:
%sql
-- Build the Forte awards table from the staged SweCRIS rows.
--
-- One output row is produced per staged project that has a non-blank
-- project_id. Text fields that are blank (empty or whitespace-only) are
-- treated as missing, matching the blank checks already applied to
-- project_id and pi_family_name below.
CREATE OR REPLACE TABLE openalex.awards.forte_awards
USING delta
AS
WITH
-- Funder attributes for Forte, selected by its funder_id.
forte_funder AS (
    SELECT funder_id, display_name, ror_id, doi
    FROM openalex.common.funder
    WHERE funder_id = 4320324004
),

-- Per-project transformation. Columns that are pure functions of other
-- output columns (years, works_api_url) are derived in the final SELECT so
-- the hash and the date parsing are each written exactly once.
awards_transformed AS (
    SELECT
        -- Award id: hash of "<funder_id>:<lowercased project_id>", folded
        -- into the range below 9,000,000,000. The formula is kept exactly
        -- as-is so previously generated ids do not change.
        abs(xxhash64(CONCAT(f.funder_id, ':', LOWER(g.project_id)))) % 9000000000 AS id,
        -- Prefer the English text, but fall back to the original-language
        -- text when the English field is NULL *or blank*. A plain COALESCE
        -- would let an empty English string hide the original text.
        COALESCE(NULLIF(TRIM(g.title_english), ''), NULLIF(TRIM(g.title), '')) AS display_name,
        COALESCE(NULLIF(TRIM(g.abstract_english), ''), NULLIF(TRIM(g.abstract), '')) AS description,
        f.funder_id,
        g.project_id AS funder_award_id,
        -- TRY_CAST yields NULL instead of failing on unparseable amounts.
        TRY_CAST(g.amount AS DOUBLE) AS amount,
        'SEK' AS currency,
        struct(
            CONCAT('https://openalex.org/F', f.funder_id) AS id,
            f.display_name,
            f.ror_id,
            f.doi
        ) AS funder,
        -- Map the source award type onto a funding_type by keyword;
        -- the first matching branch wins, anything else is 'grant'.
        CASE
            WHEN LOWER(g.type_of_award) LIKE '%positions%' THEN 'fellowship'
            WHEN LOWER(g.type_of_award) LIKE '%stipend%' THEN 'fellowship'
            WHEN LOWER(g.type_of_award) LIKE '%infrastructure%' THEN 'infrastructure'
            WHEN LOWER(g.type_of_award) LIKE '%project%' THEN 'research'
            ELSE 'grant'
        END AS funding_type,
        g.type_of_award AS funder_scheme,
        'forte' AS provenance,
        -- Dates that do not parse as yyyy-MM-dd become NULL.
        TRY_TO_DATE(g.start_date, 'yyyy-MM-dd') AS start_date,
        TRY_TO_DATE(g.end_date, 'yyyy-MM-dd') AS end_date,
        -- A lead investigator is only emitted when a family name is present.
        CASE
            WHEN g.pi_family_name IS NOT NULL AND TRIM(g.pi_family_name) != '' THEN
                struct(
                    g.pi_given_name AS given_name,
                    g.pi_family_name AS family_name,
                    -- Blank ORCIDs are stored as NULL so that
                    -- "orcid IS NOT NULL" means an ORCID is actually present.
                    NULLIF(TRIM(g.pi_orcid), '') AS orcid,
                    CAST(NULL AS DATE) AS role_start,
                    struct(
                        -- Blank organisation names are stored as NULL.
                        NULLIF(TRIM(g.coordinating_organisation), '') AS name,
                        'Sweden' AS country,
                        CAST(NULL AS ARRAY<STRUCT<id:STRING, type:STRING, asserted_by:STRING>>) AS ids
                    ) AS affiliation
                )
            ELSE NULL
        END AS lead_investigator,
        -- Typed NULL placeholders: this source populates neither field, but
        -- the columns keep the same struct shape as lead_investigator.
        CAST(NULL AS STRUCT<given_name:STRING, family_name:STRING, orcid:STRING, role_start:DATE, affiliation:STRUCT<name:STRING, country:STRING, ids:ARRAY<STRUCT<id:STRING, type:STRING, asserted_by:STRING>>>>) AS co_lead_investigator,
        CAST(NULL AS ARRAY<STRUCT<given_name:STRING, family_name:STRING, orcid:STRING, role_start:DATE, affiliation:STRUCT<name:STRING, country:STRING, ids:ARRAY<STRUCT<id:STRING, type:STRING, asserted_by:STRING>>>>>) AS investigators,
        CONCAT('https://www.vr.se/swecris#/project/', g.project_id) AS landing_page_url,
        CAST(NULL AS STRING) AS doi
    FROM openalex.awards.forte_raw g
    -- Attaches the funder row to every project. If forte_funder returns no
    -- row, this join (and therefore the whole table) is empty.
    CROSS JOIN forte_funder f
    WHERE g.project_id IS NOT NULL AND TRIM(g.project_id) != ''
)
-- Explicit column list: fixes the output column order and derives the
-- year and URL columns from the values computed above.
SELECT
    id,
    display_name,
    description,
    funder_id,
    funder_award_id,
    amount,
    currency,
    funder,
    funding_type,
    funder_scheme,
    provenance,
    start_date,
    end_date,
    YEAR(start_date) AS start_year,
    YEAR(end_date) AS end_year,
    lead_investigator,
    co_lead_investigator,
    investigators,
    landing_page_url,
    doi,
    CONCAT('https://api.openalex.org/works?filter=awards.id:G', id) AS works_api_url,
    current_timestamp() AS created_date,
    current_timestamp() AS updated_date
FROM awards_transformed;

In [None]:
%sql
-- Publish the Forte awards into the shared awards table.
--
-- REPLACE WHERE atomically deletes the existing rows matching the predicate
-- and inserts the new ones, so re-running this notebook refreshes the Forte
-- rows instead of appending a second copy of every award.
-- NOTE(review): assumes the target is a Delta table whose provenance column
-- is named `provenance` (values are inserted positionally) and that 'forte'
-- rows are written only by this notebook — confirm before merging.
INSERT INTO openalex.awards.openalex_awards_raw
REPLACE WHERE provenance = 'forte'
SELECT
    id, display_name, description, funder_id, funder_award_id,
    amount, currency, funder, funding_type, funder_scheme, provenance,
    start_date, end_date, start_year, end_year,
    lead_investigator, co_lead_investigator, investigators,
    landing_page_url, doi, works_api_url, created_date, updated_date,
    -- NOTE(review): constant priority for this source; its meaning is
    -- defined by the consumer of openalex_awards_raw, not visible here.
    17 as priority
FROM openalex.awards.forte_awards;

## Verification

In [None]:
%sql
-- Number of awards produced by the transformation step.
SELECT
    COUNT(*) AS total_forte_awards
FROM openalex.awards.forte_awards;

In [None]:
%sql
-- Field-coverage summary for the Forte awards table.
-- COUNT(col) counts non-NULL values, so each has_* column is the number of
-- awards where that field is populated.
SELECT
    COUNT(*) AS total,
    COUNT(display_name) AS has_title,
    COUNT(description) AS has_abstract,
    COUNT(amount) AS has_amount,
    COUNT(lead_investigator) AS has_pi,
    -- Count only non-blank ORCIDs; an empty string is not an identifier.
    COUNT(NULLIF(TRIM(lead_investigator.orcid), '')) AS has_orcid,
    -- NULLIF guards the division so an empty table reports NULL here
    -- (with total = 0) instead of raising a divide-by-zero error.
    ROUND(COUNT(lead_investigator) * 100.0 / NULLIF(COUNT(*), 0), 1) AS pct_with_pi,
    -- Amounts are in SEK; reported in billions.
    ROUND(SUM(amount) / 1e9, 2) AS total_amount_billions_sek
FROM openalex.awards.forte_awards;

In [None]:
%sql
-- Top 15 lead-investigator institutions by total awarded amount (SEK).
SELECT
    lead_investigator.affiliation.name AS institution,
    COUNT(*) AS grant_count,
    ROUND(SUM(amount) / 1e9, 2) AS total_billions_sek
FROM openalex.awards.forte_awards
WHERE lead_investigator.affiliation.name IS NOT NULL
GROUP BY lead_investigator.affiliation.name
-- Rank on the exact sum rather than the 2-decimal rounded display value,
-- which collapses smaller institutions into ties. The extra sort keys make
-- the LIMIT cut-off deterministic when sums are equal or NULL.
ORDER BY
    SUM(amount) DESC NULLS LAST,
    COUNT(*) DESC,
    lead_investigator.affiliation.name
LIMIT 15;