In [1]:
import requests
import pandas as pd
from datetime import date, timedelta
from urllib.parse import quote_plus

### Configuration

In [19]:
# Organization names passed to the NSF awardee-name search, one query per name.
ORG_CANDIDATES = [
    "Corewell Health",
    "Spectrum Health",
    "Beaumont Health",
]

# Final, normalized output columns.
OUT_COLS = [
    "id","agency","awardeeName","awardeeCity","awardeeStateCode",
    "piFirstName","piLastName","title","date","startDate","expDate",
    "fundsObligatedAmt","abstractText"
]

# Search window: the last LOOKBACK_YEARS years, ending today (inclusive).
LOOKBACK_YEARS = 20
TODAY = date.today()
try:
    START = TODAY.replace(year=TODAY.year - LOOKBACK_YEARS)
except ValueError:
    # date.replace raises when today is Feb 29 and the target year is not a
    # leap year; fall back to Feb 28 of that year.
    START = TODAY.replace(year=TODAY.year - LOOKBACK_YEARS, day=28)

### NSF Data Extraction

In [23]:
def _split_pi_name(first, last, full_name):
    """Fill a missing PI first/last name from the combined ``pdPIName`` value.

    Names already present are kept; the combined name is only used when it
    contains at least two whitespace-separated parts (first token -> first
    name, remaining tokens -> last name).
    """
    if (not first or not last) and full_name:
        parts = full_name.split()
        if len(parts) >= 2:
            first = first or parts[0]
            last = last or " ".join(parts[1:])
    return first, last


def fetch_nsf_awards(org_names, start_date, end_date, strict_name_match=False):
    """Query the NSF awards API once per organization name and collect the results.

    Parameters
    ----------
    org_names : iterable of str
        Names sent as the ``awardeeName`` query parameter, one paginated
        query per name.
    start_date, end_date : datetime.date (anything with ``strftime``)
        Bounds sent as ``startDateStart`` / ``startDateEnd`` in
        ``MM/DD/YYYY`` format.
    strict_name_match : bool, default False
        The name search is loose: in this notebook's recorded output the
        query "Corewell Health" returned awardees such as
        "Oregon Health & Science University". When True, an award is kept
        only if the queried name occurs (case-insensitively) inside the
        returned ``awardeeName``. The default keeps every returned award.

    Returns
    -------
    pandas.DataFrame
        One row per returned award per queried name, with the columns
        listed in ``columns`` below (the normalized award fields plus
        ``orgCandidate``, the name that produced the row). No
        de-duplication is done across names. The columns are present even
        when nothing was found.

    Raises
    ------
    requests.HTTPError
        Propagated from ``raise_for_status()`` on a non-success response.
    """
    base = "https://api.nsf.gov/services/v1/awards.json"
    s = start_date.strftime("%m/%d/%Y")
    e = end_date.strftime("%m/%d/%Y")
    rpp = 25

    # Fixed output schema, so an empty result still has the expected columns.
    columns = [
        "id","agency","awardeeName","awardeeCity","awardeeStateCode",
        "piFirstName","piLastName","title","date","startDate","expDate",
        "fundsObligatedAmt","abstractText","orgCandidate"
    ]

    # Loop-invariant; pdPIName is requested only as a fallback for the PI name.
    print_fields = ",".join([
        "id","agency","awardeeName","awardeeCity","awardeeStateCode",
        "pdPIName","piFirstName","piLastName","title","date","startDate","expDate",
        "fundsObligatedAmt","abstractText"
    ])

    rows = []
    for org in org_names:
        org_key = org.casefold()
        offset = 1
        while True:
            params = {
                "awardeeName": org,
                "startDateStart": s,
                "startDateEnd": e,
                "rpp": rpp,
                "offset": offset,
                "printFields": print_fields,
            }
            resp = requests.get(base, params=params, timeout=60)
            resp.raise_for_status()
            data = resp.json()
            # Tolerate a missing/None "response" or "award" entry.
            awards = (data.get("response") or {}).get("award") or []
            if not awards:
                break

            for a in awards:
                awardee = a.get("awardeeName") or org
                if strict_name_match and org_key not in awardee.casefold():
                    continue

                pi_first, pi_last = _split_pi_name(
                    a.get("piFirstName"), a.get("piLastName"), a.get("pdPIName")
                )

                rows.append({
                    "id": str(a.get("id") or ""),
                    "agency": a.get("agency") or "NSF",
                    "awardeeName": awardee,
                    "awardeeCity": a.get("awardeeCity"),
                    "awardeeStateCode": a.get("awardeeStateCode"),
                    "piFirstName": pi_first,
                    "piLastName": pi_last,
                    "title": a.get("title"),
                    "date": a.get("date"),
                    "startDate": a.get("startDate"),
                    "expDate": a.get("expDate"),
                    "fundsObligatedAmt": a.get("fundsObligatedAmt"),
                    "abstractText": a.get("abstractText"),
                    "orgCandidate": org,
                })

            # A short page means this was the last one. Use the raw page size,
            # not the filtered row count, so filtering never ends paging early.
            if len(awards) < rpp:
                break
            offset += rpp

    return pd.DataFrame(rows, columns=columns)


In [25]:
# Fetch awards for every candidate name over the START..TODAY window
# (performs live HTTP requests, one paginated query per name).
nsf_df_org= fetch_nsf_awards(ORG_CANDIDATES, START, TODAY)

In [26]:
# Preview the fetched awards; the orgCandidate column records which queried
# name produced each row.
nsf_df_org

Unnamed: 0,id,agency,awardeeName,awardeeCity,awardeeStateCode,piFirstName,piLastName,title,date,startDate,expDate,fundsObligatedAmt,abstractText,orgCandidate
0,2432754,NSF,XN HEALTH INC.,NEWARK,NJ,Elizabeth,Jaworski,STTR Phase I: Tracheal Phrenic Nerve Stimulat...,07/11/2025,07/15/2025,06/30/2026,274841,The broader/commercial impact of this Small Bu...,Corewell Health
1,2505338,NSF,Oregon Health & Science University,PORTLAND,OR,Paul,Tratnyek,Electrochemical Characterization of Redox Proc...,05/22/2025,07/01/2025,06/30/2028,410000,With support from the Environmental Chemical S...,Corewell Health
2,2451412,NSF,"ABSTRACTIVE HEALTH, INC.",NEW YORK,NY,Vince,Hartman,SBIR Phase II : A tool to automate a narrative...,07/01/2025,07/01/2025,06/30/2027,1250000,The broader impact/commercial potential of thi...,Corewell Health
3,2419342,NSF,"INSU HEALTH DESIGN, INC.",MAYAGUEZ,PR,Mason,Lucich,SBIR Phase II: Insu Health Design: Temperature...,05/15/2025,05/15/2025,04/30/2027,998527,This Small Business Innovation Research (SBIR)...,Corewell Health
4,2444410,NSF,University of North Texas Health Science Cente...,FORT WORTH,TX,Austin,Reynolds,Collaborative Research: Genomic and Isotopic A...,04/07/2025,04/15/2025,03/31/2028,63318,Studies of human population genetics can revea...,Corewell Health
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1510,0525026,NSF,University of Texas Health Science Center San ...,SAN ANTONIO,TX,DAVID,KATERNDAHL,Dynamic Patterns of Husband-to-Wife Abuse,12/12/2005,12/15/2005,11/30/2007,124214,This exploratory research study will evaluate ...,Beaumont Health
1511,0534797,NSF,Oregon Health & Science University,PORTLAND,OR,Paul,Gorman,Collaborative Research: Supporting Rapid Trans...,10/19/2005,11/01/2005,10/31/2009,192037,This is a project to understand essential issu...,Beaumont Health
1512,0635493,NSF,Oregon Health & Science University,PORTLAND,OR,Bradley,Tebo,Bacterial Manganese (II) Oxidation in the Guay...,06/16/2006,10/01/2005,03/31/2008,185289,ABSTRACT OCE- 0352081 Hydrothermal vents a...,Beaumont Health
1513,0513492,NSF,Oregon Health & Science University,PORTLAND,OR,James,Pankow,Thermodynamic Modeling of Atmospheric Organic ...,09/20/2005,10/01/2005,04/30/2009,424999,This project involves the further development ...,Beaumont Health


In [None]:
# Persist the fetched awards to CSV.
# NOTE(review): to_csv writes the DataFrame index as an unnamed first column
# by default; consider index=False unless a downstream reader relies on that
# column — confirm before changing the file format.
nsf_df_org.to_csv("../Data/NSF_data_20years.csv")

In [30]:
# Sanity check: (row count, column count) of the fetched frame.
nsf_df_org.shape

(1515, 14)