In [1]:
import time
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import requests

In [3]:
# URL that every query in this notebook is POSTed to (NIH RePORTER v2 project search).
base_url = "https://api.reporter.nih.gov/v2/projects/search"

In [5]:
# Inclusive date window to scan: 1 January 2025 through 14 February 2025.
start_date = datetime(year=2025, month=1, day=1)
end_date = datetime(year=2025, month=2, day=14)

# Column-oriented accumulator: one independent, initially empty list per
# output column, filled award by award and later turned into a DataFrame.
dfdata = {
    column: []
    for column in ('award_date', 'project_number', 'award_amount', 'award_type')
}

In [7]:
# Download the awards for every day from start_date through end_date
# (inclusive) and append one entry per award to each dfdata column.
PAGE_SIZE = 100       # records requested per API call
REQUEST_TIMEOUT = 30  # seconds before an unresponsive request is abandoned

# Empty the accumulator columns first so that re-running this cell does not
# append a second copy of every award.
for column in dfdata.values():
    column.clear()

current_date = start_date
while current_date <= end_date:
    # Dates are sent as MM/DD/YYYY strings; each query spans the current day
    # to the following day.
    date_str = current_date.strftime("%m/%d/%Y")
    date_plus_1 = (current_date + timedelta(days=1)).strftime("%m/%d/%Y")

    offset = 0
    awards_found = 0
    request_failed = False

    # Page through the results: one response carries at most PAGE_SIZE
    # records, so a day with more awards than that needs several requests.
    while True:
        params = {
            "criteria":
            {
                "award_notice_date": {
                    "from_date": date_str,
                    "to_date": date_plus_1
                },
                "award_types": [ "1", "2", "3", "4C", "4N" ],
            },
            "include_fields": [
                "AwardNoticeDate",
                "ProjectNum",
                "AwardAmount",
                "AwardType"
            ],
            "offset": offset,
            "limit": PAGE_SIZE
        }

        try:
            response = requests.post(base_url, json=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Network failure or timeout: report it and move on to the next
            # day, the same way a non-200 response is handled.
            print(f"Error: request for {date_str} failed - {exc}")
            request_failed = True
            break

        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            request_failed = True
            break

        # A missing or null "results" entry is treated as an empty page.
        awards = response.json().get("results") or []
        for award in awards:
            # A missing or null amount is recorded as 0.
            award_amount = award.get("award_amount") or 0
            project_num = award.get("project_num", "N/A")
            award_type = award.get("award_type", "N/A")

            # The notice date may be absent or null; only parse real values
            # so strptime is never handed None.
            raw_notice_date = award.get("award_notice_date")
            if raw_notice_date and raw_notice_date != "N/A":
                award_notice_date = datetime.strptime(
                    raw_notice_date, "%Y-%m-%dT%H:%M:%SZ"
                ).strftime("%m/%d/%Y")
            else:
                award_notice_date = "N/A"

            dfdata['award_date'].append(award_notice_date)
            dfdata['project_number'].append(project_num)
            dfdata['award_amount'].append(award_amount)
            dfdata['award_type'].append(award_type)

        awards_found += len(awards)

        # A short page means there is nothing further to fetch for this day.
        if len(awards) < PAGE_SIZE:
            break

        offset += PAGE_SIZE
        # Wait for 1 second before requesting the next page
        time.sleep(1)

    if not request_failed and awards_found == 0:
        print(f"No awards found for {date_str}")

    # Move to the next day
    current_date += timedelta(days=1)

    # Wait for 1 second before making the next request
    time.sleep(1)


No awards found for 01/04/2025
No awards found for 01/05/2025
No awards found for 01/12/2025
No awards found for 01/26/2025
No awards found for 02/02/2025
No awards found for 02/04/2025
No awards found for 02/06/2025
No awards found for 02/07/2025
No awards found for 02/08/2025
No awards found for 02/09/2025
No awards found for 02/10/2025
No awards found for 02/11/2025
No awards found for 02/12/2025
No awards found for 02/13/2025
No awards found for 02/14/2025


In [9]:
# Build the award table: each dfdata key becomes a column, each list its values.
df = pd.DataFrame.from_dict(dfdata)

In [11]:
# Show the assembled award table as this cell's output.
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,01/01/2025,1R01ES036436-01A1,702364,1
1,01/01/2025,1R35GM157007-01,372100,1
2,01/01/2025,1R01ES036253-01A1,709575,1
3,01/01/2025,1R01CA296842-01,665368,1
4,01/01/2025,1K23NS136843-01A1,227445,1
...,...,...,...,...
915,02/03/2025,1R21CA296712-01,269280,1
916,02/03/2025,1K08NS140551-01,190485,1
917,02/03/2025,1R01CA285337-01A1,585931,1
918,02/03/2025,1R21MH139202-01,185165,1


In [83]:
# Convert award_date from MM/DD/YYYY text to datetime values (shown in ISO form).
# The explicit format removes any day/month guessing, and errors="coerce"
# turns the "N/A" placeholder stored for awards without a notice date into
# NaT instead of raising.
df["award_date"] = pd.to_datetime(df["award_date"], format="%m/%d/%Y", errors="coerce")
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,2025-01-01,1R01ES036436-01A1,702364,1
1,2025-01-01,1R35GM157007-01,372100,1
2,2025-01-01,1R01ES036253-01A1,709575,1
3,2025-01-01,1R01CA296842-01,665368,1
4,2025-01-01,1K23NS136843-01A1,227445,1
...,...,...,...,...
915,2025-02-03,1R21CA296712-01,269280,1
916,2025-02-03,1K08NS140551-01,190485,1
917,2025-02-03,1R01CA285337-01A1,585931,1
918,2025-02-03,1R21MH139202-01,185165,1


In [115]:
# Save the data to a CSV file under tmp/ whose name carries today's date.
today = datetime.today().strftime("%Y%m%d")
csv_file = "tmp/nih_data-" + today + ".csv"

# to_csv does not create missing directories, so make sure tmp/ exists first.
Path(csv_file).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_file, index = False)

print(f"CSV file '{csv_file}' created successfully.")

CSV file 'tmp/nih_data-20250214.csv' created successfully.


In [107]:
# Print the award total and award count for every calendar day from
# start_date through end_date, including days with no awards at all.
n_days = (end_date - start_date).days + 1
for day_index in range(n_days):
    # Format the day as YYYY-MM-DD; that text is what gets matched against award_date.
    date_str = (start_date + timedelta(days=day_index)).strftime("%Y-%m-%d")
    day_awards = df.loc[df['award_date'] == date_str]
    total = day_awards['award_amount'].sum()
    count = len(day_awards)
    print(f"Date: {date_str}: Award total: ${total:,} (n = {count})")


Date: 2025-01-01: Award total: $12,800,727 (n = 13)
Date: 2025-01-02: Award total: $6,019,132 (n = 17)
Date: 2025-01-03: Award total: $10,390,225 (n = 29)
Date: 2025-01-04: Award total: $0 (n = 0)
Date: 2025-01-05: Award total: $0 (n = 0)
Date: 2025-01-06: Award total: $8,215,139 (n = 14)
Date: 2025-01-07: Award total: $9,054,572 (n = 24)
Date: 2025-01-08: Award total: $7,096,584 (n = 28)
Date: 2025-01-09: Award total: $5,615,718 (n = 26)
Date: 2025-01-10: Award total: $10,693,322 (n = 35)
Date: 2025-01-11: Award total: $87,918 (n = 1)
Date: 2025-01-12: Award total: $0 (n = 0)
Date: 2025-01-13: Award total: $14,988,735 (n = 40)
Date: 2025-01-14: Award total: $17,289,318 (n = 37)
Date: 2025-01-15: Award total: $29,243,753 (n = 44)
Date: 2025-01-16: Award total: $15,855,269 (n = 42)
Date: 2025-01-17: Award total: $67,454,809 (n = 89)
Date: 2025-01-18: Award total: $224,091 (n = 3)
Date: 2025-01-19: Award total: $597,574 (n = 2)
Date: 2025-01-20: Award total: $7,024,601 (n = 22)
Date: 202

In [111]:
# Summarise each award date: total dollars awarded and number of projects.
# Only dates that occur in df form a group, so award-free days are absent.
(
    df.groupby("award_date")
    .agg(
        total=pd.NamedAgg(column="award_amount", aggfunc="sum"),
        count=pd.NamedAgg(column="project_number", aggfunc="count"),
    )
    .reset_index()
)

Unnamed: 0,award_date,total,count
0,2025-01-01,12800727,13
1,2025-01-02,6019132,17
2,2025-01-03,10390225,29
3,2025-01-06,8215139,14
4,2025-01-07,9054572,24
5,2025-01-08,7096584,28
6,2025-01-09,5615718,26
7,2025-01-10,10693322,35
8,2025-01-11,87918,1
9,2025-01-13,14988735,40


In [113]:
# Number of awards recorded under each award type code.
df.groupby("award_type").size()

award_type
1     626
2     121
3     144
4C      1
4N     28
dtype: int64

In [None]:
# Reference request body: an example (working) query taken from the
# NIH Reporter documentation. Assigning it replaces any earlier `params`.
params = {
    "criteria": {
        "funding_mechanism": [
            "SB", "RP", "RC", "OR", "TR", "TI",
            "CO", "IM", "IAA", "SRDC", "Other",
        ],
        "newly_added_projects_only": True,
    },
    "include_fields": [
        "ApplId", "SubprojectId", "FiscalYear", "ProjectNum",
        "ProjectSerialNum", "Organization", "OrganizationType",
        "AwardType", "ActivityCode", "AwardAmount", "ProjectNumSplit",
        "PrincipalInvestigators", "ProgramOfficers",
        "AgencyIcAdmin", "AgencyIcFundings", "CongDist",
        "ProjectStartDate", "ProjectEndDate", "FullFoa", "FullStudySection",
        "AwardNoticeDate", "CoreProjectNum", "PrefTerms", "ProjectTitle",
        "PhrText", "SpendingCategoriesDesc", "ArraFunded",
        "BudgetStart", "BudgetEnd", "CfdaCode", "FundingMechanism",
        "DirectCostAmt", "IndirectCostAmt",
    ],
    "offset": 0,
    "limit": 25,
    "sort_field": "project_start_date",
    "sort_order": "desc",
}
