In [1]:
import time
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import requests

In [3]:
# Search endpoint that the award queries are POSTed to
base_url = "https://api.reporter.nih.gov/v2/projects/search"

In [5]:
# First and last award-notice dates to download
start_date, end_date = datetime(2025, 1, 1), datetime(2025, 2, 14)

# One empty list per output column; rows are appended to these column by column
dfdata = {
    column: []
    for column in ('award_date', 'project_number', 'award_amount', 'award_type')
}

In [9]:
# Download the awards for every day from start_date to end_date (inclusive),
# one query window per day, and store one entry per award in the `dfdata` columns.
#
# * The columns are emptied first, so re-running this cell replaces the data
#   instead of appending a second copy of every row.
# * Each day's query is paged through with `offset`, so a day that matches more
#   than PAGE_SIZE records is not cut off after the first page.
# * A failed request (network error, timeout, non-200 status) is reported and the
#   loop moves on to the next day.
PAGE_SIZE = 100        # records requested per API call
REQUEST_TIMEOUT = 30   # seconds to wait for a response before giving up on a request
API_DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"   # layout used to parse award_notice_date values

for column in dfdata.values():
    column.clear()

current_date = start_date
while current_date <= end_date:
    # The search criteria take the window bounds as MM/DD/YYYY strings
    date_str = current_date.strftime("%m/%d/%Y")
    date_plus_1 = (current_date + timedelta(days=1)).strftime("%m/%d/%Y")

    print(f"Querying award date: {date_str}")

    awards_found = 0
    request_failed = False
    offset = 0
    while True:
        params = {
            "criteria": {
                "award_notice_date": {
                    "from_date": date_str,
                    "to_date": date_plus_1
                },
                "award_types": ["1", "2", "3", "4C", "4N"],
            },
            "include_fields": [
                "AwardNoticeDate",
                "ProjectNum",
                "AwardAmount",
                "AwardType"
            ],
            "offset": offset,
            "limit": PAGE_SIZE
        }

        # Wait for 1 second before every request to avoid hammering the API
        time.sleep(1)
        try:
            response = requests.post(base_url, json=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print(f"Error: request for {date_str} failed - {exc}")
            request_failed = True
            break

        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            request_failed = True
            break

        # A missing or null "results" entry is treated as an empty page
        page = response.json().get("results") or []
        for award in page:
            # Missing or null notice dates are stored as "N/A" rather than parsed
            award_notice_date = award.get("award_notice_date") or "N/A"
            if award_notice_date != "N/A":
                # Convert the API timestamp to MM/DD/YYYY
                award_notice_date = datetime.strptime(
                    award_notice_date, API_DATE_FORMAT
                ).strftime("%m/%d/%Y")

            dfdata['award_date'].append(award_notice_date)
            dfdata['project_number'].append(award.get("project_num", "N/A"))
            # A missing or null amount counts as 0
            dfdata['award_amount'].append(award.get("award_amount") or 0)
            dfdata['award_type'].append(award.get("award_type", "N/A"))

        awards_found += len(page)

        # A short page means there is nothing left to fetch for this window
        if len(page) < PAGE_SIZE:
            break
        offset += PAGE_SIZE

    if awards_found == 0 and not request_failed:
        print(f">> No awards found for {date_str}")

    # Move to the next day
    current_date += timedelta(days=1)


Querying award date: 01/01/2025
Querying award date: 01/02/2025
Querying award date: 01/03/2025
Querying award date: 01/04/2025
>> No awards found for 01/04/2025
Querying award date: 01/05/2025
>> No awards found for 01/05/2025
Querying award date: 01/06/2025
Querying award date: 01/07/2025
Querying award date: 01/08/2025
Querying award date: 01/09/2025
Querying award date: 01/10/2025
Querying award date: 01/11/2025
Querying award date: 01/12/2025
>> No awards found for 01/12/2025
Querying award date: 01/13/2025
Querying award date: 01/14/2025
Querying award date: 01/15/2025
Querying award date: 01/16/2025
Querying award date: 01/17/2025
Querying award date: 01/18/2025
Querying award date: 01/19/2025
Querying award date: 01/20/2025
Querying award date: 01/21/2025
Querying award date: 01/22/2025
Querying award date: 01/23/2025
Querying award date: 01/24/2025
Querying award date: 01/25/2025
Querying award date: 01/26/2025
>> No awards found for 01/26/2025
Querying award date: 01/27/2025


In [11]:
# Build a DataFrame from the collected column lists
df = pd.DataFrame(dfdata)

In [13]:
# Show the downloaded awards
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,01/01/2025,1R01ES036436-01A1,702364,1
1,01/01/2025,1R35GM157007-01,372100,1
2,01/01/2025,1R01ES036253-01A1,709575,1
3,01/01/2025,1R01CA296842-01,665368,1
4,01/01/2025,1K23NS136843-01A1,227445,1
...,...,...,...,...
1834,02/03/2025,1R21CA296712-01,269280,1
1835,02/03/2025,1K08NS140551-01,190485,1
1836,02/03/2025,1R01CA285337-01A1,585931,1
1837,02/03/2025,1R21MH139202-01,185165,1


In [15]:
# Change award_date from MM/DD/YYYY text to real datetimes (shown in ISO form).
# The format is spelled out so the parse does not depend on pandas guessing
# whether the month or the day comes first.
df['award_date'] = pd.to_datetime(df['award_date'], format="%m/%d/%Y")
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,2025-01-01,1R01ES036436-01A1,702364,1
1,2025-01-01,1R35GM157007-01,372100,1
2,2025-01-01,1R01ES036253-01A1,709575,1
3,2025-01-01,1R01CA296842-01,665368,1
4,2025-01-01,1K23NS136843-01A1,227445,1
...,...,...,...,...
1834,2025-02-03,1R21CA296712-01,269280,1
1835,2025-02-03,1K08NS140551-01,190485,1
1836,2025-02-03,1R01CA285337-01A1,585931,1
1837,2025-02-03,1R21MH139202-01,185165,1


In [17]:
# Save the data to a CSV file whose name carries today's date (YYYYMMDD)
today = datetime.today().strftime("%Y%m%d")
csv_file = "tmp/nih_data-" + today + ".csv"

# to_csv does not create missing folders, so make sure the target folder exists
Path(csv_file).parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_file, index = False)

print(f"CSV file '{csv_file}' created successfully.")

CSV file 'tmp/nih_data-20250215.csv' created successfully.


In [19]:
# Tabulate awards by date, including days on which nothing was awarded

def format(x):
    """Return x as a dollar string with thousands separators, e.g. 1234 -> '$1,234'."""
    return "${:,}".format(x)

summary = {'date': [], 'total_awards': [], 'count': []}

current_date = start_date
while current_date <= end_date:
    # Day label in YYYY-MM-DD form; it is also the value compared against award_date
    iso_day = current_date.strftime("%Y-%m-%d")
    day_rows = df[df['award_date'] == iso_day]

    summary['date'].append(iso_day)
    summary['total_awards'].append(sum(day_rows['award_amount']))
    summary['count'].append(len(day_rows))

    current_date = current_date + timedelta(days=1)


In [21]:
# Turn the per-day summary lists into a DataFrame (one row per day)
summarydf = pd.DataFrame(summary)

In [23]:
# Show the daily totals as dollar strings. The frame is rebuilt from `summary`
# each time, so re-running this cell never tries to format values that have
# already been turned into strings.
summarydf = pd.DataFrame(summary).assign(
    total_awards=lambda frame: frame['total_awards'].apply(format)
)

In [25]:
# Show the per-day summary
summarydf

Unnamed: 0,date,total_awards,count
0,2025-01-01,"$25,601,454",26
1,2025-01-02,"$12,038,264",34
2,2025-01-03,"$20,780,450",58
3,2025-01-04,$0,0
4,2025-01-05,$0,0
5,2025-01-06,"$16,430,278",28
6,2025-01-07,"$18,109,144",48
7,2025-01-08,"$14,193,168",56
8,2025-01-09,"$11,231,436",52
9,2025-01-10,"$21,386,644",70


In [27]:
# Same per-day totals via groupby; dates without any awards get no row in the result.
df.groupby('award_date', as_index=False).agg(
    total=('award_amount', 'sum'),
    count=('project_number', 'count'),
)

Unnamed: 0,award_date,total,count
0,2025-01-01,25601454,26
1,2025-01-02,12038264,34
2,2025-01-03,20780450,58
3,2025-01-06,16430278,28
4,2025-01-07,18109144,48
5,2025-01-08,14193168,56
6,2025-01-09,11231436,52
7,2025-01-10,21386644,70
8,2025-01-11,175836,2
9,2025-01-13,29977470,80


In [29]:
# Number of rows for each award type
df.groupby('award_type').size()

award_type
1     1252
2      241
3      288
4C       2
4N      56
dtype: int64

In [None]:
# Example (working) query from the NIH Reporter documentation, kept for reference
params = dict(
    criteria=dict(
        funding_mechanism=[
            "SB", "RP", "RC", "OR", "TR", "TI", "CO", "IM", "IAA", "SRDC", "Other",
        ],
        newly_added_projects_only=True,
    ),
    include_fields=[
        "ApplId",
        "SubprojectId",
        "FiscalYear",
        "ProjectNum",
        "ProjectSerialNum",
        "Organization",
        "OrganizationType",
        "AwardType",
        "ActivityCode",
        "AwardAmount",
        "ProjectNumSplit",
        "PrincipalInvestigators",
        "ProgramOfficers",
        "AgencyIcAdmin",
        "AgencyIcFundings",
        "CongDist",
        "ProjectStartDate",
        "ProjectEndDate",
        "FullFoa",
        "FullStudySection",
        "AwardNoticeDate",
        "CoreProjectNum",
        "PrefTerms",
        "ProjectTitle",
        "PhrText",
        "SpendingCategoriesDesc",
        "ArraFunded",
        "BudgetStart",
        "BudgetEnd",
        "CfdaCode",
        "FundingMechanism",
        "DirectCostAmt",
        "IndirectCostAmt",
    ],
    offset=0,
    limit=25,
    sort_field="project_start_date",
    sort_order="desc",
)
