In [1]:
import time
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import requests

In [3]:
# Endpoint that the project-search queries in the collection loop are POSTed to.
base_url = "https://api.reporter.nih.gov/v2/projects/search"

In [9]:
# Column-oriented accumulator: one list per output column, one entry per award.
dfdata = { 'award_date': [], 'project_number': [], 'award_amount': [], 'award_type': [] }

# Number of records requested per call. A page that comes back completely full
# may not be the last one, so the loop below keeps asking for further pages.
page_size = 500

# Four separate windows, each running 1 Nov (year) - 16 Feb (year + 1),
# for year = 2021 .. 2024; the last window therefore ends on 16 Feb 2025.
for year in range(2021, 2025):
    start_date = datetime(year, 11, 1)
    end_date = datetime((year + 1), 2, 16)
    current_date = start_date

    while current_date <= end_date:
        # Format the current date as a string in MM/DD/YYYY format
        date_str = current_date.strftime("%m/%d/%Y")
        date_plus_1 = (current_date + timedelta(days = 1)).strftime("%m/%d/%Y")

        print(f"Querying award date: {date_str}")

        # NOTE(review): the query window runs from date_str to date_plus_1. If the
        # API treats to_date as inclusive of the whole next day, next-day awards
        # would be collected twice -- confirm, and de-duplicate downstream if so.
        offset = 0
        while True:
            # Define the query parameters according to the instructions in the document
            params = {
                "criteria":
                {
                    "award_notice_date": {
                        "from_date": date_str,
                        "to_date": date_plus_1
                        },
                    "exclude_subprojects": True
                    },
                "include_fields": [
                    "AwardNoticeDate",
                    "ProjectNum",
                    "AwardAmount",
                    "AwardType"
                    ],
                "offset": offset,
                "limit": page_size
                }

            # Make the API request
            try:
                # see https://docs.python-requests.org/en/latest/user/advanced/#timeouts for timeout param
                response = requests.post(base_url, json=params, timeout = (3.05, 27))
            except requests.exceptions.RequestException as e:
                # Give up on this date. Falling through would either hit an
                # undefined `response` or re-process the previous, stale response
                # and append its rows a second time.
                print(f"==> Error: {e}")
                break

            # Check if the request was successful
            if response.status_code != 200:
                print(f"Error: {response.status_code} - {response.text}")
                break

            data = response.json()

            # A missing or null "results" entry is treated as "no awards"
            awards = data.get("results") or []

            if not awards and offset == 0:
                print(f">> No awards found for {date_str}")

            # Extract the award details for the current page
            for award in awards:
                award_amount = award.get("award_amount", 0)
                if award_amount is None:
                    award_amount = 0
                project_num = award.get("project_num", "N/A")
                award_type = award.get("award_type", "N/A")
                # `or` also covers a key that is present but null, which would
                # otherwise make strptime raise a TypeError
                award_notice_date = award.get("award_notice_date") or "N/A"
                # Convert the award_notice_date to MM/DD/YYYY format
                if award_notice_date != "N/A":
                    award_notice_date = datetime.strptime(award_notice_date, "%Y-%m-%dT%H:%M:%SZ").strftime("%m/%d/%Y")

                dfdata['award_date'].append(award_notice_date)
                dfdata['project_number'].append(project_num)
                dfdata['award_amount'].append(award_amount)
                dfdata['award_type'].append(award_type)

            # A short page means everything for this date has been retrieved
            if len(awards) < page_size:
                break

            # Otherwise request the next page, pausing as between any two requests
            offset += page_size
            time.sleep(1)

        # Move to the next day
        current_date += timedelta(days=1)

        # Wait for 1 second before making the next request
        time.sleep(1)



Querying award date: 11/01/2021
Querying award date: 11/02/2021
Querying award date: 11/03/2021
Querying award date: 11/04/2021
Querying award date: 11/05/2021
Querying award date: 11/06/2021
Querying award date: 11/07/2021
>> No awards found for 11/07/2021
Querying award date: 11/08/2021
Querying award date: 11/09/2021
Querying award date: 11/10/2021
Querying award date: 11/11/2021
Querying award date: 11/12/2021
Querying award date: 11/13/2021
Querying award date: 11/14/2021
Querying award date: 11/15/2021
Querying award date: 11/16/2021
Querying award date: 11/17/2021
Querying award date: 11/18/2021
Querying award date: 11/19/2021
Querying award date: 11/20/2021
Querying award date: 11/21/2021
Querying award date: 11/22/2021
Querying award date: 11/23/2021
Querying award date: 11/24/2021
Querying award date: 11/25/2021
Querying award date: 11/26/2021
Querying award date: 11/27/2021
Querying award date: 11/28/2021
>> No awards found for 11/28/2021
Querying award date: 11/29/2021
Quer

In [11]:
# Build a DataFrame from the column lists in dfdata (each key becomes a column).
df = pd.DataFrame(dfdata)

In [13]:
# Show the frame as this cell's output.
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,11/01/2021,5R01AI130470-05,396250,5
1,11/01/2021,5R25CA214255-05,117558,5
2,11/01/2021,5R01EB028797-03,570894,5
3,11/01/2021,5R37MH080046-15,714476,5
4,11/01/2021,5R21MH125227-02,226590,5
...,...,...,...,...
45735,02/14/2025,5R01NS121405-05,394788,5
45736,02/15/2025,3F32DC022145-01S1,3000,3
45737,02/15/2025,5U01DC019405-04,409204,5
45738,02/15/2025,5R01HL138456-08,672896,5


In [15]:
# change date format to ISO standard
df.award_date = pd.to_datetime(df.award_date)
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,2021-11-01,5R01AI130470-05,396250,5
1,2021-11-01,5R25CA214255-05,117558,5
2,2021-11-01,5R01EB028797-03,570894,5
3,2021-11-01,5R37MH080046-15,714476,5
4,2021-11-01,5R21MH125227-02,226590,5
...,...,...,...,...
45735,2025-02-14,5R01NS121405-05,394788,5
45736,2025-02-15,3F32DC022145-01S1,3000,3
45737,2025-02-15,5U01DC019405-04,409204,5
45738,2025-02-15,5R01HL138456-08,672896,5


In [17]:
# save the data
today = datetime.today().strftime("%Y%m%d")
csv_file = f"~/Documents/Nature/Programming/nih_reporter/data/nih_data-{today}.csv"
df.to_csv(csv_file, index = False)

print(f"CSV file '{csv_file}' created successfully.")

CSV file 'tmp/nih_data-20250219.csv' created successfully.
