In [16]:
import requests
from datetime import datetime, timedelta
import time
import pandas as pd

In [17]:
base_url = "https://api.reporter.nih.gov/v2/projects/search"

In [None]:
# make list of dates
# from https://www.geeksforgeeks.org/python/creating-a-list-of-range-of-dates-in-python/
dfdata = { 'award_date': [], 'project_number': [], 'award_amount': [], 'award_type': [] }

start_date = datetime(2021,1,1)
end_date = datetime(2025,7,23)

dates = []
while start_date <= end_date:
    dates.append(start_date)
    start_date += timedelta(days = 1)

In [None]:
for date in dates: 
    date_plus_1 = (date + timedelta(days = 1)).strftime("%m/%d/%Y")
    date = date.strftime("%m/%d/%Y")

# for year in range(2021, 2025): # ie, 1 Nov 2021 - 16 Feb 2025
#     start_date = datetime(year, 11, 1)
#     end_date = datetime((year + 1), 2, 16) 
#     current_date = start_date
    
    # while current_date <= end_date:
    #     # Format the current date as a string in MM/DD/YYYY format
    #     date_str = current_date.strftime("%m/%d/%Y")
    #     date_plus_1 = (current_date + timedelta(days = 1)).strftime("%m/%d/%Y")

    print(f"Querying award date: {date}")
    # Define the query parameters according to the instructions in the document
    params = {
        "criteria":
        {
            "award_notice_date": {
                "from_date": date,
                "to_date": date_plus_1
            },
            "exclude_subprojects": True,
            "advanced_text_search": {
                "operator": "or",
                "search_field": "projecttitle, abstracttext",
                "search_text": "autism" 
            },
        },
        "include_fields": [
            "AwardNoticeDate",
            "ProjectNum",
            "AwardAmount",
            "AwardType"
        ],
        "offset": 0,
        "limit": 500
    }
   
    # Make the API request
    try:
        # see https://docs.python-requests.org/en/latest/user/advanced/#timeouts for timeout param
        response = requests.post(base_url, json=params, timeout = (3.05, 27))
            
    except Exception as e:
        print(f"==> Error: {e}")
        pass
            
    # Check if the request was successful
    if response.status_code == 200:
        data = response.json()
 
        # Print the full response for debugging
        # print(f"Response for {date_str}: {data}")
       
        # Extract and print the award details for the current date
        awards = data.get("results", [])
        if awards:
            for award in awards:
                award_amount = award.get("award_amount", 0)
                if award_amount is None:
                    award_amount = 0
                project_num = award.get("project_num", "N/A")
                award_type = award.get("award_type", "N/A")
                award_notice_date = award.get("award_notice_date", "N/A")
                # Convert the award_notice_date to MM/DD/YYYY format
                if award_notice_date != "N/A":
                    award_notice_date = datetime.strptime(award_notice_date, "%Y-%m-%dT%H:%M:%S").strftime("%m/%d/%Y")                    
                
                dfdata['award_date'].append(award_notice_date)
                dfdata['project_number'].append(project_num)
                dfdata['award_amount'].append(award_amount)
                dfdata['award_type'].append(award_type)

        else:
            print(f">> No awards found for {date}")
                
    else:
        print(f"Error: {response.status_code} - {response.text}")
   
    # Move to the next day
    # current_date += timedelta(days=1)
   
    # Wait for 1 second before making the next request
    time.sleep(1)



Querying award date: 01/01/2021
>> No awards found for 01/01/2021
Querying award date: 01/02/2021
>> No awards found for 01/02/2021
Querying award date: 01/03/2021
Querying award date: 01/04/2021
Querying award date: 01/05/2021
Querying award date: 01/06/2021
Querying award date: 01/07/2021
Querying award date: 01/08/2021
Querying award date: 01/09/2021
>> No awards found for 01/09/2021
Querying award date: 01/10/2021
>> No awards found for 01/10/2021
Querying award date: 01/11/2021
Querying award date: 01/12/2021
Querying award date: 01/13/2021
Querying award date: 01/14/2021
Querying award date: 01/15/2021
Querying award date: 01/16/2021
>> No awards found for 01/16/2021
Querying award date: 01/17/2021
Querying award date: 01/18/2021
Querying award date: 01/19/2021
Querying award date: 01/20/2021
Querying award date: 01/21/2021
Querying award date: 01/22/2021
Querying award date: 01/23/2021
Querying award date: 01/24/2021
Querying award date: 01/25/2021
Querying award date: 01/26/202

In [23]:
df = pd.DataFrame(dfdata)

In [24]:
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,01/04/2021,5R01HL149926-02,386250,5
1,01/05/2021,5R01MH113670-04,381250,5
2,01/04/2021,5R01HL149926-02,386250,5
3,01/05/2021,5R01GM125749-04,337107,5
4,01/05/2021,5R01MH113670-04,381250,5
...,...,...,...,...
14669,07/16/2025,5R01NS133430-03,616775,5
14670,07/17/2025,5P40OD010995-22,776328,5
14671,07/16/2025,5R01EY027718-09,457561,5
14672,07/16/2025,5R01NS133430-03,616775,5


In [25]:
# change date format to ISO standard
df.award_date = pd.to_datetime(df.award_date)
df

Unnamed: 0,award_date,project_number,award_amount,award_type
0,2021-01-04,5R01HL149926-02,386250,5
1,2021-01-05,5R01MH113670-04,381250,5
2,2021-01-04,5R01HL149926-02,386250,5
3,2021-01-05,5R01GM125749-04,337107,5
4,2021-01-05,5R01MH113670-04,381250,5
...,...,...,...,...
14669,2025-07-16,5R01NS133430-03,616775,5
14670,2025-07-17,5P40OD010995-22,776328,5
14671,2025-07-16,5R01EY027718-09,457561,5
14672,2025-07-16,5R01NS133430-03,616775,5


In [26]:
# save the data
today = datetime.today().strftime("%Y%m%d")
csv_file = f"~/Documents/Nature/Programming/nih_reporter/data/nih_data-{today}.csv"
df.to_csv(csv_file, index = False)

print(f"CSV file '{csv_file}' created successfully.")

CSV file '~/Documents/Nature/Programming/nih_reporter/data/nih_data-20250723.csv' created successfully.
