In [1]:
import time
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import requests

In [2]:
# Endpoint that the query cell below sends its POST search requests to.
base_url = "https://api.reporter.nih.gov/v2/projects/search"

In [4]:
# Column-oriented accumulator: one list per output column. Every award appends
# exactly one value to each list, so the lists stay the same length and can be
# handed straight to pd.DataFrame afterwards.
dfdata = { 'award_date': [], 'project_number': [], 'award_amount': [], 'award_type': [], "project_url": [], 'keyword': [] }

# Keywords to search for. Each one is queried on its own and recorded in the
# 'keyword' column of the rows it produced.
SEARCH_TERMS = [ "diversity", "LGBTQ" ]

# Award-notice date window sent with every query.
start_date = datetime(2024, 1, 1)
end_date = datetime(2024, 1, 31)

# Records requested per call.
# NOTE(review): 500 is believed to be the maximum the API accepts - confirm
# against the RePORTER API documentation before raising it.
PAGE_SIZE = 500

# (connect, read) timeouts in seconds;
# see https://docs.python-requests.org/en/latest/user/advanced/#timeouts
REQUEST_TIMEOUT = (3.05, 27)

for term in SEARCH_TERMS:
    print(f"Querying search term: {term}")

    # Walk through the result pages; a single request returns at most
    # PAGE_SIZE records, so stopping after the first page would silently
    # truncate larger result sets.
    offset = 0
    while True:
        params = {
            "criteria":
            {
                "award_notice_date": {
                    "from_date": start_date.strftime("%m/%d/%Y"),
                    "to_date": end_date.strftime("%m/%d/%Y")
                    },
                "exclude_subprojects": True,
                "advanced_text_search": {
                    "operator": "and",
                    "search_field": "all",
                    "search_text": term
                    }
                },
            "include_fields": [
                "AwardNoticeDate",
                "ProjectNum",
                "AwardAmount",
                "AwardType",
                "ProjectDetailUrl"
                ],
            "offset": offset,
            "limit": PAGE_SIZE
            }

        try:
            response = requests.post(base_url, json=params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Stop working on this term: there is no fresh response to inspect.
            # Falling through would either raise NameError (first request) or
            # re-read the previous response and file its awards under the
            # wrong keyword.
            print(f"==> Error: {e}")
            break

        if response.status_code != 200:
            print(f"Error: {response.status_code} - {response.text}")
            break

        # "results" may be absent or null; treat both as an empty page.
        awards = response.json().get("results") or []
        if not awards and offset == 0:
            print(f">> No awards found for term {term}")

        for award in awards:
            # A null amount is recorded as 0.
            award_amount = award.get("award_amount") or 0
            project_num = award.get("project_num", "N/A")
            award_type = award.get("award_type", "N/A")
            award_url = award.get("project_detail_url", "N/A")

            # The key can be present with a null value, which would bypass a
            # .get() default and crash strptime; map both cases to "N/A".
            award_notice_date = award.get("award_notice_date") or "N/A"
            if award_notice_date != "N/A":
                # Convert the timestamp string to MM/DD/YYYY format.
                award_notice_date = datetime.strptime(award_notice_date, "%Y-%m-%dT%H:%M:%S").strftime("%m/%d/%Y")

            dfdata['award_date'].append(award_notice_date)
            dfdata['project_number'].append(project_num)
            dfdata['award_amount'].append(award_amount)
            dfdata['award_type'].append(award_type)
            dfdata['project_url'].append(award_url)
            dfdata['keyword'].append(term)

        # A short page means there is nothing left to fetch for this term.
        if len(awards) < PAGE_SIZE:
            break
        offset += PAGE_SIZE

        # Wait for 1 second before requesting the next page
        time.sleep(1)

    # Wait for 1 second before making the next request
    time.sleep(1)



Querying search term: diversity
Querying search term: LGBTQ


In [5]:
# Build the table from the per-column lists collected in dfdata
# (one row per award record appended by the query loop).
df = pd.DataFrame(dfdata)

In [6]:
# Show the collected records as this cell's output.
df

Unnamed: 0,award_date,project_number,award_amount,award_type,project_url,keyword
0,01/10/2024,5G13LM014176-02,49762,5,https://reporter.nih.gov/project-details/10757924,diversity
1,01/01/2024,1T32HL172255-01,309555,1,https://reporter.nih.gov/project-details/10809399,diversity
2,01/24/2024,5R24OD024617-07,779278,5,https://reporter.nih.gov/project-details/10764712,diversity
3,01/19/2024,5R25HL103156-14,93528,5,https://reporter.nih.gov/project-details/10771961,diversity
4,01/26/2024,1R13OD036186-01,5000,1,https://reporter.nih.gov/project-details/10826020,diversity
...,...,...,...,...,...,...
414,01/25/2024,5R01MH121194-04,642346,5,https://reporter.nih.gov/project-details/10774201,LGBTQ
415,01/08/2024,5R13EY030356-05,60000,5,https://reporter.nih.gov/project-details/10757386,LGBTQ
416,01/26/2024,5R21HD108052-02,235128,5,https://reporter.nih.gov/project-details/10752716,LGBTQ
417,01/26/2024,5P30MH133399-02,2169482,5,https://reporter.nih.gov/project-details/10817201,LGBTQ


In [7]:
# Change the date format to the ISO standard by parsing the MM/DD/YYYY strings
# into datetime values.
# - An explicit format keeps pandas from having to infer the day/month order.
# - errors="coerce" turns the "N/A" placeholder (written when an award has no
#   notice date) into NaT instead of raising.
df["award_date"] = pd.to_datetime(df["award_date"], format="%m/%d/%Y", errors="coerce")
df

Unnamed: 0,award_date,project_number,award_amount,award_type,project_url,keyword
0,2024-01-10,5G13LM014176-02,49762,5,https://reporter.nih.gov/project-details/10757924,diversity
1,2024-01-01,1T32HL172255-01,309555,1,https://reporter.nih.gov/project-details/10809399,diversity
2,2024-01-24,5R24OD024617-07,779278,5,https://reporter.nih.gov/project-details/10764712,diversity
3,2024-01-19,5R25HL103156-14,93528,5,https://reporter.nih.gov/project-details/10771961,diversity
4,2024-01-26,1R13OD036186-01,5000,1,https://reporter.nih.gov/project-details/10826020,diversity
...,...,...,...,...,...,...
414,2024-01-25,5R01MH121194-04,642346,5,https://reporter.nih.gov/project-details/10774201,LGBTQ
415,2024-01-08,5R13EY030356-05,60000,5,https://reporter.nih.gov/project-details/10757386,LGBTQ
416,2024-01-26,5R21HD108052-02,235128,5,https://reporter.nih.gov/project-details/10752716,LGBTQ
417,2024-01-26,5P30MH133399-02,2169482,5,https://reporter.nih.gov/project-details/10817201,LGBTQ


In [8]:
# Save the data to a CSV file whose name carries today's date (YYYYMMDD).
today = datetime.today().strftime("%Y%m%d")
csv_file = f"~/Documents/Nature/Programming/nih_reporter/data/nih_data-{today}.csv"

# to_csv raises OSError when the target directory is missing, so create it
# first. pathlib does not expand "~" on its own, hence the explicit expanduser().
Path(csv_file).expanduser().parent.mkdir(parents=True, exist_ok=True)
df.to_csv(csv_file, index = False)

print(f"CSV file '{csv_file}' created successfully.")

CSV file '~/Documents/Nature/Programming/nih_reporter/data/nih_data-20250429.csv' created successfully.
