In [1]:
pip install requests

Note: you may need to restart the kernel to use updated packages.


In [120]:
# pull data from NIH RePORTER API

import requests
import json

# Define the endpoint and headers
url = "https://api.reporter.nih.gov/v2/projects/search"
headers = {"Content-Type": "application/json"}

# Customize the search parameters
payload = {
    "criteria": {
        "org_names": ["YALE UNIVERSITY"], 
        "fiscal_years": [2024,2025], 
        "award_notice_date":{ "from_date":"2024-01-20", "to_date":"2024-04-22" }, 
        "award_notice_date":{ "from_date":"2025-01-20", "to_date":"2025-04-22" },
        "award_notice_date":{ "from_date":"2024-01-20", "to_date":"2024-04-22" }, 

    },
    "includeFields": [
        "project_title", "contact_pi_name", "award_amount", "fiscal_year", "principal_investigators",
        "organization_name", "project_num", "agency", "project_start_date", "project_end_date", "organization", 
        "organization_type", "award_notice_date"
        
    ],
    "offset": 0,
    "limit": 100

}

# Prepare an empty list to store all project data
all_projects = []

# Send requests and handle pagination
while True:
    # Send the request to the API
    response = requests.post(url, headers=headers, json=payload)

    # Check if the response was successful
    if response.status_code == 200:
        data = response.json()
        projects = data['results']

        # Append projects to the all_projects list
        for project in projects:
            title = project.get("project_title", "No title")
            pi_list = project.get("principal_investigators", [])
            pi_name = pi_list[0]["full_name"] if pi_list else "N/A"
            amount = project.get("award_amount", "N/A")
            year = project.get("fiscal_year", "N/A")
            project_start_date = project.get("project_start_date", "N/A")
            project_end_date = project.get("project_end_date", "N/A")
            agency_list = project.get("agency_ic_fundings", "N/A")
            agency = agency_list[0]["abbreviation"] if agency_list else "N/A"
            org_type_list = project.get("organization_type", "N/A")
            org_type = org_type_list["name"] if org_type_list else "N/A"
            award_date = project.get("award_notice_date", "N/A")
            
            # Add the extracted data to the list
            all_projects.append({
                'Project Title': title,
                'Principal Investigator': pi_name,
                'Award Amount': amount,
                'Fiscal Year': year,
                'Project Start Date': project_start_date,
                'Project End Date': project_end_date,
                'Award Date': award_date,
                'Agency': agency,
                'Org Type': org_type
            })

        # Check if there are more results to fetch (next page)
        if len(projects) < payload['limit']:
            break  # If fewer results than the limit, we've reached the last page

        # Update the offset for the next page
        payload['offset'] += payload['limit']
    else:
        print("Failed to retrieve data:", response.status_code)
        print("Error message:", response.json())

        break


In [117]:
print(all_projects)

[{'Project Title': 'Mitochondrial Mechanisms Promoting Innate and Intestinal Immunity', 'Principal Investigator': 'CLARA  ABRAHAM', 'Award Amount': 454986, 'Fiscal Year': 2025, 'Project Start Date': '2023-03-15T00:00:00', 'Project End Date': '2027-01-31T00:00:00', 'Award Date': '2025-02-20T00:00:00', 'Agency': 'NIDDK', 'Org Type': 'SCHOOLS OF MEDICINE'}, {'Project Title': 'Dual-isotope SPECT imaging and immunophenotyping of immune cells to determine response to immunotherapy', 'Principal Investigator': 'Harriet M. Kluger', 'Award Amount': 618441, 'Fiscal Year': 2025, 'Project Start Date': '2023-02-07T00:00:00', 'Project End Date': '2028-01-31T00:00:00', 'Award Date': '2025-01-22T00:00:00', 'Agency': 'NCI', 'Org Type': 'SCHOOLS OF MEDICINE'}, {'Project Title': 'Yale-METRO Metropolitan Emergency Trial netwoRK to advance patient Outcomes', 'Principal Investigator': "Gail  D'Onofrio", 'Award Amount': 371900, 'Fiscal Year': 2025, 'Project Start Date': '2023-02-01T00:00:00', 'Project End Dat

In [122]:
import csv 

# Specify the file path and name
csv_file_path = 'nih_grants_2024_2025_all.csv'

# Define the fieldnames (these are the column headers in the CSV file)
fieldnames = [
    'Project Title', 'Principal Investigator', 'Award Amount', 'Fiscal Year',
    'Project Start Date', 'Project End Date', 'Award Date', 'Agency', 'Org Type'
]

# Open the CSV file in write mode
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    # Create a CSV DictWriter to write data in key-value pairs
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    
    # Write the header (fieldnames are used as column headers)
    writer.writeheader()
    
    # Write the rows (the extracted project data)
    writer.writerows(all_projects)

print(f"Data saved to {csv_file_path}")

Data saved to nih_grants_2024_2025_all.csv
