# Download the Full-Text Funding Opportunity Documents for the RADx-rad Program
This notebook downloads and parses the core Funding Opportunity Announcements (FOAs) for the RADx-rad program and saves the text of each FOA in a separate CSV file.

**Author:** Peter W. Rose ([pwrose@ucsd.edu](mailto:pwrose@ucsd.edu))  
**Date:** 2025-03-13

In [1]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
# Input: CSV file listing the core RADx-rad funding opportunities.
# NOTE: despite the "_DIR" suffix, this is the path of a file, not a directory.
FOA_DIR = "../data/funding_opportunities.csv"
# Output: root directory below which the derived files are written.
DERIVED_DATA = "../derived_data"

## Load the list of FOAs

In [3]:
# Read the table of funding opportunities from the CSV file named by FOA_DIR.
foa_info = pd.read_csv(filepath_or_buffer=FOA_DIR)
# Bare last expression so that the notebook renders the table.
foa_info

Unnamed: 0,id,name,url,sub_project
0,RFA-OD-20-023,Emergency Awards: RADx-rad Predicting Viral-As...,https://grants.nih.gov/grants/guide/rfa-files/...,PreVAIL kIds
1,RFA-OD-20-022,Emergency Awards: Chemosensory Testing as a CO...,https://grants.nih.gov/grants/guide/rfa-files/...,Chemosensory Testing
2,RFA-OD-20-016,Emergency Awards: RADx-RAD Multimodal COVID-19...,https://grants.nih.gov/grants/guide/rfa-files/...,Multimodal Surveillance
3,RFA-OD-20-015,Emergency Awards: RADx-rad Wastewater Detectio...,https://grants.nih.gov/grants/guide/rfa-files/...,Wastewater
4,RFA-OD-20-014,Emergency Awards: Automatic Detection and Trac...,https://grants.nih.gov/grants/guide/rfa-files/...,Automatic Detection & Tracing
5,RFA-OD-20-018,Emergency Awards: Exosome-based Non-traditiona...,https://grants.nih.gov/grants/guide/rfa-files/...,Exosome
6,RFA-OD-20-017,Emergency Awards RADx-RAD: Screening for COVID...,https://grants.nih.gov/grants/guide/rfa-files/...,SCENT
7,RFA-OD-20-021,Emergency Awards RADx-RAD: Novel Biosensing fo...,https://grants.nih.gov/grants/guide/rfa-files/...,Novel Biosensing and VOC


## Download, parse, and save each FOA in a separate file

In [4]:
def load_document_from_url(url, timeout=30):
    """Download an HTML page and return the text of its paragraphs.

    Parameters
    ----------
    url : str
        Address of the HTML document to download.
    timeout : float, optional
        Number of seconds to wait for the server before giving up
        (default: 30). Without a timeout a stalled connection would
        block the notebook indefinitely.

    Returns
    -------
    str
        The text of every paragraph (``p``) element, in document order,
        separated by newline characters.

    Raises
    ------
    requests.exceptions.HTTPError
        If the server responds with a 4xx or 5xx status code.
    requests.exceptions.RequestException
        For other request failures raised by ``requests`` (for example a
        timeout or connection error).
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors; otherwise the text of an error page would
    # be returned as if it were the requested document.
    response.raise_for_status()

    # Parse the HTML document
    soup = BeautifulSoup(response.text, "html.parser")
    # Extract the text of all paragraphs
    paragraphs = [p.get_text() for p in soup.find_all("p")]
    # Separate paragraphs with a real newline (the original used the literal
    # two-character string "/n").
    return "\n".join(paragraphs)

In [5]:
def get_foa_full_text(row):
    """Download the full text of one FOA and save it as a CSV file.

    The file is written to ``<DERIVED_DATA>/foas/<sub_project>.csv`` and
    holds a single record: all fields of ``row`` plus a ``summary`` column
    with the downloaded text. If that file already exists, nothing is
    downloaded or written.

    Parameters
    ----------
    row : pandas.Series
        One row of the FOA table; the fields ``url``, ``id`` and
        ``sub_project`` are read.

    Returns
    -------
    None
    """
    foa_url, foa_id, project_name = row["url"], row["id"], row["sub_project"]

    print(f"parsing: {foa_id}")

    # All FOA files are collected in the "foas" folder below DERIVED_DATA.
    out_dir = os.path.join(DERIVED_DATA, "foas")
    os.makedirs(out_dir, exist_ok=True)
    out_file = os.path.join(out_dir, f"{project_name}.csv")

    if not os.path.isfile(out_file):
        # Colons are replaced by spaces because they sometimes mislead the LLM.
        full_text = load_document_from_url(foa_url).replace(":", " ")

        # One record: the original row fields followed by the downloaded text.
        record = {**row.to_dict(), "summary": [full_text]}
        pd.DataFrame(record).to_csv(out_file, index=False)

        print(f"Saving {project_name} FOA to: {out_file}")

In [6]:
# Download and save every FOA listed in the table. An explicit loop is used
# because get_foa_full_text is called only for its side effects (it returns
# None), and assigning a throwaway result to "_" would shadow IPython's
# last-output variable.
for _row_label, foa_row in foa_info.iterrows():
    get_foa_full_text(foa_row)

parsing: RFA-OD-20-023
parsing: RFA-OD-20-022
parsing: RFA-OD-20-016
parsing: RFA-OD-20-015
parsing: RFA-OD-20-014
parsing: RFA-OD-20-018
parsing: RFA-OD-20-017
parsing: RFA-OD-20-021
