In [1]:
# If any of the imports below fail with "ModuleNotFoundError",
# uncomment the install line below and run this cell once
# (%pip installs into the environment of the running kernel).

# %pip install requests beautifulsoup4 pandas tqdm

import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm.notebook import tqdm  # nice progress bars in Jupyter

# Site root; relative hrefs scraped from the listing are resolved against it.
BASE_URL = "https://nij.ojp.gov"
# Awards listing endpoint; the page to fetch is selected with a `page` query parameter.
LIST_URL = f"{BASE_URL}/funding/awards/list"


In [2]:
def fetch_page_html(page: int = 0, timeout: float = 30.0) -> str:
    """Download one page of the awards listing and return its HTML text.

    Args:
        page: Page index sent as the ``page`` query parameter (defaults to 0).
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so without this a stalled
            connection would block the notebook indefinitely.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(LIST_URL, params={"page": page}, timeout=timeout)
    print(f"Requested URL: {response.url}")
    # Fail loudly on an error status instead of handing an error page to the parser.
    response.raise_for_status()
    return response.text

html_page0 = fetch_page_html(page=0)

# Show a middle slice of the HTML text (sometimes more informative than the very beginning).
midpoint = len(html_page0) // 2
# Clamp at 0: for a response shorter than 1000 characters a negative start
# would count from the END of the string and print the wrong slice.
start = max(midpoint - 500, 0)
end = midpoint + 500
print(html_page0[start:end])


Requested URL: https://nij.ojp.gov/funding/awards/list?page=0
eld-field-fiscal-year is-active">2024          </td>
                                                                                        <td headers="view-title-table-column" class="views-field views-field-title"><a href="/funding/awards/15pnij-24-gg-05448-nijb" hreflang="en">A Pilot Study: Behavioral Response and Community Engagement (BRACE) Team</a>          </td>
                                                                                        <td headers="view-field-funding-opportunity-table-column" class="views-field views-field-field-funding-opportunity"><a href="/funding/opportunities/o-nij-2024-172070" hreflang="en">NIJ FY24 Field-Initiated Action Research Partnerships</a>          </td>
                                                                                        <td headers="view-field-awardee-table-column" class="views-field views-field-field-awardee">UNIVERSITY OF SOUTH FLORIDA          </td>


In [3]:
from bs4 import BeautifulSoup
from urllib.parse import urljoin

def _cell_text_and_link(cell, base_url):
    """Return ``(text, absolute_url)`` for a table cell that may wrap a link.

    The text is taken from the first ``<a>`` in the cell when there is one,
    otherwise from the cell itself. The URL is ``None`` when the cell has no
    ``<a>`` or the ``<a>`` carries no ``href``.
    """
    anchor = cell.find("a")
    if anchor is None:
        return cell.get_text(strip=True), None

    href = anchor.get("href")
    # An anchor without an href would otherwise resolve to base_url itself,
    # which looks like a real link in the output but is not one.
    url = urljoin(base_url, href) if href else None
    return anchor.get_text(strip=True), url


def parse_awards_from_html(html: str):
    """Extract award records from the HTML of one awards-listing page.

    Args:
        html: HTML text of a listing page.

    Returns:
        A list of dicts, one per 8-column table row, with the keys ``fy``,
        ``title``, ``title_url``, ``original_solicitation``,
        ``original_solicitation_url``, ``recipient``, ``state``,
        ``award_number``, ``amount`` and ``status``. All values are
        whitespace-stripped strings, except the two ``*_url`` values, which
        are absolute URLs or ``None`` when the cell has no link.

    Raises:
        ValueError: If the page has no ``<table class="usa-table">`` or that
            table has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Find the awards table
    table = soup.find("table", class_="usa-table")
    if table is None:
        raise ValueError("Could not find awards table on the page.")

    tbody = table.find("tbody")
    if tbody is None:
        raise ValueError("Could not find <tbody> inside the awards table.")

    rows = []

    for tr in tbody.find_all("tr"):
        tds = tr.find_all("td")
        if len(tds) != 8:
            # Skip any rows that don't match the expected 8 columns
            continue

        # Columns 1 and 2 (title, solicitation) may wrap their text in a link.
        title, title_url = _cell_text_and_link(tds[1], BASE_URL)
        solicitation, solicitation_url = _cell_text_and_link(tds[2], BASE_URL)

        # The remaining columns are plain text.
        cell_text = [td.get_text(strip=True) for td in tds]

        rows.append(
            {
                "fy": cell_text[0],
                "title": title,
                "title_url": title_url,
                "original_solicitation": solicitation,
                "original_solicitation_url": solicitation_url,
                "recipient": cell_text[3],
                "state": cell_text[4],
                "award_number": cell_text[5],
                # Amount stays a string such as "$120,000"; clean it downstream if needed.
                "amount": cell_text[6],
                "status": cell_text[7],
            }
        )

    return rows

# Parse the page-0 HTML that an earlier cell already downloaded (no new request here).
rows_page0 = parse_awards_from_html(html_page0)
print(f"Number of awards found on page 0: {len(rows_page0)}")

# Build a DataFrame from the parsed rows; the trailing expression is what the
# notebook renders as this cell's output.
df_page0 = pd.DataFrame(rows_page0)
df_page0.head()


Number of awards found on page 0: 25


Unnamed: 0,fy,title,title_url,original_solicitation,original_solicitation_url,recipient,state,award_number,amount,status
0,2024,A Combined Molecular Analysis of Pollen Utiliz...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Graduate Research Fellowship,https://nij.ojp.gov/funding/opportunities/o-ni...,THE UNIVERSITY OF CENTRAL FLORIDA BOARD OF TRU...,FL,15PNIJ-24-GG-01571-RESS,"$120,000",Open
1,2024,A Comparative Evaluation of the MinION and MiS...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Development in Forensic ...,https://nij.ojp.gov/funding/opportunities/o-ni...,FLORIDA INTERNATIONAL UNIVERSITY,FL,15PNIJ-24-GG-03842-SLFO,"$487,790",Open
2,2024,A Comprehensive Evaluation and Error Rate Asse...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Development in Forensic ...,https://nij.ojp.gov/funding/opportunities/o-ni...,RESEARCH TRIANGLE INSTITUTE,NC,15PNIJ-24-GG-03851-SLFO,"$499,902",Open
3,2024,A Focused Deterrence Program for Juveniles wit...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Evaluation on Youth Just...,https://nij.ojp.gov/funding/opportunities/o-ni...,FLORIDA STATE UNIVERSITY,FL,15PNIJ-24-GG-03092-MUMU,"$1,255,241",Open
4,2024,A Multi-District Analysis of School Safety in ...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Evaluation on School Safety,https://nij.ojp.gov/funding/opportunities/o-ni...,RESEARCH TRIANGLE INSTITUTE,NC,15PNIJ-24-GK-01051-STOP,"$933,524",Open


In [4]:
all_rows = []
failed_pages = []  # page numbers that could not be fetched or parsed

num_pages = 10  # we decided: first 10 pages (0 through 9)

print(f"Starting scrape of {num_pages} pages (0–{num_pages-1})...\n")

for page in tqdm(range(num_pages), desc="Scraping pages"):
    try:
        print(f"\n📄 Fetching page {page} ...")
        html = fetch_page_html(page=page)
        page_rows = parse_awards_from_html(html)
    except (requests.RequestException, ValueError) as e:
        # Best effort: a network/HTTP failure or an unparseable page skips that
        # page only. Anything else (e.g. a NameError) is a bug and should surface.
        print(f"⚠️ Problem on page {page}: {e}")
        failed_pages.append(page)
    else:
        print(f"✅ Page {page}: found {len(page_rows)} awards.")
        all_rows.extend(page_rows)

print("\n✅ Done scraping pages!")
if failed_pages:
    # Make gaps explicit so the total below is not mistaken for a complete scrape.
    print(f"⚠️ Pages skipped because of errors: {failed_pages}")

df_all = pd.DataFrame(all_rows)
print(f"\nTotal awards collected: {len(df_all)}")
df_all.head()


Starting scrape of 10 pages (0‚Äì9)...



Scraping pages:   0%|          | 0/10 [00:00<?, ?it/s]


üìÑ Fetching page 0 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=0
‚úÖ Page 0: found 25 awards.

üìÑ Fetching page 1 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=1
‚úÖ Page 1: found 25 awards.

üìÑ Fetching page 2 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=2
‚úÖ Page 2: found 25 awards.

üìÑ Fetching page 3 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=3
‚úÖ Page 3: found 25 awards.

üìÑ Fetching page 4 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=4
‚úÖ Page 4: found 25 awards.

üìÑ Fetching page 5 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=5
‚úÖ Page 5: found 25 awards.

üìÑ Fetching page 6 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=6
‚úÖ Page 6: found 25 awards.

üìÑ Fetching page 7 ...
Requested URL: https://nij.ojp.gov/funding/awards/list?page=7
‚úÖ Page 7: found 25 awards.

üìÑ Fetching page 8 ...
Requested URL: https://nij.ojp.gov/fun

Unnamed: 0,fy,title,title_url,original_solicitation,original_solicitation_url,recipient,state,award_number,amount,status
0,2024,A Combined Molecular Analysis of Pollen Utiliz...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Graduate Research Fellowship,https://nij.ojp.gov/funding/opportunities/o-ni...,THE UNIVERSITY OF CENTRAL FLORIDA BOARD OF TRU...,FL,15PNIJ-24-GG-01571-RESS,"$120,000",Open
1,2024,A Comparative Evaluation of the MinION and MiS...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Development in Forensic ...,https://nij.ojp.gov/funding/opportunities/o-ni...,FLORIDA INTERNATIONAL UNIVERSITY,FL,15PNIJ-24-GG-03842-SLFO,"$487,790",Open
2,2024,A Comprehensive Evaluation and Error Rate Asse...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Development in Forensic ...,https://nij.ojp.gov/funding/opportunities/o-ni...,RESEARCH TRIANGLE INSTITUTE,NC,15PNIJ-24-GG-03851-SLFO,"$499,902",Open
3,2024,A Focused Deterrence Program for Juveniles wit...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Evaluation on Youth Just...,https://nij.ojp.gov/funding/opportunities/o-ni...,FLORIDA STATE UNIVERSITY,FL,15PNIJ-24-GG-03092-MUMU,"$1,255,241",Open
4,2024,A Multi-District Analysis of School Safety in ...,https://nij.ojp.gov/funding/awards/15pnij-24-g...,NIJ FY24 Research and Evaluation on School Safety,https://nij.ojp.gov/funding/opportunities/o-ni...,RESEARCH TRIANGLE INSTITUTE,NC,15PNIJ-24-GK-01051-STOP,"$933,524",Open


In [5]:
# Preview more rows just to sanity-check the data.
print("Data preview (first 20 rows):")
# A bare expression is rendered only when it is the LAST statement of a cell,
# so the preview must be shown explicitly. `display` is provided by the
# IPython kernel without an import.
display(df_all.head(20))

# Save to CSV (index=False keeps the positional row index out of the file).
output_path = "nij_awards_first_10_pages.csv"
df_all.to_csv(output_path, index=False)

print(f"\n✅ CSV saved as: {output_path}")


Data preview (first 20 rows):

‚úÖ CSV saved as: nij_awards_first_10_pages.csv
