In [21]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Step 1: Get the main page table and parse
base_url = "https://www.tdcj.texas.gov"
table_url = urljoin(base_url, "/death_row/dr_executed_offenders.html")
# NOTE(review): verify=False disables TLS certificate verification. It is kept
# as in the original (presumably the site's certificate chain fails
# validation -- confirm); prefer pointing `verify` at a CA bundle if possible.
# The timeout stops a stalled connection from hanging the notebook forever.
response = requests.get(table_url, verify=False, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Step 2: Extract the links from the 3rd column and other relevant data
# Each entry of `links` is
# [id, last_name, first_name, age, date, race, absolute statement URL].
links = []
for row in soup.find_all("tr"):
    cells = row.find_all("td")
    # Skip rows with no <td> cells, or with too few cells to hold every
    # column read below (the highest index used is 8).
    if len(cells) < 9:
        continue

    # Named `record_id` rather than `id` so the builtin id() is not shadowed
    # for the rest of the kernel session.
    record_id = cells[0].get_text(strip=True)
    last_name = cells[3].get_text(strip=True)
    first_name = cells[4].get_text(strip=True)
    age = cells[6].get_text(strip=True)
    date = cells[7].get_text(strip=True)
    race = cells[8].get_text(strip=True)

    # The 3rd column may lack an <a> tag, and the tag may lack an href;
    # both cases are skipped instead of raising.
    link_tag = cells[2].find("a")
    slug = link_tag.get("href") if link_tag else None
    if not slug:
        continue

    # Links whose target contains "no_last_statement" are left out.
    if "no_last_statement" in slug:
        continue

    # Hrefs that do not already start with /death_row/ are anchored there.
    if not slug.startswith("/death_row/"):
        slug = "/death_row/" + slug

    href = urljoin(base_url, slug)
    links.append([record_id, last_name, first_name, age, date, race, href])

print(f"Found {len(links)} links.")

Found 493 links.


In [22]:
# Step 3: Visit each link, parse and scrape data
# The statement text is taken from the 6th <p> element (index 5) of each page.
STATEMENT_P_INDEX = 5

# Each entry is the corresponding row of `links` with the statement appended.
scraped_data = []

for row in links:
    url = row[-1]

    # Only network/HTTP failures are caught here, so a genuine programming
    # error is not silently reported as a scrape failure.
    # NOTE(review): verify=False disables TLS certificate verification; kept
    # as in the original. The timeout stops one stalled request from hanging
    # the whole loop.
    try:
        page_response = requests.get(url, verify=False, timeout=30)
        page_response.raise_for_status()
    except requests.RequestException as e:
        print(f"Failed to scrape {url}: {e}")
        continue

    page_soup = BeautifulSoup(page_response.text, "html.parser")
    paragraphs = page_soup.find_all("p")

    if not paragraphs:
        # A page with no <p> elements at all keeps the original placeholder.
        statement = "No statement found"
    elif len(paragraphs) <= STATEMENT_P_INDEX:
        # Unexpected layout: report and skip the row. The original skipped
        # these rows too, but only via an opaque "list index out of range".
        print(
            f"Failed to scrape {url}: expected at least "
            f"{STATEMENT_P_INDEX + 1} <p> elements, found {len(paragraphs)}"
        )
        continue
    else:
        statement = paragraphs[STATEMENT_P_INDEX].get_text(strip=True)

    scraped_data.append(row + [statement])

print(f"Scraped {len(scraped_data)} rows.")

Failed to scrape https://www.tdcj.texas.gov/death_row/dr_info/bibledannylast.html: list index out of range
Scraped 492 rows.


The above scraping step took about 2 minutes and resulted in only 1 error, due to a page layout issue. A quick check confirms there is no recorded statement for this person, so we can proceed without this row.

Below, the scraped data is written to a CSV file before we proceed to the analysis.

In [24]:
import csv

# Step 4: Save to CSV
csv_filename = "dr_scraped.csv"

# One name per field, in the order each row was assembled: the six values read
# from the index table, the statement page URL, and the scraped statement text.
# The header must have the same number of columns as the data rows (8).
csv_header = [
    "id",
    "last_name",
    "first_name",
    "age",
    "date",
    "race",
    "url",
    "statement",
]

# newline="" lets the csv module control line endings itself.
with open(csv_filename, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    # Write header row
    writer.writerow(csv_header)
    # Write data rows
    writer.writerows(scraped_data)

print(f"Scraped data saved to '{csv_filename}'")

Scraped data saved to 'dr_scraped.csv'
