# **Turtles All the Way Down**

In [None]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Setup headers
headers = {"User-Agent": "Mozilla/5.0"}
main_url = "https://www.scrapethissite.com"
frames_url = "https://www.scrapethissite.com/pages/frames/"

# Seconds to wait for any single HTTP response; without a timeout a stalled
# connection would block the scrape indefinitely.
REQUEST_TIMEOUT = 10


def parse_turtle_details(text):
    """Pull the discovery year and the discoverer out of a detail sentence.

    The year is the first whitespace-separated token that consists only of
    digits; the discoverer is taken to be the last token of the text.
    Surrounding sentence punctuation (".", ",", ";", ":") is stripped from
    every token first, so "1887," still counts as a year and "Boulenger."
    is reported as "Boulenger".

    Args:
        text: Plain text of a detail paragraph.

    Returns:
        A ``(year_found, turtle_discover)`` tuple of strings. ``year_found``
        is "Unknown" when no all-digit token exists; ``turtle_discover`` is
        "Unknown" when the text has fewer than two usable tokens.
    """
    stripped = (raw.strip(".,;:") for raw in text.split())
    # Drop tokens that were nothing but punctuation.
    tokens = [token for token in stripped if token]

    year_found = next((token for token in tokens if token.isdigit()), "Unknown")
    turtle_discover = tokens[-1] if len(tokens) > 1 else "Unknown"
    return year_found, turtle_discover


def fetch_soup(url):
    """Download ``url`` and parse it with BeautifulSoup.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The parsed document, or ``None`` when the request fails or the
        server answers with an HTTP error status (the error is printed).
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching page {url}: {e}")
        return None
    return BeautifulSoup(response.text, "html.parser")


def scrape_detail_page(url):
    """Collect species, year and discoverer records from one detail page.

    Args:
        url: Absolute URL of a "Learn More" detail page.

    Returns:
        A list of dicts with the keys "species", "year_found" and
        "turtle_discover". The list is empty when the page cannot be
        fetched or holds no usable detail card.
    """
    records = []
    detail_soup = fetch_soup(url)
    if detail_soup is None:
        return records

    for detail in detail_soup.find_all("div", class_="col-md-6"):
        # Extract species, year, discoverer safely
        p_tag = detail.find("p")
        if not p_tag or not p_tag.strong:
            continue

        species = p_tag.strong.get_text(strip=True)
        year_found, turtle_discover = parse_turtle_details(
            p_tag.get_text(" ", strip=True)
        )

        print(f"Species: {species}, Year: {year_found}, Discoverer: {turtle_discover}")

        records.append({
            "species": species,
            "year_found": year_found,
            "turtle_discover": turtle_discover,
        })
    return records


def scrape_turtles():
    """Scrape the turtle families and their detail pages.

    Resolves the iframe embedded in ``frames_url``, reads every family card
    from the framed page, then follows each "Learn More" button.

    Returns:
        A ``(turtles_name, all_turtles)`` tuple: a list of
        ``{"Turtle_name": ...}`` dicts and a list of detail dicts as
        produced by ``scrape_detail_page``. Both lists may be empty or
        partial when a page cannot be fetched.
    """
    turtles_name = []
    all_turtles = []

    # Step 1: Get iframe URL from main page
    frames_soup = fetch_soup(frames_url)
    if frames_soup is None:
        return turtles_name, all_turtles

    iframe = frames_soup.find("iframe")
    iframe_src = iframe.get("src") if iframe is not None else None
    if not iframe_src:
        print("No iframe found!")
        return turtles_name, all_turtles
    current_page_url = requests.compat.urljoin(main_url, iframe_src)

    # Step 2: Scrape the framed listing. Nothing in this script ever advances
    # to another listing URL, so the page is fetched exactly once; repeating
    # the fetch in a loop would re-scrape the same page without terminating.
    print("Scraping page:", current_page_url)
    soup = fetch_soup(current_page_url)
    if soup is None:
        return turtles_name, all_turtles

    # Grab all turtle families
    turtle_cards = soup.find_all("div", class_="col-md-4")
    if not turtle_cards:
        print("No turtles found!")
        return turtles_name, all_turtles

    for card in turtle_cards:
        heading = card.find("h3", class_="family-name")
        if heading is None:
            # A card without a family heading has no name to record.
            continue
        family_name = heading.get_text(strip=True)
        turtles_name.append({"Turtle_name": family_name})
        print(f"Turtle Family: {family_name}")

    # Follow "Learn More" buttons for details
    learn_more_btns = soup.find_all("a", class_="btn")
    if not learn_more_btns:
        print("No more 'Learn More' buttons found!")
        return turtles_name, all_turtles

    for btn in learn_more_btns:
        href = btn.get("href")
        if not href:
            # An anchor without a target cannot be followed.
            continue
        learn_more_url = requests.compat.urljoin(current_page_url, href)
        all_turtles.extend(scrape_detail_page(learn_more_url))

    return turtles_name, all_turtles


# Run the scrape only when executed as a script or notebook cell, so the
# helpers above can be imported without triggering network traffic.
if __name__ == "__main__":
    # Lists for data
    turtles_name, all_turtles = scrape_turtles()

    # Convert to DataFrames
    df_names = pd.DataFrame(turtles_name)
    df_details = pd.DataFrame(all_turtles)

    print("\n--- Turtle Families ---")
    print(df_names)
    print("\n--- Turtle Details ---")
    print(df_details)


Scraping page: https://www.scrapethissite.com/pages/frames/?frame=i
Turtle Family: Carettochelyidae
Turtle Family: Cheloniidae
Turtle Family: Chelydridae
Turtle Family: Dermatemydidae
Turtle Family: Dermochelyidae
Turtle Family: Emydidae
Turtle Family: Geoemydidae
Turtle Family: Kinosternidae
Turtle Family: Platysternidae
Turtle Family: Testudinidae
Turtle Family: Trionychidae
Turtle Family: Chelidae
Turtle Family: Pelomedusidae
Turtle Family: Podocnemididae
Species: Pig-nosed turtle, Year: 1887, Discoverer: Boulenger.
Species: Sea turtles, Year: 1811, Discoverer: Oppel.
Species: Snapping turtles, Year: 1831, Discoverer: Gray.
Species: Central American river turtle, Year: 1870, Discoverer: Gray.
Species: Leatherback sea turtle, Year: 1843, Discoverer: Fitzinger.
Species: Pond or water turtles, Year: 1815, Discoverer: Rafinesque.
Species: Asian river, leaf, roofed or Asian box turtles, Year: 1868, Discoverer: Theobald.
Species: Mud or musk turtles, Year: 1857, Discoverer: Agassiz.
Speci