# Turtles Dataset

### Imports

In [None]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# Scrape the turtle families listed inside the iframe of the "frames" page:
# load the outer page, follow the iframe, visit every family detail page and
# collect one record per family into a DataFrame.

# Seconds to wait on each HTTP request; without an explicit timeout a stalled
# connection would block the cell indefinitely.
REQUEST_TIMEOUT = 10


def node_text(node, default):
    """Return the whitespace-normalised text of a parsed HTML node.

    Args:
        node: An object exposing ``get_text()`` (e.g. the result of
            ``select_one``), or ``None`` when the element was not found.
        default: Value returned when ``node`` is ``None``.

    Returns:
        The node's text with every run of whitespace collapsed to a single
        space and the ends trimmed, or ``default`` for a missing node.
    """
    if node is None:
        return default
    # get_text(strip=True) strips every text fragment before joining them, so
    # words around inline tags fuse together ("TheXfamily"). Taking the raw
    # text and collapsing whitespace keeps the original word boundaries.
    return " ".join(node.get_text().split())


def parse_discovery(description):
    """Extract the discovery year and discoverer from a description string.

    The text is expected to contain "discovered in <year> by <name>".

    Args:
        description: Description text of a turtle family.

    Returns:
        A ``(year, discoverer)`` tuple of strings. Both are ``"Unknown"`` when
        the "discovered in" marker is absent or when the text following it
        does not contain exactly one " by " separator.
    """
    unknown = ("Unknown", "Unknown")
    if "discovered in" not in description:
        return unknown

    after_marker = description.split("discovered in")[1].strip()
    pieces = after_marker.split(" by ")
    if len(pieces) != 2:
        return unknown

    year, discoverer = pieces
    # Drop the sentence-ending period from the discoverer's name.
    return year.strip(), discoverer.strip().rstrip(".")


def fetch_soup(session, url):
    """Download ``url`` with ``session`` and return the response and its parsed HTML.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never parsed as if it were real content.
    """
    page_response = session.get(url, timeout=REQUEST_TIMEOUT)
    page_response.raise_for_status()
    return page_response, BeautifulSoup(page_response.text, "html.parser")


# A notebook kernel (and `python file.py`) runs code with __name__ set to
# "__main__", so the cell executes as before; the guard only keeps the network
# calls from firing when the helpers above are imported from elsewhere.
if __name__ == "__main__":
    # Load main frame and extract iframe URL
    base_url = "https://scrapethissite.com"
    main_url = base_url + "/pages/frames/"

    # One session reuses the underlying connection across all page requests.
    with requests.Session() as session:
        main_response, main_soup = fetch_soup(session, main_url)

        iframe = main_soup.find("iframe")
        if iframe is None or not iframe.get("src"):
            raise RuntimeError(f"No iframe with a src attribute found at {main_url}")
        iframe_src = iframe["src"]
        # urljoin resolves both site-absolute ("/pages/...") and relative paths.
        iframe_url = urljoin(main_url, iframe_src)

        # Get turtle links from iframe
        response, soup = fetch_soup(session, iframe_url)
        turtle_links = soup.select('a[href*="family="]')
        turtle_data = []

        # Visit each detail page and extract name, nickname, description,
        # year, discoverer
        for link in turtle_links:
            relative_url = link["href"]
            full_url = urljoin(iframe_url, relative_url)

            detail_response, detail_soup = fetch_soup(session, full_url)

            name = node_text(detail_soup.select_one("h3.family-name"), "Unknown")
            nickname = node_text(detail_soup.select_one("strong.common-name"), "Unknown")
            description = node_text(detail_soup.select_one("p.lead"), "No description")

            # Try to extract year and discoverer from description
            year, discoverer = parse_discovery(description)

            turtle_data.append({
                "Name": name,
                "Nickname": nickname,
                "Description": description,
                "Year Discovered": year,
                "Discovered By": discoverer,
            })

    # Display as DataFrame
    df = pd.DataFrame(turtle_data)
    # `display` is not imported in this file; it is expected to be supplied by
    # the notebook environment.
    display(df)


Unnamed: 0,Name,Nickname,Description,Year Discovered,Discovered By
0,Carettochelyidae,Pig-nosed turtle,TheCarettochelyidaefamily of turtles — more co...,1887,Boulenger
1,Cheloniidae,Sea turtles,TheCheloniidaefamily of turtles — more commonl...,1811,Oppel
2,Chelydridae,Snapping turtles,TheChelydridaefamily of turtles — more commonl...,1831,Gray
3,Dermatemydidae,Central American river turtle,TheDermatemydidaefamily of turtles — more comm...,1870,Gray
4,Dermochelyidae,Leatherback sea turtle,TheDermochelyidaefamily of turtles — more comm...,1843,Fitzinger
5,Emydidae,Pond or water turtles,TheEmydidaefamily of turtles — more commonly k...,1815,Rafinesque
6,Geoemydidae,"Asian river, leaf, roofed or Asian box turtles",TheGeoemydidaefamily of turtles — more commonl...,1868,Theobald
7,Kinosternidae,Mud or musk turtles,TheKinosternidaefamily of turtles — more commo...,1857,Agassiz
8,Platysternidae,Big-headed turtle,ThePlatysternidaefamily of turtles — more comm...,1869,Gray
9,Testudinidae,Tortoises,TheTestudinidaefamily of turtles — more common...,1788,Batsch
