In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://asn.flightsafety.org/asndb/types/DB"

headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/114.0.0.0 Safari/537.36"
    )
}

response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, "html.parser")

# Locate the table
table = soup.find("table", {"id": "myTable"})

data = []

# Parse table rows
for row in table.find("tbody").find_all("tr"):
    cols = row.find_all("td")
    if len(cols) >= 2:
        data.append({
            "Aircraft Type": cols[0].text.strip(),
            "Year": cols[1].text.strip(),
            "Description": cols[2].text.strip(),
        })

# Save to CSV
df = pd.DataFrame(data)

print("✅ Data scraped")


✅ Data scraped and saved to 'aircraft_type_accidents.csv'


In [24]:
# Extract only the first 10 aircraft type links from the table
aircraft_links = []
for row in table.find("tbody").find_all("tr"):
    link_tag = row.find("a", href=True)
    if link_tag:
        aircraft_links.append("https://asn.flightsafety.org" + link_tag['href'])

# Fetch and parse tables from each aircraft type link
aircraft_tables = []
for link in aircraft_links:
    resp = requests.get(link, headers=headers)
    soup_link = BeautifulSoup(resp.text, "html.parser")
    aircraft_table = soup_link.find("table")
    if aircraft_table:
        rows = []
        for tr in aircraft_table.find_all("tr"):
            cols = [td.text.strip() for td in tr.find_all("td")]
            rows.append(cols)
        aircraft_tables.append({
            "url": link,
            "rows": rows
        })

print(f"✅ Fetched {len(aircraft_tables)} aircraft tables")

✅ Fetched 472 aircraft tables


In [78]:
aircraft_tables_df = pd.DataFrame(aircraft_tables)


In [79]:
aircraft_tables_df.rows[0]

[['',
  'Aero Commander 1121 Jet Commander\r\xa0\r\xa0First flight: 1963\r\xa02 Jet engines'],
 []]

In [82]:
for r in range(len(aircraft_tables_df)):
    aircraft_tables_df['Aircraft Name'][r] = aircraft_tables_df.rows[r][0][1].split("\r")[0]
    aircraft_tables_df['First Flight'][r] = aircraft_tables_df.rows[r][0][1].split("\r")[2].replace("First flight: ", "")
    aircraft_tables_df['Aircraft Engines'][r] = aircraft_tables_df.rows[r][0][1].split("\r")[3]
    

You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  aircraft_tables_df['Aircraft Name'][r] = aircraft_tables_df.rows[r][0][1].split("\r")[0]
You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this wi

In [83]:
aircraft_tables_df

Unnamed: 0,url,rows,Aircraft Name,First Flight,Aircraft Engines
0,https://asn.flightsafety.org/database/type/JCOM,"[[, Aero Commander 1121 Jet Commander\r \r Fir...",Aero Commander 1121 Jet Commander,1963,2 Jet engines
1,https://asn.flightsafety.org/database/type/gupt,"[[, Aero Spacelines Mini Guppy Turbine\r \r Fi...",Aero Spacelines Mini Guppy Turbine,1970,4 Turboprop engines
2,https://asn.flightsafety.org/database/type/S601,"[[, Aérospatiale SN.601 Corvette\r corporate j...",Aérospatiale SN.601 Corvette,1970,2 Jet engines
3,https://asn.flightsafety.org/database/type/_A220,"[[, Airbus A220\r A220-100, A220-300\r \r 2 Je...",Airbus A220,,2 Jet engines
4,https://asn.flightsafety.org/database/type/BCS1,"[[, Airbus A220-100\r narrow-body jet airliner...",Airbus A220-100,2013,2 Jet engines
...,...,...,...,...,...
467,https://asn.flightsafety.org/database/type/VISC,"[[, Vickers Viscount\r \r First flight: 1948\r...",Vickers Viscount,1948,4 Turboprop engines
468,https://asn.flightsafety.org/database/type/B18T,"[[, Volpar Turboliner\r \r First flight: 1967\...",Volpar Turboliner,1967,2 Turboprop engines
469,https://asn.flightsafety.org/database/type/MA60,"[[, Xian MA-60\r \r First flight: 2000\r 2 Tur...",Xian MA-60,2000,2 Turboprop engines
470,https://asn.flightsafety.org/database/type/YK40,"[[, Yakovlev Yak-40\r three-turbofan short-hau...",Yakovlev Yak-40,1966,3 Jet engines


In [85]:
aircraft_tables_df.drop(columns=['rows'], inplace=True)