In [53]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import unicodedata

def extract_column_from_header(row):
    if row.br: row.br.extract()
    if row.a: row.a.extract()
    if row.sup: row.sup.extract()
    return ' '.join(row.contents).strip()

def date_time(table_cells):
    return [data_time.strip() for data_time in list(table_cells.strings)][0:2]

def booster_version(table_cells):
    return ''.join([bv for i, bv in enumerate(table_cells.strings) if i % 2 == 0][:-1])

def landing_status(table_cells):
    return next(table_cells.strings, None)

def get_mass(table_cells):
    mass = unicodedata.normalize("NFKD", table_cells.text).strip()
    return mass[:mass.find("kg")+2] if "kg" in mass else 0

static_url = "https://en.wikipedia.org/w/index.php?title=List_of_Falcon_9_and_Falcon_Heavy_launches&oldid=1027686922"
response = requests.get(static_url)
soup = BeautifulSoup(response.text, 'html.parser')
html_tables = soup.find_all('table')
first_launch_table = html_tables[2]
column_names = [extract_column_from_header(th) for th in first_launch_table.find_all('th') if extract_column_from_header(th)]
launch_dict = {col: [] for col in column_names}
launch_dict['Time'] = []

for table in soup.find_all('table', "wikitable plainrowheaders collapsible"):
    for rows in table.find_all("tr"):
        row = rows.find_all('td')
        if row:
            flight_number = rows.th.string.strip() if rows.th and rows.th.string and rows.th.string.isdigit() else None
            if flight_number:
                datatimelist = date_time(row[0])
                launch_dict['Flight No.'].append(flight_number)
                launch_dict['Date and time ( )'].append(datatimelist[0].strip(','))
                launch_dict['Time'].append(datatimelist[1])
                launch_dict['Version Booster'].append(booster_version(row[1]))
                launch_dict['Launch site'].append(row[2].a.string if row[2].a else '')
                launch_dict['Payload'].append(row[3].a.string if row[3].a else '')
                launch_dict['Payload mass'].append(get_mass(row[4]))
                launch_dict['Orbit'].append(row[5].a.string if row[5].a else '')
                launch_dict['Customer'].append(row[6].a.string if row[6].a else 'Not Available')
                launch_dict['Launch outcome'].append(next(row[7].strings, '').strip())
                launch_dict['Booster landing'].append(landing_status(row[8]))

df = pd.DataFrame(launch_dict)
csv_filename = 'spacex_web_scraped.csv'
df.to_csv(csv_filename, index=False)

df = pd.read_csv(csv_filename)
df['Date'] = pd.to_datetime(df['Date and time ( )'], errors='coerce')
year_first_row = df['Date'].dt.year.iloc[0] if not df.empty else "DataFrame is empty"
falcon_9_count = df[df['Version Booster'].str.contains('F9')].shape[0] if 'Version Booster' in df.columns else 0
missing_landing_pads = df['Booster landing'].isnull().sum() if 'Booster landing' in df.columns else 0
soup_title = soup.title.string if soup.title else 'No title found'

print(f"Year in the first row: {year_first_row}")
print(f"Count of Falcon 9 launches: {falcon_9_count}")
print(f"Number of missing landingPad values: {missing_landing_pads}")
print(f"Title of the page: {soup_title}")


# Request data from SpaceX API
response = requests.get("https://api.spacexdata.com/v3/launches")
launches = response.json()

df = pd.json_normalize(launches)

# Extracting year from the first row of the 'static_fire_date_utc' column
df['static_fire_date_utc'] = pd.to_datetime(df['static_fire_date_utc'], errors='coerce')
year_first_row = df['static_fire_date_utc'].dt.year.iloc[0] if not df.empty else "DataFrame is empty"
print(f"Year in the first row: {year_first_row}")
response = requests.get("https://api.spacexdata.com/v3/launches")
launches = response.json()
df = pd.json_normalize(launches)

# Filter out Falcon 1 launches and count Falcon 9 launches
falcon_9_launches = df[df['rocket.rocket_id'] == 'falcon9']
count_falcon_9 = falcon_9_launches.shape[0]
print(f"Number of Falcon 9 launches: {count_falcon_9}")

# Request data from SpaceX API
response = requests.get("https://api.spacexdata.com/v3/launches")
launches = response.json()
df = pd.json_normalize(launches)
missing_landing_pads = df['rocket.first_stage.cores'].apply(lambda cores: all(core['landing_vehicle'] is None for core in cores)).sum()
print(f"Number of missing landingPad values: {missing_landing_pads}")


Year in the first row: DataFrame is empty
Count of Falcon 9 launches: 0
Number of missing landingPad values: 0
Title of the page: List of Falcon 9 and Falcon Heavy launches - Wikipedia
Year in the first row: 2006.0
Number of Falcon 9 launches: 103
Number of missing landingPad values: 38
