In [3]:
# --- Install the requests library (if not already available) ---
!pip install requests --quiet

# --- Import all the Python libraries we need ---
import requests          # to call the SpaceX API
import pandas as pd      # to work with tables
import numpy as np       # for numeric operations
from datetime import datetime

# Make pandas show all columns when we print a dataframe
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)


In [4]:
# Base URL for SpaceX API v4
BASE_URL = "https://api.spacexdata.com/v4"

def get_json(endpoint):
    """
    Small helper function:
    - builds the full URL
    - sends a GET request
    - returns JSON data (Python dict / list)
    """
    url = f"{BASE_URL}/{endpoint}"
    response = requests.get(url)
    response.raise_for_status()   # will show error if the request failed
    return response.json()


In [5]:
# 1. All past launches
launches = get_json("launches/past")

# 2. Reference tables (rockets, payloads, launchpads, landpads, cores)
rockets    = {item["id"]: item for item in get_json("rockets")}
payloads   = {item["id"]: item for item in get_json("payloads")}
launchpads = {item["id"]: item for item in get_json("launchpads")}
landpads   = {item["id"]: item for item in get_json("landpads")}
cores      = {item["id"]: item for item in get_json("cores")}

print("Number of launches:", len(launches))


Number of launches: 187


In [6]:
rows = []

for launch in launches:
    # --- Get rocket info and keep only Falcon 9 ---
    rocket_info = rockets.get(launch.get("rocket"), {})
    rocket_name = rocket_info.get("name", "")
    if "Falcon 9" not in rocket_name:
        continue   # skip Falcon 1 and others

    # --- Basic launch info ---
    flight_number = launch.get("flight_number")
    date_utc = launch.get("date_utc")
    if date_utc:
        # keep only date part (YYYY-MM-DD)
        date = date_utc.split("T")[0]
    else:
        date = None

    # --- Launch site information ---
    lp_info = launchpads.get(launch.get("launchpad"), {})
    launch_site = lp_info.get("name")
    longitude   = lp_info.get("longitude")
    latitude    = lp_info.get("latitude")

    # --- Payload info (take the first payload if there are many) ---
    payload_mass = None
    orbit        = None
    customer     = None
    payload_ids = launch.get("payloads") or []
    if len(payload_ids) > 0:
        p_info = payloads.get(payload_ids[0], {})
        payload_mass = p_info.get("mass_kg")
        orbit        = p_info.get("orbit")
        customers    = p_info.get("customers") or []
        customer     = ", ".join(customers) if customers else None

    # --- Core / landing information ---
    core_list = launch.get("cores") or [{}]
    core_part = core_list[0]

    core_obj   = cores.get(core_part.get("core")) or {}
    block      = core_obj.get("block")
    serial     = core_obj.get("serial")
    reuse_count = core_obj.get("reuse_count")

    gridfins  = core_part.get("gridfins")
    legs      = core_part.get("legs")
    reused    = core_part.get("reused")
    flights   = core_part.get("flight")
    landing_attempt = core_part.get("landing_attempt")
    landing_success = core_part.get("landing_success")

    landpad_obj = landpads.get(core_part.get("landpad")) or {}
    landing_pad_name = landpad_obj.get("name")

    # --- Define a simple landing outcome label ---
    if not landing_attempt:
        outcome = "No attempt"
    elif landing_success:
        outcome = "Success"
    else:
        outcome = "Failure"

    # --- Add one row to our table ---
    rows.append({
        "FlightNumber":   flight_number,
        "Date":           date,
        "BoosterVersion": rocket_name,
        "LaunchSite":     launch_site,
        "PayloadMass":    payload_mass,
        "Orbit":          orbit,
        "Customer":       customer,
        "Outcome":        outcome,
        "Flights":        flights,
        "GridFins":       gridfins,
        "Reused":         reused,
        "Legs":           legs,
        "LandingPad":     landing_pad_name,
        "Block":          block,
        "ReusedCount":    reuse_count,
        "Serial":         serial,
        "Longitude":      longitude,
        "Latitude":       latitude,
    })

# Convert the list of rows to a DataFrame
spacex_df = pd.DataFrame(rows)
spacex_df.head()


Unnamed: 0,FlightNumber,Date,BoosterVersion,LaunchSite,PayloadMass,Orbit,Customer,Outcome,Flights,GridFins,Reused,Legs,LandingPad,Block,ReusedCount,Serial,Longitude,Latitude
0,6,2010-06-04,Falcon 9,CCSFS SLC 40,,LEO,SpaceX,No attempt,1,False,False,False,,1,0,B0003,-80.577366,28.561857
1,7,2010-12-08,Falcon 9,CCSFS SLC 40,,LEO,NASA(COTS),No attempt,1,False,False,False,,1,0,B0004,-80.577366,28.561857
2,8,2012-05-22,Falcon 9,CCSFS SLC 40,525.0,LEO,NASA(COTS),No attempt,1,False,False,False,,1,0,B0005,-80.577366,28.561857
3,9,2012-10-08,Falcon 9,CCSFS SLC 40,400.0,ISS,NASA (CRS),No attempt,1,False,False,False,,1,0,B0006,-80.577366,28.561857
4,10,2013-03-01,Falcon 9,CCSFS SLC 40,677.0,ISS,NASA (CRS),No attempt,1,False,False,False,,1,0,B0007,-80.577366,28.561857


In [7]:
# Check how many missing values per column
spacex_df.isna().sum()


Unnamed: 0,0
FlightNumber,0
Date,0
BoosterVersion,0
LaunchSite,0
PayloadMass,23
Orbit,1
Customer,4
Outcome,0
Flights,0
GridFins,0


In [8]:
# Replace NULL (NaN) in PayloadMass with the mean payload mass
mean_payload = spacex_df["PayloadMass"].mean()
spacex_df["PayloadMass"].fillna(mean_payload, inplace=True)

# Quick check
spacex_df["PayloadMass"].isna().sum()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  spacex_df["PayloadMass"].fillna(mean_payload, inplace=True)


np.int64(0)

In [9]:
spacex_df.to_csv("spacex_launch_data.csv", index=False)
print("Saved file: spacex_launch_data.csv")


Saved file: spacex_launch_data.csv
