In [1]:
# SpaceX Falcon 9 Data Collection via API
# IBM Data Science Capstone Project

import requests
import pandas as pd
import numpy as np
from datetime import datetime
import json

In [2]:
# Define SpaceX API endpoints
spacex_url = "https://api.spacexdata.com/v4/launches"
rockets_url = "https://api.spacexdata.com/v4/rockets"
launchpads_url = "https://api.spacexdata.com/v4/launchpads"
payloads_url = "https://api.spacexdata.com/v4/payloads"
cores_url = "https://api.spacexdata.com/v4/cores"

In [3]:
# Function to get data from SpaceX API
def get_spacex_data(url):
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error: {response.status_code}")
        return None

# Fetch all data
print("Fetching launches data...")
launches = get_spacex_data(spacex_url)
print(f"Total launches: {len(launches)}")

print("\nFetching rockets data...")
rockets = get_spacex_data(rockets_url)

print("Fetching launchpads data...")
launchpads = get_spacex_data(launchpads_url)

print("Fetching payloads data...")
payloads = get_spacex_data(payloads_url)

print("Fetching cores data...")
cores = get_spacex_data(cores_url)

Fetching launches data...
Total launches: 205

Fetching rockets data...
Fetching launchpads data...
Fetching payloads data...
Fetching cores data...


In [4]:
# Create dictionaries for easy lookup
rockets_dict = {r['id']: r for r in rockets}
launchpads_dict = {lp['id']: lp for lp in launchpads}
payloads_dict = {p['id']: p for p in payloads}
cores_dict = {c['id']: c for c in cores}

print("\nData loaded successfully!")
print(f"Rockets: {len(rockets)}")
print(f"Launchpads: {len(launchpads)}")
print(f"Payloads: {len(payloads)}")
print(f"Cores: {len(cores)}")


Data loaded successfully!
Rockets: 4
Launchpads: 6
Payloads: 225
Cores: 83


In [5]:
# Process launch data and create structured DataFrame
def process_launch_data(launch):
    # Extract basic launch info
    launch_data = {
        'FlightNumber': launch.get('flight_number'),
        'Date': launch.get('date_utc'),
        'LaunchSite': launch.get('launchpad'),
        'Success': 1 if launch.get('success') == True else 0,
        'Rocket': launch.get('rocket'),
        'Payload': launch.get('payloads', [None])[0] if launch.get('payloads') else None,
        'PayloadMass': None,
        'Orbit': None,
        'Customer': None,
        'LaunchSiteName': None,
        'Longitude': None,
        'Latitude': None,
        'RocketName': None,
        'BoosterVersion': None,
        'Cores': launch.get('cores', []),
        'LandingOutcome': None,
        'LandingType': None
    }

    # Get payload information
    if launch_data['Payload'] and launch_data['Payload'] in payloads_dict:
        payload_info = payloads_dict[launch_data['Payload']]
        launch_data['PayloadMass'] = payload_info.get('mass_kg')
        launch_data['Orbit'] = payload_info.get('orbit')
        customers = payload_info.get('customers', [])
        launch_data['Customer'] = customers[0] if customers else None

    # Get launchpad information
    if launch_data['LaunchSite'] and launch_data['LaunchSite'] in launchpads_dict:
        launchpad_info = launchpads_dict[launch_data['LaunchSite']]
        launch_data['LaunchSiteName'] = launchpad_info.get('name')
        launch_data['Longitude'] = launchpad_info.get('longitude')
        launch_data['Latitude'] = launchpad_info.get('latitude')

    # Get rocket information
    if launch_data['Rocket'] and launch_data['Rocket'] in rockets_dict:
        rocket_info = rockets_dict[launch_data['Rocket']]
        launch_data['RocketName'] = rocket_info.get('name')

    # Get core/landing information
    if launch_data['Cores'] and len(launch_data['Cores']) > 0:
        core = launch_data['Cores'][0]
        launch_data['LandingOutcome'] = 'Success' if core.get('landing_success') else 'Failure'
        landing_type = core.get('landing_type')
        if landing_type:
            launch_data['LandingType'] = landing_type

        # Get booster version from core
        core_id = core.get('core')
        if core_id and core_id in cores_dict:
            core_info = cores_dict[core_id]
            launch_data['BoosterVersion'] = f"B{core_info.get('block', 0)}"

    return launch_data

print("Processing launch data...")
processed_data = [process_launch_data(launch) for launch in launches]
df = pd.DataFrame(processed_data)

print(f"\nDataFrame created with {len(df)} rows and {len(df.columns)} columns")
print("\nFirst 5 rows:")
df.head()

Processing launch data...

DataFrame created with 205 rows and 17 columns

First 5 rows:


Unnamed: 0,FlightNumber,Date,LaunchSite,Success,Rocket,Payload,PayloadMass,Orbit,Customer,LaunchSiteName,Longitude,Latitude,RocketName,BoosterVersion,Cores,LandingOutcome,LandingType
0,1,2006-03-24T22:30:00.000Z,5e9e4502f5090995de566f86,0,5e9d0d95eda69955f709d1eb,5eb0e4b5b6c3bb0006eeb1e1,20.0,LEO,DARPA,Kwajalein Atoll,167.743129,9.047721,Falcon 1,BNone,"[{'core': '5e9e289df35918033d3b2623', 'flight'...",Failure,
1,2,2007-03-21T01:10:00.000Z,5e9e4502f5090995de566f86,0,5e9d0d95eda69955f709d1eb,5eb0e4b6b6c3bb0006eeb1e2,,LEO,DARPA,Kwajalein Atoll,167.743129,9.047721,Falcon 1,BNone,"[{'core': '5e9e289ef35918416a3b2624', 'flight'...",Failure,
2,3,2008-08-03T03:34:00.000Z,5e9e4502f5090995de566f86,0,5e9d0d95eda69955f709d1eb,5eb0e4b6b6c3bb0006eeb1e3,,LEO,NASA,Kwajalein Atoll,167.743129,9.047721,Falcon 1,BNone,"[{'core': '5e9e289ef3591814873b2625', 'flight'...",Failure,
3,4,2008-09-28T23:15:00.000Z,5e9e4502f5090995de566f86,1,5e9d0d95eda69955f709d1eb,5eb0e4b7b6c3bb0006eeb1e5,165.0,LEO,SpaceX,Kwajalein Atoll,167.743129,9.047721,Falcon 1,BNone,"[{'core': '5e9e289ef3591855dc3b2626', 'flight'...",Failure,
4,5,2009-07-13T03:35:00.000Z,5e9e4502f5090995de566f86,1,5e9d0d95eda69955f709d1eb,5eb0e4b7b6c3bb0006eeb1e6,200.0,LEO,ATSB,Kwajalein Atoll,167.743129,9.047721,Falcon 1,BNone,"[{'core': '5e9e289ef359184f103b2627', 'flight'...",Failure,


In [6]:
# Save DataFrame to CSV
df.to_csv('spacex_launch_data.csv', index=False)
print("\nData saved to 'spacex_launch_data.csv'")

# Display data summary
print("\n=== DATA SUMMARY ===")
print(f"Total Launches: {len(df)}")
print(f"Successful Launches: {df['Success'].sum()}")
print(f"Failed Launches: {len(df) - df['Success'].sum()}")
print(f"Success Rate: {(df['Success'].sum() / len(df) * 100):.2f}%")

print("\n=== LAUNCHES BY SITE ===")
print(df['LaunchSiteName'].value_counts())

print("\n=== LAUNCHES BY ROCKET ===")
print(df['RocketName'].value_counts())

print("\n=== ORBIT TYPES ===")
print(df['Orbit'].value_counts().head(10))


Data saved to 'spacex_launch_data.csv'

=== DATA SUMMARY ===
Total Launches: 205
Successful Launches: 181
Failed Launches: 24
Success Rate: 88.29%

=== LAUNCHES BY SITE ===
LaunchSiteName
CCSFS SLC 40       112
KSC LC 39A          58
VAFB SLC 4E         30
Kwajalein Atoll      5
Name: count, dtype: int64

=== LAUNCHES BY ROCKET ===
RocketName
Falcon 9        195
Falcon 1          5
Falcon Heavy      5
Name: count, dtype: int64

=== ORBIT TYPES ===
Orbit
VLEO     59
GTO      36
ISS      33
LEO      20
PO       15
SSO      13
MEO       8
TLI       2
GEO       2
ES-L1     1
Name: count, dtype: int64
