# SpaceX Falcon 9 first stage Landing Prediction

## Import Libraries and Define Auxiliary Functions

### Import Libraries

In [1]:
import requests
import pandas as pd
import numpy as np
import datetime

# Lift pandas' display limits: no cap on the number of columns shown and no
# truncation of long cell contents.
pd.options.display.max_columns = None
pd.options.display.max_colwidth = None


### Auxiliary Functions

Define helper functions to extract specific details about rockets, launch sites, payloads, and core data:

In [2]:
def getBoosterVersion(data):
    """Append the rocket name of every launch in ``data`` to ``BoosterVersion``.

    Each entry of ``data['rocket']`` is used as a rocket ID in a request to the
    SpaceX v4 ``/rockets/{id}`` endpoint; the ``name`` field of the JSON reply
    (or ``None`` when the field is absent) is appended to the global
    ``BoosterVersion`` list.

    Args:
        data: Mapping or DataFrame whose ``'rocket'`` entry iterates over
            rocket IDs.

    Returns:
        None. Exactly one value per entry is appended to ``BoosterVersion``.
    """
    names_by_id = {}  # many launches share a rocket ID; fetch each ID only once
    for x in data['rocket']:
        if not x:
            # Still append a placeholder so the list stays aligned with the
            # rows of ``data``; skipping would shorten the list and break the
            # later column assignment.
            BoosterVersion.append(None)
            continue
        if x not in names_by_id:
            # timeout: requests waits indefinitely by default
            response = requests.get(
                f"https://api.spacexdata.com/v4/rockets/{x}", timeout=30
            ).json()
            names_by_id[x] = response.get('name', None)
        BoosterVersion.append(names_by_id[x])

def getLaunchSite(data):
    """Append launchpad coordinates and name for every launch in ``data``.

    Each entry of ``data['launchpad']`` is used as a launchpad ID in a request
    to the SpaceX v4 ``/launchpads/{id}`` endpoint. The ``longitude``,
    ``latitude`` and ``name`` fields of the JSON reply (``None`` when absent)
    are appended to the global ``Longitude``, ``Latitude`` and ``LaunchSite``
    lists respectively.

    Args:
        data: Mapping or DataFrame whose ``'launchpad'`` entry iterates over
            launchpad IDs.

    Returns:
        None. Exactly one value per entry is appended to each of the three
        lists.
    """
    site_by_id = {}  # launchpad ID -> (longitude, latitude, name); fetch each ID once
    for x in data['launchpad']:
        if not x:
            # Placeholder keeps all three lists aligned with the rows of ``data``.
            site = (None, None, None)
        else:
            if x not in site_by_id:
                # timeout: requests waits indefinitely by default
                response = requests.get(
                    f"https://api.spacexdata.com/v4/launchpads/{x}", timeout=30
                ).json()
                site_by_id[x] = (
                    response.get('longitude', None),
                    response.get('latitude', None),
                    response.get('name', None),
                )
            site = site_by_id[x]
        Longitude.append(site[0])
        Latitude.append(site[1])
        LaunchSite.append(site[2])

def getPayloadData(data):
    """Append payload mass and orbit for every launch in ``data``.

    Each entry of ``data['payloads']`` is used as a payload ID in a request to
    the SpaceX v4 ``/payloads/{id}`` endpoint. The ``mass_kg`` and ``orbit``
    fields of the JSON reply (``None`` when absent) are appended to the global
    ``PayloadMass`` and ``Orbit`` lists.

    Args:
        data: Mapping or DataFrame whose ``'payloads'`` entry iterates over
            payload IDs.

    Returns:
        None. Exactly one value per entry is appended to each list.
    """
    for load in data['payloads']:
        mass_kg = orbit = None
        if load:
            # timeout: requests waits indefinitely by default
            response = requests.get(
                f"https://api.spacexdata.com/v4/payloads/{load}", timeout=30
            ).json()
            mass_kg = response.get('mass_kg', None)
            orbit = response.get('orbit', None)
        # Append even when the ID is missing so both lists stay aligned with
        # the rows of ``data``.
        PayloadMass.append(mass_kg)
        Orbit.append(orbit)

def getCoreData(data):
    """Append core details and landing information for every launch in ``data``.

    Each entry of ``data['cores']`` is a dict describing one core. When its
    ``'core'`` value is truthy it is used as a core ID in a request to the
    SpaceX v4 ``/cores/{id}`` endpoint, and the reply's ``block``,
    ``reuse_count`` and ``serial`` fields are appended to the global ``Block``,
    ``ReusedCount`` and ``Serial`` lists; otherwise ``None`` is appended to
    each of them.

    The remaining values come straight from the entry itself:
    ``Outcome`` receives ``"<landing_success> <landing_type>"``, and
    ``Flights``, ``GridFins``, ``Reused``, ``Legs`` and ``LandingPad`` receive
    the ``flight``, ``gridfins``, ``reused``, ``legs`` and ``landpad`` values.

    Args:
        data: Mapping or DataFrame whose ``'cores'`` entry iterates over core
            dicts with the keys named above.

    Returns:
        None. Exactly one value per entry is appended to each of the nine lists.
    """
    for core in data['cores']:
        # Defaults used when the launch has no core ID to look up.
        block = reuse_count = serial = None
        if core['core']:
            # timeout: requests waits indefinitely by default
            response = requests.get(
                f"https://api.spacexdata.com/v4/cores/{core['core']}", timeout=30
            ).json()
            block = response.get('block', None)
            reuse_count = response.get('reuse_count', None)
            serial = response.get('serial', None)
        Block.append(block)
        ReusedCount.append(reuse_count)
        Serial.append(serial)
        Outcome.append(f"{core['landing_success']} {core['landing_type']}")
        Flights.append(core['flight'])
        GridFins.append(core['gridfins'])
        Reused.append(core['reused'])
        Legs.append(core['legs'])
        LandingPad.append(core['landpad'])


## Fetch Data from SpaceX API

In [3]:
spacex_url = "https://api.spacexdata.com/v4/launches/past"
# timeout: requests waits indefinitely by default
response = requests.get(spacex_url, timeout=30)

# Stop on a failed request. Merely printing would leave `data` undefined (or
# stale from an earlier run) and the following cells would fail confusingly.
if response.status_code != 200:
    raise RuntimeError(
        f"Failed to fetch data: HTTP {response.status_code} from {spacex_url}"
    )

data = response.json()
print("Data fetched successfully.")


Data fetched successfully.


## Convert JSON Data to DataFrame

In [4]:
# Flatten the fetched JSON records into a DataFrame
data = pd.json_normalize(data)

# Preview the first 5 rows of the result
print(data.head(5))


       static_fire_date_utc  static_fire_date_unix    net  window  \
0  2006-03-17T00:00:00.000Z           1.142554e+09  False     0.0   
1                      None                    NaN  False     0.0   
2                      None                    NaN  False     0.0   
3  2008-09-20T00:00:00.000Z           1.221869e+09  False     0.0   
4                      None                    NaN  False     0.0   

                     rocket success  \
0  5e9d0d95eda69955f709d1eb   False   
1  5e9d0d95eda69955f709d1eb   False   
2  5e9d0d95eda69955f709d1eb   False   
3  5e9d0d95eda69955f709d1eb    True   
4  5e9d0d95eda69955f709d1eb    True   

                                                                                                            failures  \
0                                                [{'time': 33, 'altitude': None, 'reason': 'merlin engine failure'}]   
1            [{'time': 301, 'altitude': 289, 'reason': 'harmonic oscillation leading to premature engine shutd

## Subset and Clean the Data

Retain only relevant columns and clean the dataset:

In [5]:
# Keep only the columns used in the rest of the notebook
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

# Remove rows with multiple cores or payloads.
# .copy() makes the filtered result an independent frame, so the column
# assignments below do not raise SettingWithCopyWarning.
data = data[data['cores'].map(len) == 1]
data = data[data['payloads'].map(len) == 1].copy()

# Extract single values from lists in 'cores' and 'payloads'
data['cores'] = data['cores'].map(lambda x: x[0])
data['payloads'] = data['payloads'].map(lambda x: x[0])

# Convert 'date_utc' to a plain date and keep launches up to 2020-11-13 (inclusive).
# Copy again so later cells can add columns to `data` without the same warning.
data['date'] = pd.to_datetime(data['date_utc']).dt.date
data = data[data['date'] <= datetime.date(2020, 11, 13)].copy()


## Initialize Global Variables

Define empty lists to store the extracted data:

In [6]:
# One independent, empty accumulator per column the helper functions fill.
# Rocket
BoosterVersion = []
# Payload
PayloadMass, Orbit = [], []
# Launch site
LaunchSite, Longitude, Latitude = [], [], []
# Core and landing
Outcome, Flights, GridFins = [], [], []
Reused, Legs, LandingPad = [], [], []
Block, ReusedCount, Serial = [], [], []


## Apply Helper Functions

In [7]:
# Empty every accumulator first so that re-running this cell does not append a
# second copy of each value (the lists would then no longer match the number
# of rows in `data`).
for collected in (BoosterVersion, PayloadMass, Orbit, LaunchSite, Outcome,
                  Flights, GridFins, Reused, Legs, LandingPad, Block,
                  ReusedCount, Serial, Longitude, Latitude):
    collected.clear()

# Each helper walks one column of `data` and fills its accumulator lists.
getBoosterVersion(data)
getLaunchSite(data)
getPayloadData(data)
getCoreData(data)


## Combine Data into a DataFrame

Add the collected data to the main DataFrame:

In [8]:
# New column name -> collected values, listed in the order the columns are added
extracted_columns = {
    'BoosterVersion': BoosterVersion,
    'PayloadMass': PayloadMass,
    'Orbit': Orbit,
    'LaunchSite': LaunchSite,
    'Outcome': Outcome,
    'Flights': Flights,
    'GridFins': GridFins,
    'Reused': Reused,
    'Legs': Legs,
    'LandingPad': LandingPad,
    'Block': Block,
    'ReusedCount': ReusedCount,
    'Serial': Serial,
    'Longitude': Longitude,
    'Latitude': Latitude,
}

# Attach each list to the main DataFrame as a column
for column_name, column_values in extracted_columns.items():
    data[column_name] = column_values

print(data.head())  # Inspect the updated DataFrame


                     rocket                  payloads  \
0  5e9d0d95eda69955f709d1eb  5eb0e4b5b6c3bb0006eeb1e1   
1  5e9d0d95eda69955f709d1eb  5eb0e4b6b6c3bb0006eeb1e2   
3  5e9d0d95eda69955f709d1eb  5eb0e4b7b6c3bb0006eeb1e5   
4  5e9d0d95eda69955f709d1eb  5eb0e4b7b6c3bb0006eeb1e6   
5  5e9d0d95eda69973a809d1ec  5eb0e4b7b6c3bb0006eeb1e7   

                  launchpad  \
0  5e9e4502f5090995de566f86   
1  5e9e4502f5090995de566f86   
3  5e9e4502f5090995de566f86   
4  5e9e4502f5090995de566f86   
5  5e9e4501f509094ba4566f84   

                                                                                                                                                                                            cores  \
0  {'core': '5e9e289df35918033d3b2623', 'flight': 1, 'gridfins': False, 'legs': False, 'reused': False, 'landing_attempt': False, 'landing_success': None, 'landing_type': None, 'landpad': None}   
1  {'core': '5e9e289ef35918416a3b2624', 'flight': 1, 'gridfins': False, 'leg

## Filter the DataFrame to Include Only Falcon 9 Launches

First, filter the DataFrame to remove Falcon 1 launches.

In [11]:
# Select every launch whose booster is not a Falcon 1, as an independent copy
not_falcon1 = data['BoosterVersion'] != 'Falcon 1'
data_falcon9 = data.loc[not_falcon1].copy()

# Add a FlightNumber column that numbers the remaining launches from 1
data_falcon9['FlightNumber'] = list(range(1, len(data_falcon9) + 1))



## Check for Missing Values

In [12]:
# Check for missing values
missing_values = data_falcon9.isnull().sum()
print(missing_values)


rocket             0
payloads           0
launchpad          0
cores              0
flight_number      0
date_utc           0
date               0
BoosterVersion     0
PayloadMass        5
Orbit              0
LaunchSite         0
Outcome            0
Flights            0
GridFins           0
Reused             0
Legs               0
LandingPad        26
Block              0
ReusedCount        0
Serial             0
Longitude          0
Latitude           0
FlightNumber       0
dtype: int64


##  Handle Missing Values in the PayloadMass Column

Replace missing values (np.nan) in the PayloadMass column with the mean value:

In [15]:
import numpy as np

# Calculate the mean of the PayloadMass column (missing entries are skipped by .mean())
payload_mass_mean = data_falcon9['PayloadMass'].mean()

# Fill the missing entries with the mean. fillna is the dedicated API for this
# and treats both NaN and None as missing; the result is assigned back rather
# than using inplace=True.
data_falcon9['PayloadMass'] = data_falcon9['PayloadMass'].fillna(payload_mass_mean)




## Export the Cleaned DataFrame to a CSV File

In [16]:
# Write the cleaned Falcon 9 table to disk without the index column
data_falcon9.to_csv(path_or_buf='dataset_part_1.csv', index=False)
print("Data has been saved to 'dataset_part_1.csv'.")


Data has been saved to 'dataset_part_1.csv'.
