In [59]:
import requests
import pandas as pd
import numpy as np
import datetime

# Setting option will print all colomns of a dataframe
pd.set_option('display.max_columns', None)

# Setting this option will print all of the data in a feature
pd.set_option('display.max_colwidth', None)

# Global variables
BoosterVersion = []
PayloadMass = []
Orbit = []
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []
Longitude = []
Latitude = []

def getBoosterVersion(data):
    global BoosterVersion  # Declare as global to modify the global list
    BoosterVersion = []  # Reset the list
    for launch in data:  # Iterate through the list of launch dictionaries
        rocket_ids = launch.get('rocket')  # Get the 'rocket' key (could be single ID or a list)
        if rocket_ids:
            if isinstance(rocket_ids, list):
                for rocket_id in rocket_ids: #If it's a list, iterate through rocket IDs
                    response = requests.get(f"https://api.spacexdata.com/v4/rockets/{str(rocket_id)}").json()
                    BoosterVersion.append(response['name'])
            else:
                response = requests.get(f"https://api.spacexdata.com/v4/rockets/{str(rocket_ids)}").json()
                BoosterVersion.append(response['name'])
        else:
            BoosterVersion.append(None)  # Handle missing 'rocket' key
    print(f"BoosterVersion length: {len(BoosterVersion)}")

def getLaunchSite(data):
    global LaunchSite, Longitude, Latitude
    LaunchSite, Longitude, Latitude = [], [], []
    for launch in data:
        launchpad_ids = launch.get('launchpad')  # Use .get() to handle missing keys
        if launchpad_ids:
            if isinstance(launchpad_ids,list):
                for launchpad_id in launchpad_ids:
                    response = requests.get(f"https://api.spacexdata.com/v4/launchpads/{str(launchpad_id)}").json()
                    Longitude.append(response['longitude'])
                    Latitude.append(response['latitude'])
                    LaunchSite.append(response['name'])
            else:
                response = requests.get(f"https://api.spacexdata.com/v4/launchpads/{str(launchpad_ids)}").json()
                Longitude.append(response['longitude'])
                Latitude.append(response['latitude'])
                LaunchSite.append(response['name'])
        else:
            Longitude.append(None)
            Latitude.append(None)
            LaunchSite.append(None)
    print(f"LaunchSite length: {len(LaunchSite)}, Longitude length: {len(Longitude)}, Latitude length: {len(Latitude)}")


def getPayloadData(data):
    global PayloadMass, Orbit
    PayloadMass, Orbit = [], []
    for launch in data:
        payload_ids = launch.get('payloads')
        if payload_ids:
            if isinstance(payload_ids, list):
                for payload_id in payload_ids:
                     response = requests.get(f"https://api.spacexdata.com/v4/payloads/{payload_id}").json()
                     PayloadMass.append(response.get('mass_kg'))  # Use .get()
                     Orbit.append(response.get('orbit'))
            else:
                response = requests.get(f"https://api.spacexdata.com/v4/payloads/{payload_ids}").json()
                PayloadMass.append(response.get('mass_kg'))  # Use .get()
                Orbit.append(response.get('orbit'))
        else:
            PayloadMass.append(None)
            Orbit.append(None)
    print(f"PayloadMass length: {len(PayloadMass)}, Orbit length: {len(Orbit)}")


def getCoreData(data):
    global Outcome, Flights, GridFins, Reused, Legs, LandingPad, Block, ReusedCount, Serial
    Outcome, Flights, GridFins, Reused, Legs, LandingPad, Block, ReusedCount, Serial = [], [], [], [], [], [], [], [], []

    for launch in data: #Iterate through launches
        for core in launch.get('cores', []):  # Iterate through cores, handle missing 'cores' key
            core_data = core.get('core')
            if core_data is not None:
                response = requests.get(f"https://api.spacexdata.com/v4/cores/{core_data}").json()
                Block.append(response.get('block'))
                ReusedCount.append(response.get('reuse_count'))
                Serial.append(response.get('serial'))
            else:
                Block.append(None)
                ReusedCount.append(None)
                Serial.append(None)

            Outcome.append(str(core.get('landing_success'))+' '+str(core.get('landing_type')))
            Flights.append(core.get('flight'))
            GridFins.append(core.get('gridfins'))
            Reused.append(core.get('reused'))
            Legs.append(core.get('legs'))
            LandingPad.append(core.get('landpad'))

    print(f"Outcome length: {len(Outcome)}, Flights length: {len(Flights)}, GridFins length: {len(GridFins)}, Reused length: {len(Reused)}, Legs length: {len(Legs)}, LandingPad length: {len(LandingPad)}, Block length: {len(Block)}, ReusedCount length: {len(ReusedCount)}, Serial length: {len(Serial)}")


spacex_url="https://api.spacexdata.com/v4/launches"
response = requests.get(spacex_url)

# Check for successful request
if response.status_code == 200:
    data = response.json()  # Parse the JSON response
    print("Data retrieval successful. Proceeding with processing.")
else:
    print(f"Error: API request failed with status code {response.status_code}")
    #  Potentially exit or return an error
    exit()

# Call the data extraction functions
getBoosterVersion(data)
getLaunchSite(data)
getPayloadData(data)
getCoreData(data)

#Determine the lengths
lengths = [len(BoosterVersion), len(PayloadMass), len(Orbit), len(LaunchSite), len(Outcome), len(Flights), len(GridFins), len(Reused), len(Legs), len(LandingPad), len(Block), len(ReusedCount), len(Serial), len(Longitude), len(Latitude)]
print(f"All Lengths: {lengths}")

min_len = min(lengths)

BoosterVersion = BoosterVersion[:min_len]
PayloadMass = PayloadMass[:min_len]
Orbit = Orbit[:min_len]
LaunchSite = LaunchSite[:min_len]
Outcome = Outcome[:min_len]
Flights = Flights[:min_len]
GridFins = GridFins[:min_len]
Reused = Reused[:min_len]
Legs = Legs[:min_len]
LandingPad = LandingPad[:min_len]
Block = Block[:min_len]
ReusedCount = ReusedCount[:min_len]
Serial = Serial[:min_len]
Longitude = Longitude[:min_len]
Latitude = Latitude[:min_len]

# Finally construct our dataset using the data we have obtained
launch_dict = {'FlightNumber': list(range(1,min_len+1)),
                'Date': [datetime.date(year=int(data[i]['date_utc'][:4]),month=int(data[i]['date_utc'][5:7]),day=int(data[i]['date_utc'][8:10])) for i in range(min_len)], #Extract date from the dictionaries
                'BoosterVersion':BoosterVersion,
                'PayloadMass':PayloadMass,
                'Orbit':Orbit,
                'LaunchSite':LaunchSite,
                'Outcome':Outcome,
                'Flights':Flights,
                'GridFins':GridFins,
                'Reused':Reused,
                'Legs':Legs,
                'LandingPad':LandingPad,
                'Block':Block,
                'ReusedCount':ReusedCount,
                'Serial':Serial,
                'Longitude': Longitude,
                'Latitude': Latitude}

# Create a data frame from launch dict
data_falcon9 = pd.DataFrame(launch_dict)

#Task 2: Filter the dataframe to only include Falcon 9 launches
data_falcon9 = data_falcon9[data_falcon9['BoosterVersion'] != 'Falcon 1'] # Hint: data['BoosterVersion'] != 'Falcon 1'

data_falcon9.loc[:,'FlightNumber'] = list(range(1, data_falcon9.shape[0]+1))
data_falcon9

#Task 3: Dealing with Missing Values

# 3.1: Calculate below the mean for the PayloadMass using the .mean().  Then use the mean and the .replace() function to replace the np.nan values in the data with the mean you calculated.

# Calculate the mean value of PayloadMass column
mean_payloadmass = data_falcon9['PayloadMass'].mean()

# Replace the np.nan values with its mean value
data_falcon9['PayloadMass'].replace(to_replace=float('NaN'), value=mean_payloadmass, inplace=True)

# Show the summary of the dataframe
print(data_falcon9.describe())

# Show the head of the dataframe
print(data_falcon9.head())

# You should see the number of missing values of the 'PayloadMass' change to zero.

# Now we should have no missing values in our dataset except in LandingPad.

# -----------------------------------------------------------------------------------------
# 3.2: We can now export it to a CSV for the next section,but to make the answers consistent,in the next lab we will provide data in a pre-selected date range.
data_falcon9.to_csv('dataset_part_1.csv', index=False)

Data retrieval successful. Proceeding with processing.
BoosterVersion length: 205
LaunchSite length: 205, Longitude length: 205, Latitude length: 205
PayloadMass length: 234, Orbit length: 234
Outcome length: 215, Flights length: 215, GridFins length: 215, Reused length: 215, Legs length: 215, LandingPad length: 215, Block length: 215, ReusedCount length: 215, Serial length: 215
All Lengths: [205, 234, 234, 205, 215, 215, 215, 215, 215, 215, 215, 215, 215, 205, 205]
       FlightNumber   PayloadMass     Flights       Block  ReusedCount  \
count    200.000000    200.000000  191.000000  191.000000   191.000000   
mean     100.500000   6692.520170    3.602094    4.157068     5.183246   
std       57.879185   5402.601105    3.240987    1.409158     4.714248   
min        1.000000      1.000000    1.000000    1.000000     0.000000   
25%       50.750000   2205.000000    1.000000    3.500000     1.000000   
50%      100.500000   5550.000000    2.000000    5.000000     4.000000   
75%      15