In [42]:
# Requests allows us to make HTTP requests which we will use to get data from an API
import requests
# Pandas is a software library written for the Python programming language for data manipulation and analysis.
import pandas as pd
# NumPy is a library for the Python programming language, adding support for large, multi-dimensional arrays and matrices, along with a large collection of high-level mathematical functions to operate on these arrays
import numpy as np
# Datetime is a library that allows us to represent dates
import datetime

# Setting this option will print all collumns of a dataframe
pd.set_option('display.max_columns', None)
# Setting this option will print all of the data in a feature
pd.set_option('display.max_colwidth', None)

In [43]:
# Below we will define a series of helper functions that will help us use the API to extract information using identification numbers in the launch data.

# From the rocket column we would like to learn the booster name.

# Takes the dataset and uses the rocket column to call the API and append the data to the list
def getBoosterVersion(data):
    for x in data['rocket']:
       if x:
        response = requests.get("https://api.spacexdata.com/v4/rockets/"+str(x)).json()
        BoosterVersion.append(response['name'])

In [44]:
# From the launchpad we would like to know the name of the launch site being used, the logitude, and the latitude.

# Takes the dataset and uses the launchpad column to call the API and append the data to the list
def getLaunchSite(data):
    for x in data['launchpad']:
       if x:
         response = requests.get("https://api.spacexdata.com/v4/launchpads/"+str(x)).json()
         Longitude.append(response['longitude'])
         Latitude.append(response['latitude'])
         LaunchSite.append(response['name'])

In [45]:
# From the payload we would like to learn the mass of the payload and the orbit that it is going to.

# Takes the dataset and uses the payloads column to call the API and append the data to the lists
def getPayloadData(data):
    for load in data['payloads']:
       if load:
        response = requests.get("https://api.spacexdata.com/v4/payloads/"+load).json()
        PayloadMass.append(response['mass_kg'])
        Orbit.append(response['orbit'])

In [46]:
# From cores we would like to learn the outcome of the landing, the type of the landing, number of flights with that core, whether gridfins were used, wheter the core is reused,
# wheter legs were used, the landing pad used, the block of the core which is a number used to seperate version of cores, the number of times this specific core has been reused, 
# and the serial of the core.

# Takes the dataset and uses the cores column to call the API and append the data to the lists
def getCoreData(data):
    for core in data['cores']:
            if core['core'] != None:
                response = requests.get("https://api.spacexdata.com/v4/cores/"+core['core']).json()
                Block.append(response['block'])
                ReusedCount.append(response['reuse_count'])
                Serial.append(response['serial'])
            else:
                Block.append(None)
                ReusedCount.append(None)
                Serial.append(None)
            Outcome.append(str(core['landing_success'])+' '+str(core['landing_type']))
            Flights.append(core['flight'])
            GridFins.append(core['gridfins'])
            Reused.append(core['reused'])
            Legs.append(core['legs'])
            LandingPad.append(core['landpad'])

In [47]:
# Now let's start requesting rocket launch data from SpaceX API with the following URL:

spacex_url="https://api.spacexdata.com/v4/launches/past"

In [48]:
response = requests.get(spacex_url)

In [49]:
# Check the content of the response

# print(response.content)

In [50]:
# Task 1: Request and parse the SpaceX launch data using the GET request
# To make the requested JSON results more consistent, we will use the following static response object for this project:

static_json_url='https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/API_call_spacex_api.json'


In [51]:
# We should see that the request was successfull with the 200 status response code

response.status_code

200

In [52]:
# Now we decode the response content as a Json using .json() and turn it into a Pandas dataframe using .json_normalize()

# Use json_normalize meethod to convert the json result into a dataframe

# Decode the response content as a JSON
info = response.json()

# Convert the JSON result into a DataFrame
data = pd.json_normalize(info)


# print(df.head(5))

In [53]:
!pip install openpyxl

data.to_excel('output.xlsx', sheet_name='SpaceX', index=False)



PermissionError: [Errno 13] Permission denied: 'output.xlsx'

In [None]:
# You will notice that a lot of the data are IDs. For example the rocket column has no information about the rocket just an 
# identification number.

# We will now use the API again to get information about the launches using the IDs given for each launch. 
# Specifically we will be using columns rocket, payloads, launchpad, and cores.

# Lets take a subset of our dataframe keeping only the features we want and the flight number, and date_utc.
data = data[['rocket', 'payloads', 'launchpad', 'cores', 'flight_number', 'date_utc']]

In [None]:
# We will remove rows with multiple cores because those are falcon rockets with 2 extra rocket boosters and rows
# that have multiple payloads in a single rocket.
data = data[data['cores'].map(len)==1]
data = data[data['payloads'].map(len)==1]


In [None]:
# Since payloads and cores are lists of size 1 we will also extract the single value in the list and replace the feature.
data['cores'] = data['cores'].map(lambda x : x[0])
data['payloads'] = data['payloads'].map(lambda x : x[0])

In [None]:
# We also want to convert the date_utc to a datetime datatype and then extracting the date leaving the time
data['date'] = pd.to_datetime(data['date_utc']).dt.date

# From the rocket we would like to learn the booster name

# From the payload we would like to learn the mass of the payload and the orbit that it is going to

# From the launchpad we would like to know the name of the launch site being used, the longitude, and the latitude.

# From cores we would like to learn the outcome of the landing, the type of the landing, number of flights with that core, 
# whether gridfins were used, whether the core is reused, whether legs were used, the landing pad used, the block of the core
# which is a number used to seperate version of cores, the number of times this specific core has been reused, and the serial 
# of the core.

In [None]:
# The data from these requests will be stored in lists and will be used to create a new dataframe.

#Global variables 
BoosterVersion = []
PayloadMass = []
Orbit = []
LaunchSite = []
Outcome = []
Flights = []
GridFins = []
Reused = []
Legs = []
LandingPad = []
Block = []
ReusedCount = []
Serial = []
Longitude = []
Latitude = []

In [None]:
# These functions will apply the outputs globally to the above variables. Let's take a looks at BoosterVersion variable. 
# Before we apply getBoosterVersion the list is empty:

BoosterVersion

[]

In [None]:
# Now, let's appl getBoosterVersion function method to get the booster version

# Call getBoosterVersion
getBoosterVersion(data)
BoosterVersion[0:5]

['Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 1', 'Falcon 9']

In [None]:
# we can apply the rest of the functions here:

# Call getLaunchSite
getLaunchSite(data)
# Call getPayloadData
getPayloadData(data)
# Call getCoreData
getCoreData(data)

In [None]:
# Finally lets construct our dataset using the data we have obtained. We we combine the columns into a dictionary.

launch_dict = {'FlightNumber': list(data['flight_number']),
'Date': list(data['date']),
'BoosterVersion':BoosterVersion,
'PayloadMass':PayloadMass,
'Orbit':Orbit,
'LaunchSite':LaunchSite,
'Outcome':Outcome,
'Flights':Flights,
'GridFins':GridFins,
'Reused':Reused,
'Legs':Legs,
'LandingPad':LandingPad,
'Block':Block,
'ReusedCount':ReusedCount,
'Serial':Serial,
'Longitude': Longitude,
'Latitude': Latitude}

In [None]:
# Then, we need to create a Pandas data frame from the dictionary launch_dict.

df = pd.DataFrame(launch_dict)

print(df.head(5))

   FlightNumber        Date BoosterVersion  PayloadMass Orbit  \
0             1  2006-03-24       Falcon 1         20.0   LEO   
1             2  2007-03-21       Falcon 1          NaN   LEO   
2             4  2008-09-28       Falcon 1        165.0   LEO   
3             5  2009-07-13       Falcon 1        200.0   LEO   
4             6  2010-06-04       Falcon 9          NaN   LEO   

        LaunchSite    Outcome  Flights  GridFins  Reused   Legs LandingPad  \
0  Kwajalein Atoll  None None        1     False   False  False       None   
1  Kwajalein Atoll  None None        1     False   False  False       None   
2  Kwajalein Atoll  None None        1     False   False  False       None   
3  Kwajalein Atoll  None None        1     False   False  False       None   
4     CCSFS SLC 40  None None        1     False   False  False       None   

   Block  ReusedCount    Serial   Longitude   Latitude  
0    NaN            0  Merlin1A  167.743129   9.047721  
1    NaN            0  Mer

In [None]:
# Task 2: Filter the dataframe to only include Falcon 9 launches
# Finally we will remove the Falcon 1 launches keeping only the Falcon 9 launches. Filter the data dataframe using the 
# BoosterVersion column to only keep the Falcon 9 launches. Save the filtered data to a new dataframe called data_falcon9.

# Hint data['BoosterVersion']!='Falcon 1'

#print(df.columns)


data_falcon9 = df[df['BoosterVersion'] != 'Falcon 1']

NameError: name 'df' is not defined

In [None]:
data_falcon9.head()
data_falcon9.describe

NameError: name 'data_falcon9' is not defined

In [None]:
# Data Wrangling
# We can see below that some of the rows are missing values in our dataset.

data_falcon9.isnull().sum()

FlightNumber       0
Date               0
BoosterVersion     0
PayloadMass        5
Orbit              0
LaunchSite         0
Outcome            0
Flights            0
GridFins           0
Reused             0
Legs               0
LandingPad        26
Block              0
ReusedCount        0
Serial             0
Longitude          0
Latitude           0
dtype: int64

In [None]:
# Before we can continue we must deal with these missing values. The LandingPad column will retain None values to represent when 
# landing pads were not used.

# Task 3: Dealing with Missing Values - Calculate below the mean for the PayloadMass using the .mean(). Then use the mean and the
# .replace() function to replace np.nan values in the data with the mean you calculated.


# Calculate the mean of the 'PayloadMass' column
mean_payload = data_falcon9['PayloadMass'].mean()

# Replace np.nan values in the 'PayloadMass' column with the mean
data_falcon9['PayloadMass'] = data_falcon9['PayloadMass'].replace(np.nan, mean_payload)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == "":


In [None]:
# You should see the number of missing values of the PayLoadMass change to zero.

# Now we should have no missing values in our dataset except for in LandingPad.

# We can now export it to a CSV for the next section,but to make the answers consistent, in the next lab we will provide data 
# in a pre-selected date range.