### Importing libraries

In [2]:
import requests
import pandas as pd

### Fetch data

In [2]:
# Download the full launch list from the SpaceX v4 REST endpoint.
url = "https://api.spacexdata.com/v4/launches"
response = requests.get(url, timeout=10)
response.raise_for_status()  # stop on a 4xx/5xx instead of decoding an error body

# The decoded JSON body is a list with one dict per launch.
data = response.json()

first_launch = data[0]
print("Total launches:", len(data))
print(first_launch.keys())  # field names available on a launch record

Total launches: 205
dict_keys(['fairings', 'links', 'static_fire_date_utc', 'static_fire_date_unix', 'net', 'window', 'rocket', 'success', 'failures', 'details', 'crew', 'ships', 'capsules', 'payloads', 'launchpad', 'flight_number', 'name', 'date_utc', 'date_unix', 'date_local', 'date_precision', 'upcoming', 'cores', 'auto_update', 'tbd', 'launch_library_id', 'id'])


### Normalize JSON → Table

In [3]:
# Flatten the launch records into a table and keep only the columns named in `cols`.
cols = ["name", "date_utc", "success", "rocket"]
df = pd.json_normalize(data)[cols]

# Convert the timestamp strings to datetimes; values that cannot be parsed become NaT.
df = df.assign(date_utc=pd.to_datetime(df["date_utc"], errors="coerce"))

print(df.head())

# Save the trimmed table (without the index) for the cleaning section below.
df.to_csv("launches.csv", index=False)


          name                  date_utc success                    rocket
0    FalconSat 2006-03-24 22:30:00+00:00   False  5e9d0d95eda69955f709d1eb
1      DemoSat 2007-03-21 01:10:00+00:00   False  5e9d0d95eda69955f709d1eb
2  Trailblazer 2008-08-03 03:34:00+00:00   False  5e9d0d95eda69955f709d1eb
3       RatSat 2008-09-28 23:15:00+00:00    True  5e9d0d95eda69955f709d1eb
4     RazakSat 2009-07-13 03:35:00+00:00    True  5e9d0d95eda69955f709d1eb


## Launches Dataset Cleaning

In [None]:
# Reload the exported launches. `date_utc` is parsed back into datetimes here
# because a CSV round-trip stores timestamps as text, so without `parse_dates`
# the column would come back as plain strings.
df = pd.read_csv('launches.csv', parse_dates=['date_utc'])

In [8]:
# Preview the first five rows of the reloaded table.
df.head(5)

Unnamed: 0,name,date_utc,success,rocket
0,FalconSat,2006-03-24 22:30:00+00:00,False,5e9d0d95eda69955f709d1eb
1,DemoSat,2007-03-21 01:10:00+00:00,False,5e9d0d95eda69955f709d1eb
2,Trailblazer,2008-08-03 03:34:00+00:00,False,5e9d0d95eda69955f709d1eb
3,RatSat,2008-09-28 23:15:00+00:00,True,5e9d0d95eda69955f709d1eb
4,RazakSat,2009-07-13 03:35:00+00:00,True,5e9d0d95eda69955f709d1eb


In [17]:
# Replace missing values in the success column with the label 'Unknown';
# every other column is left as it is.
df = df.fillna({'success': 'Unknown'})
df

Unnamed: 0,name,date_utc,success,rocket
0,FalconSat,2006-03-24 22:30:00+00:00,False,5e9d0d95eda69955f709d1eb
1,DemoSat,2007-03-21 01:10:00+00:00,False,5e9d0d95eda69955f709d1eb
2,Trailblazer,2008-08-03 03:34:00+00:00,False,5e9d0d95eda69955f709d1eb
3,RatSat,2008-09-28 23:15:00+00:00,True,5e9d0d95eda69955f709d1eb
4,RazakSat,2009-07-13 03:35:00+00:00,True,5e9d0d95eda69955f709d1eb
...,...,...,...,...
200,Transporter-6,2022-12-01 00:00:00+00:00,Unknown,5e9d0d95eda69973a809d1ec
201,TTL-1,2022-12-01 00:00:00+00:00,Unknown,5e9d0d95eda69973a809d1ec
202,WorldView Legion 1 & 2,2022-12-01 00:00:00+00:00,Unknown,5e9d0d95eda69973a809d1ec
203,Viasat-3 & Arcturus,2022-12-01 00:00:00+00:00,Unknown,5e9d0d95eda69974db09d1ed


In [20]:
# Encode the boolean outcomes numerically (True -> 1, False -> 0).
# Values that are neither True nor False are left unchanged.
success_codes = {True: 1, False: 0}
df = df.replace({'success': success_codes})
df['success']

0            0
1            0
2            0
3            1
4            1
        ...   
200    Unknown
201    Unknown
202    Unknown
203    Unknown
204    Unknown
Name: success, Length: 205, dtype: object

In [21]:
# Preview the first five rows after encoding the success column.
df.head(5)

Unnamed: 0,name,date_utc,success,rocket
0,FalconSat,2006-03-24 22:30:00+00:00,0,5e9d0d95eda69955f709d1eb
1,DemoSat,2007-03-21 01:10:00+00:00,0,5e9d0d95eda69955f709d1eb
2,Trailblazer,2008-08-03 03:34:00+00:00,0,5e9d0d95eda69955f709d1eb
3,RatSat,2008-09-28 23:15:00+00:00,1,5e9d0d95eda69955f709d1eb
4,RazakSat,2009-07-13 03:35:00+00:00,1,5e9d0d95eda69955f709d1eb


In [23]:
# Remove duplicate launches: rows sharing the same name + date_utc are treated
# as duplicates and only the first occurrence is kept.
#
# drop_duplicates has to run on the whole frame. Assigning a de-duplicated
# two-column slice back into df re-aligns on the index, so no row is ever
# removed; the duplicate rows merely end up with NaN in name/date_utc.
df = df.drop_duplicates(subset=['name', 'date_utc']).reset_index(drop=True)
df.head()

Unnamed: 0,name,date_utc,success,rocket
0,FalconSat,2006-03-24 22:30:00+00:00,0,5e9d0d95eda69955f709d1eb
1,DemoSat,2007-03-21 01:10:00+00:00,0,5e9d0d95eda69955f709d1eb
2,Trailblazer,2008-08-03 03:34:00+00:00,0,5e9d0d95eda69955f709d1eb
3,RatSat,2008-09-28 23:15:00+00:00,1,5e9d0d95eda69955f709d1eb
4,RazakSat,2009-07-13 03:35:00+00:00,1,5e9d0d95eda69955f709d1eb


In [24]:
# Save the cleaned table without the positional index (as is done for
# launches.csv), so reloading the file does not produce a spurious
# "Unnamed: 0" column.
df.to_csv('clean_launches.csv', index=False)