# In [4]:
# Import required libraries
import pandas as pd
import numpy as np

# Step 1: announce the load, then read the remote CSV into a DataFrame.
print("Step 1: Loading the SpaceX dataset...")

# Remote CSV location; pandas downloads and parses it in a single call.
DATASET_URL = "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-DS0321EN-SkillsNetwork/datasets/dataset_part_1.csv"
df = pd.read_csv(DATASET_URL)

# Preview the leading rows as a quick sanity check of the load.
preview = df.head(10)
print("\nFirst 10 rows of the dataset:", preview, sep="\n")

# Section banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 50,
    "DATA WRANGLING - TASK 1: Calculate the number of launches on each site",
    "=" * 50,
    sep="\n",
)

# Tally how many rows reference each distinct LaunchSite value.
site_column = df["LaunchSite"]
launch_site_counts = site_column.value_counts()
print("\nNumber of launches on each site:", launch_site_counts, sep="\n")

# Section banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 50,
    "TASK 2: Calculate the number and occurrence of each orbit",
    "=" * 50,
    sep="\n",
)

# Tally how many rows reference each distinct Orbit value.
orbit_column = df["Orbit"]
orbit_counts = orbit_column.value_counts()
print("\nNumber and occurrence of each orbit:", orbit_counts, sep="\n")

print("\n" + "="*50)
print("TASK 3: Calculate the number and occurrence of mission outcome of the orbits")
print("="*50)

# Frequency of every distinct value in the Outcome column (value_counts()
# sorts by descending count and drops missing values by default).
landing_outcomes = df['Outcome'].value_counts()
print("\nLanding outcomes (counts):")
print(landing_outcomes)

# Show each label next to its position in the count-sorted index.
# This listing is for inspection only; nothing below depends on positions.
print("\nLanding outcomes with indices:")
for i, outcome in enumerate(landing_outcomes.index):
    print(i, outcome)

# Collect the labels that do not represent a successful landing.
# Labels are selected by content instead of by hard-coded positions in the
# count-sorted index: positions shift whenever the counts (or the order of
# tied counts) change, and indexing past the end raises IndexError when the
# data holds fewer distinct outcomes.
# NOTE(review): assumes an Outcome label starts with "True" exactly when the
# landing succeeded (e.g. "True ASDS" vs "False ASDS" / "None None") --
# confirm against the dataset.
bad_outcomes = {
    label
    for label in landing_outcomes.index
    if not str(label).startswith("True")
}
print("\nBad outcomes (failed landings):")
print(bad_outcomes)

# Section banner: blank line, rule, title, rule.
print(
    "\n" + "=" * 50,
    "TASK 4: Create a landing outcome label from Outcome column",
    "=" * 50,
    sep="\n",
)

# Binary label per row: 0 when the row's Outcome is one of bad_outcomes,
# 1 for every other value.
landing_class = [0 if label in bad_outcomes else 1 for label in df["Outcome"]]

# Store the labels on the frame as the new Class column.
df["Class"] = landing_class

class_preview = df[["Class"]].head(8)
print("\nFirst 8 rows of the Class column:", class_preview, sep="\n")

frame_preview = df.head(5)
print("\nFirst 5 rows of the updated dataframe:", frame_preview, sep="\n")

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "SUCCESS RATE", "=" * 50, sep="\n")

# Class holds 0/1 labels, so its mean is the fraction of rows labelled 1.
class_column = df["Class"]
success_rate = class_column.mean()

# Report the rate both as a fraction and as a percentage.
print(f"\nSuccess rate (mean of Class column): {success_rate:.4f}")
print(f"Percentage of successful landings: {success_rate * 100:.2f}%")

# Section banner: blank line, rule, title, rule.
print("\n" + "=" * 50, "ADDITIONAL DATA ANALYSIS", "=" * 50, sep="\n")

# Share of missing entries per column, as a percentage of the row count.
print("\nPercentage of missing values in each column:")
null_counts = df.isnull().sum()
row_total = len(df)
missing_percentage = null_counts / row_total * 100
print(missing_percentage)

# The dtype listing is what tells numerical columns from categorical ones.
column_types = df.dtypes
print("\nData types of each column:", column_types, sep="\n")

# Write the frame (now including Class) to a local CSV without the row index.
output_path = "dataset_part_2.csv"
df.to_csv(output_path, index=False)
print("\nData exported to 'dataset_part_2.csv'")

# Closing banner: blank line, rule, completion message, rule.
print(
    "\n" + "=" * 50,
    "All tasks completed successfully!",
    "=" * 50,
    sep="\n",
)

# Cell output: ModuleNotFoundError: No module named 'pandas'
# (pandas must be installed, e.g. `pip install pandas`, before this cell can run)