In [1]:
"""
SpaceX Falcon 9 - Data Wrangling
Data cleaning and feature engineering
"""

import pandas as pd
import numpy as np

# Load data
df = pd.read_csv('spacex_launch_data.csv')
print(f"ðŸ“Š Data loaded: {len(df)} rows, {len(df.columns)} columns")

# Convert dates
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.month

# Handle missing values
df['PayloadMass'].fillna(df['PayloadMass'].median(), inplace=True)
df['Orbit'].fillna('Unknown', inplace=True)
df['LandingOutcome'].fillna('No attempt', inplace=True)

# Create target variable
df['Class'] = df['Success'].astype(int)

# Feature Engineering
def categorize_site(site):
    if pd.isna(site):
        return 'Unknown'
    site = str(site).upper()
    if 'CAPE' in site or 'CCAFS' in site:
        return 'CCAFS'
    elif 'KENNEDY' in site or 'KSC' in site:
        return 'KSC'
    elif 'VANDENBERG' in site or 'VAFB' in site:
        return 'VAFB'
    return 'Other'

df['Site_Group'] = df['LaunchSiteName'].apply(categorize_site)

# Payload categories
bins = [0, 2000, 4000, 6000, 10000, 20000]
labels = ['Very Light', 'Light', 'Medium', 'Heavy', 'Very Heavy']
df['PayloadMass_Category'] = pd.cut(df['PayloadMass'], bins=bins, labels=labels)

# Save clean data
df.to_csv('spacex_clean.csv', index=False)
print(f"âœ… Clean data saved: spacex_clean.csv")
print(f"Success Rate: {df['Class'].mean():.2%}")


ðŸ“Š Data loaded: 205 rows, 17 columns
âœ… Clean data saved: spacex_clean.csv
Success Rate: 88.29%


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['PayloadMass'].fillna(df['PayloadMass'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Orbit'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we a