# 🚀 SpaceX Launch Data - Exploratory Data Analysis

## Objectives:
1. Load and inspect the data
2. Clean and preprocess features
3. Engineer new features
4. Create comprehensive visualizations
5. Analyze patterns and correlations

In [None]:
# Import libraries
import sys

# Make the project's local modules importable. The path is relative to the
# notebook's working directory, so the notebook must be run from its own folder.
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings

# Suppress all warnings for the rest of the session to keep cell output short.
warnings.filterwarnings('ignore')

from data_preprocessing import load_data, clean_data, create_features
# Import the helpers by name (instead of `import *`) so that each function
# called in this notebook has a visible origin and a missing helper fails
# here, at import time. Add further names here if later cells need them.
from visualizations import (
    create_3d_scatter,
    create_interactive_timeline,
    create_summary_stats,
    plot_booster_reuse_analysis,
    plot_correlation_heatmap,
    plot_cumulative_success_rate,
    plot_orbit_launch_site_heatmap,
    plot_payload_distribution,
    plot_success_rate_by_category,
    plot_temporal_trends,
)

# Set display options
pd.set_option('display.max_columns', None)  # never truncate the column list
pd.set_option('display.max_rows', 100)

print("✓ Libraries imported successfully")

## 1. Data Loading

In [3]:
# Read the raw launch records through the project's loader.
df_raw = load_data('../data/spacex_launch_data.csv')

# Report the dimensions, then preview the first rows; the bare trailing
# expression is what the notebook renders as the cell result.
print(f"Dataset shape: {df_raw.shape}", "\nFirst 5 rows:", sep="\n")
df_raw.head()

NameError: name 'load_data' is not defined

In [None]:
# Data info
# Print a heading, then let `info()` write its own summary of `df_raw`.
# NOTE(review): assumes `load_data` returned a pandas DataFrame — confirm.
print("Dataset Information:")
df_raw.info()

In [None]:
# Per-column count of missing entries, narrowed to the columns that have any.
# The bare last expression is what the notebook renders as the cell result.
print("Missing Values:")
missing = df_raw.isna().sum()
missing.loc[missing > 0]

In [None]:
# Basic statistics
# `include='all'` summarises every column, not only the numeric ones; `.T`
# transposes the result so that each original column becomes one row.
# NOTE(review): assumes `df_raw` is a pandas DataFrame — confirm.
df_raw.describe(include='all').T

## 2. Data Cleaning & Feature Engineering

In [None]:
# Run the project's cleaning step on the raw frame, report the resulting
# shape, and preview the first rows as the cell's displayed value.
df = clean_data(df_raw)
print("✓ Data cleaned", f"New shape: {df.shape}", sep="\n")
df.head()

In [None]:
# Create additional features
# Snapshot the column names before the call so that the "New columns" line
# lists only what create_features() added, not every column in the frame.
_columns_before = set(df.columns)
df = create_features(df)
_new_columns = [col for col in df.columns if col not in _columns_before]
print("✓ Features engineered")
print(f"Final shape: {df.shape}")
print(f"\nNew columns: {_new_columns}")

In [None]:
# Save cleaned data
# When the cells run top to bottom, `df` is the output of create_features(),
# so the saved file also contains the engineered columns.
# `index=False` leaves the row index out of the CSV.
# The path is relative to the notebook's working directory.
df.to_csv('../data/spacex_cleaned.csv', index=False)
print("✓ Cleaned data saved to ../data/spacex_cleaned.csv")

## 3. Summary Statistics

In [None]:
# Print a banner, then one line per summary entry. `:.<40` left-aligns the
# key and pads it with dots to 40 characters so the values line up.
summary = create_summary_stats(df)
print("=" * 50, "SPACEX LAUNCH DATA SUMMARY", "=" * 50, sep="\n")
for key, value in summary.items():
    print(f"{key:.<40} {value}")

## 4. Visualizations

In [None]:
# Success rate per orbit type. Arguments: data frame, the column name passed
# to the helper, and the title string (roles assumed from the call — confirm).
fig = plot_success_rate_by_category(
    df,
    'Orbit_Simplified',
    'Mission Success Rate by Orbit Type',
)
plt.show()

In [None]:
# Success rate per launch site. Arguments: data frame, the column name passed
# to the helper, and the title string (roles assumed from the call — confirm).
fig = plot_success_rate_by_category(
    df,
    'Launch_Site_Simplified',
    'Mission Success Rate by Launch Site',
)
plt.show()

In [None]:
# Success rate per booster type. Arguments: data frame, the column name passed
# to the helper, and the title string (roles assumed from the call — confirm).
fig = plot_success_rate_by_category(
    df,
    'Booster_Type',
    'Mission Success Rate by Booster Type',
)
plt.show()

In [None]:
# Payload distribution
# The helper is not defined in this notebook (presumably a project plotting
# function); its return value is kept in `fig`. `plt.show()` then displays
# all open Matplotlib figures.
fig = plot_payload_distribution(df)
plt.show()

In [None]:
# Temporal trends
# Build the plot with the project helper, keep its return value in `fig`,
# then display all open Matplotlib figures.
fig = plot_temporal_trends(df)
plt.show()

In [None]:
# Cumulative success rate
# Build the plot with the project helper, keep its return value in `fig`,
# then display all open Matplotlib figures.
fig = plot_cumulative_success_rate(df)
plt.show()

In [None]:
# Booster reuse analysis
# Build the plot with the project helper, keep its return value in `fig`,
# then display all open Matplotlib figures.
fig = plot_booster_reuse_analysis(df)
plt.show()

In [None]:
# Orbit and launch site heatmap
# Build the plot with the project helper, keep its return value in `fig`,
# then display all open Matplotlib figures.
fig = plot_orbit_launch_site_heatmap(df)
plt.show()

In [None]:
# Correlation heatmap over the columns listed below. Every name must exist in
# `df` when this cell runs (presumably several are added by the
# feature-engineering step — confirm).
numeric_cols = [
    'Payload Mass (kg)',
    'Year',
    'Month',
    'Quarter',
    'Booster_Reused',
    'Booster_Flight_Number',
    'Cumulative_Launches',
    'Orbit_Difficulty',
    'Mission_Success',
]
fig = plot_correlation_heatmap(df, numeric_cols)
plt.show()

## 5. Interactive Visualizations

In [None]:
# Interactive timeline
# NOTE(review): the helper presumably returns a Plotly figure (plotly.express
# is imported as `px` in the setup cell), whose own `show()` renders it —
# confirm against the visualizations module.
fig = create_interactive_timeline(df)
fig.show()

In [None]:
# 3D scatter plot
# NOTE(review): the helper presumably returns a Plotly figure, rendered here
# through its own `show()` rather than `plt.show()` — confirm against the
# visualizations module.
fig = create_3d_scatter(df)
fig.show()

## 6. Key Insights

### Observations:
- Mission success rate has improved significantly over time
- Certain orbit types (e.g., LEO) have higher success rates than others
- Booster reuse has become more common in recent years
- Launch sites show different success patterns
- Payload mass correlates with mission complexity