In [73]:
# Space Mission Data Analysis - Interactive Dashboard

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Install and import Dash libraries
%pip install dash
%pip install dash-bootstrap-components
import dash
from dash import dcc, html, Input, Output, State, callback_context
from dash.exceptions import PreventUpdate
import dash_bootstrap_components as dbc

# Other imports
import os
import joblib
from datetime import datetime
from io import BytesIO
import base64
import pathlib


Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [74]:
def load_and_preprocess_data(file_path):
    """
    Load the space-mission CSV and derive the calendar columns used downstream.

    Args:
        file_path (str): Path to the CSV file (anything ``pd.read_csv`` accepts).

    Returns:
        pd.DataFrame: The rows whose ``Launch_Date`` could be parsed, with
        ``Launch_Date`` converted to datetime and the extra columns ``Year``,
        ``Month``, ``Day``, ``DayOfWeek`` (Monday=0), ``DayName`` and
        ``MonthName`` (English names). The original row index is kept.

    Raises:
        KeyError: If ``Launch_Date``, ``Agency`` or ``Mission_Status`` is
            missing after the column aliases have been applied.
    """
    missions_df = pd.read_csv(file_path)

    # Alias source columns to the names the rest of the notebook expects.
    # The source column is copied, not renamed, so 'Company' stays available.
    column_aliases = {'Company': 'Agency'}
    for source_col, target_col in column_aliases.items():
        if source_col in missions_df.columns:
            missions_df[target_col] = missions_df[source_col]

    # Fail early with a readable message when the schema is incomplete
    required_columns = ['Launch_Date', 'Agency', 'Mission_Status']
    missing_columns = [col for col in required_columns if col not in missions_df.columns]
    if missing_columns:
        raise KeyError(f"Missing required columns: {', '.join(missing_columns)}")

    # Unparseable dates become NaT and are dropped on the next line
    missions_df['Launch_Date'] = pd.to_datetime(missions_df['Launch_Date'], errors='coerce')

    # .copy() gives an independent frame so the column assignments below
    # never write into a view of the unfiltered data
    missions_df = missions_df.dropna(subset=['Launch_Date']).copy()

    # Every remaining Launch_Date is a valid timestamp, so the vectorised
    # accessors need no per-row null handling
    launch = missions_df['Launch_Date'].dt
    missions_df['Year'] = launch.year
    missions_df['Month'] = launch.month
    missions_df['Day'] = launch.day
    missions_df['DayOfWeek'] = launch.dayofweek
    missions_df['DayName'] = launch.day_name()
    missions_df['MonthName'] = launch.month_name()

    return missions_df

In [75]:
# Performance Analysis Functions
def calculate_success_rate_by_year(df):
    """
    Calculate mission success rate by year

    Args:
        df (pd.DataFrame): Mission data with ``Year`` and ``Mission_Status``
            columns. It is not modified.

    Returns:
        pd.DataFrame: One row per year (ascending) with the columns ``Year``,
        ``Success_Rate`` (percentage of rows whose status is exactly
        ``'Success'``) and ``Total_Missions`` (integer row count). An empty
        input yields an empty frame that still has these three columns.
    """
    # 1.0 for a successful mission, 0.0 otherwise; the mean of these flags
    # within a year is that year's success fraction
    success_flags = df['Mission_Status'].eq('Success').astype(float)
    flags_by_year = success_flags.groupby(df['Year'])

    # Built from plain grouped reductions rather than groupby().apply() so
    # the output columns exist even when there are no rows to group
    success_by_year = (
        pd.DataFrame({
            'Success_Rate': flags_by_year.mean() * 100,
            'Total_Missions': flags_by_year.size(),
        })
        .rename_axis('Year')
        .reset_index()
    )
    return success_by_year


In [76]:
def calculate_agency_performance(df):
    """
    Summarise launch outcomes per space agency.

    Args:
        df (pd.DataFrame): Mission data with ``Agency`` and
            ``Mission_Status`` columns.

    Returns:
        pd.DataFrame: One row per agency with ``Agency``, ``Total_Launches``,
        ``Success_Count``, ``Failure_Count``, ``Partial_Success_Count`` and
        ``Success_Rate`` (successes as a percentage of total launches).
    """
    # Output column -> (source column, reducer); dict order fixes column order
    aggregations = {
        'Total_Launches': ('Agency', 'count'),
        'Success_Count': ('Mission_Status', lambda statuses: (statuses == 'Success').sum()),
        'Failure_Count': ('Mission_Status', lambda statuses: (statuses == 'Failure').sum()),
        'Partial_Success_Count': (
            'Mission_Status',
            lambda statuses: (statuses == 'Partial Success').sum(),
        ),
    }
    per_agency = df.groupby('Agency').agg(**aggregations).reset_index()

    # Share of launches that succeeded, expressed as a percentage
    successes = per_agency['Success_Count']
    launches = per_agency['Total_Launches']
    per_agency['Success_Rate'] = successes.div(launches).mul(100)

    return per_agency


In [77]:
# Visualization Functions
def create_success_time_series(df, year_range, selected_agencies=None, selected_rockets=None):
    """
    Create success rate time series plot

    Args:
        df (pd.DataFrame): Mission data with ``Year``, ``Agency`` and
            ``Mission_Status`` columns (plus ``Rocket`` when a rocket filter
            is used)
        year_range (list): Start and end years, both inclusive
        selected_agencies (list, optional): Agencies to keep; an empty list
            or None keeps all agencies
        selected_rockets (list, optional): Rockets to keep; an empty list or
            None keeps all rockets

    Returns:
        go.Figure: Line chart of yearly success rate, or a titled placeholder
        figure with a "no data" note when the filters match no missions

    Raises:
        KeyError: If ``selected_rockets`` is given but the data has no
            ``Rocket`` column
    """
    chart_title = 'Mission Success Rate Over Time'
    axis_labels = {'Success_Rate': 'Success Rate (%)', 'Year': 'Launch Year'}

    # Keep launches inside the inclusive year window
    start_year, end_year = year_range[0], year_range[1]
    filtered_df = df[(df['Year'] >= start_year) & (df['Year'] <= end_year)]

    # Apply agency filter
    if selected_agencies:
        filtered_df = filtered_df[filtered_df['Agency'].isin(selected_agencies)]

    # Apply rocket filter
    if selected_rockets:
        if 'Rocket' not in filtered_df.columns:
            raise KeyError("Cannot filter by rocket: the data has no 'Rocket' column")
        filtered_df = filtered_df[filtered_df['Rocket'].isin(selected_rockets)]

    # An empty selection has no yearly rates to plot; return a labelled
    # placeholder instead of handing an empty aggregation to px.line
    if filtered_df.empty:
        fig = go.Figure()
        fig.update_layout(
            title=chart_title,
            xaxis_title=axis_labels['Year'],
            yaxis_title=axis_labels['Success_Rate'],
        )
        fig.add_annotation(
            text='No missions match the selected filters',
            xref='paper',
            yref='paper',
            x=0.5,
            y=0.5,
            showarrow=False,
        )
        return fig

    # Calculate success rate by year
    success_by_year = calculate_success_rate_by_year(filtered_df)

    # Create figure
    fig = px.line(
        success_by_year,
        x='Year',
        y='Success_Rate',
        title=chart_title,
        labels=axis_labels
    )

    return fig


In [78]:
def create_agency_performance_chart(df, year_range, selected_agencies=None):
    """
    Build the bar chart comparing success rates across agencies.

    Args:
        df (pd.DataFrame): Mission data
        year_range (list): First and last year to include (both inclusive)
        selected_agencies (list, optional): Agencies to keep; a falsy value
            keeps every agency

    Returns:
        go.Figure: Bar chart with one bar per agency showing its success rate
    """
    # Restrict to the requested year window
    first_year = year_range[0]
    last_year = year_range[1]
    within_window = df['Year'].ge(first_year) & df['Year'].le(last_year)
    subset = df[within_window]

    # Narrow to the chosen agencies, if any were picked
    if selected_agencies:
        chosen = subset['Agency'].isin(selected_agencies)
        subset = subset[chosen]

    # Aggregate per agency and plot the resulting success rates
    return px.bar(
        calculate_agency_performance(subset),
        x='Agency',
        y='Success_Rate',
        title='Agency Mission Success Rates',
        labels={'Success_Rate': 'Success Rate (%)', 'Agency': 'Space Agency'},
    )


In [79]:
# Machine Learning Prediction Function
def predict_mission_success(model, input_data):
    """
    Return the model's success probability for the first input row.

    Args:
        model: Object exposing ``predict_proba(input_data)``
        input_data (pd.DataFrame): Mission input features, passed to the
            model unchanged

    Returns:
        float: Entry ``[0][1]`` of the ``predict_proba`` result, i.e. the
        second class probability of the first row.
        NOTE(review): assumes class index 1 is the "success" class; confirm
        against the trained model's class ordering.
    """
    # No feature engineering is applied yet; inputs go straight to the model
    class_probabilities = model.predict_proba(input_data)

    # First row, second class column
    first_row = class_probabilities[0]
    return first_row[1]


In [80]:
# Main Dashboard Creation
def _normalize_year_range(year_range, min_year, max_year):
    """
    Coerce a RangeSlider value into ``[start, end]`` integer years.

    Each bound falls back to its own default (``min_year`` for the start,
    ``max_year`` for the end) when it is missing, None, NaN, infinite or not
    numeric. Finite values are truncated with ``int(float(value))``.
    """
    bounds = list(year_range) if year_range else []
    normalized = []
    for position, fallback in enumerate((min_year, max_year)):
        raw = bounds[position] if position < len(bounds) else None
        try:
            numeric = float(raw)
        except (TypeError, ValueError):
            numeric = float('nan')
        normalized.append(int(numeric) if np.isfinite(numeric) else fallback)
    return normalized


def create_dashboard(missions_df):
    """
    Create interactive Dash dashboard

    The layout holds a year RangeSlider (``year-slider``), an agency
    multi-select (``agency-dropdown``) and two graphs
    (``success-time-series`` and ``agency-performance-chart``). Both graphs
    are redrawn whenever either filter changes.

    Args:
        missions_df (pd.DataFrame): Preprocessed mission data with ``Year``,
            ``Agency`` and ``Mission_Status`` columns

    Returns:
        dash.Dash: Configured dashboard application

    Raises:
        ValueError: If ``missions_df`` has no rows, because the slider
            bounds cannot be derived from an empty ``Year`` column
    """
    if missions_df.empty:
        raise ValueError("Cannot build the dashboard: the mission data is empty")

    # Plain Python ints keep the component props independent of numpy scalars
    min_year = int(missions_df['Year'].min())
    max_year = int(missions_df['Year'].max())

    # Missing agencies cannot be offered as dropdown choices
    agency_options = [
        {'label': agency, 'value': agency}
        for agency in missions_df['Agency'].dropna().unique()
    ]

    # Initialize Dash app
    app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

    # Dashboard layout
    app.layout = html.Div([
        # Header
        html.H1("Space Mission Analysis Dashboard"),

        # Filters
        html.Div([
            # Year range slider with a labelled mark every five years
            dcc.RangeSlider(
                id='year-slider',
                min=min_year,
                max=max_year,
                value=[min_year, max_year],
                marks={str(year): str(year) for year in range(min_year, max_year + 1, 5)},
                step=1
            ),

            # Agency multi-select
            dcc.Dropdown(
                id='agency-dropdown',
                options=agency_options,
                multi=True,
                placeholder='Select Agencies'
            )
        ]),

        # Visualizations
        html.Div([
            # Success Rate Time Series
            dcc.Graph(id='success-time-series'),

            # Agency Performance Chart
            dcc.Graph(id='agency-performance-chart')
        ])
    ])

    # Callbacks for interactive elements
    @app.callback(
        Output('success-time-series', 'figure'),
        [Input('year-slider', 'value'),
         Input('agency-dropdown', 'value')]
    )
    def update_success_time_series(year_range, selected_agencies):
        # Both callbacks share one normalisation so they always agree on
        # the year window being displayed
        return create_success_time_series(
            missions_df,
            _normalize_year_range(year_range, min_year, max_year),
            selected_agencies
        )

    @app.callback(
        Output('agency-performance-chart', 'figure'),
        [Input('year-slider', 'value'),
         Input('agency-dropdown', 'value')]
    )
    def update_agency_performance_chart(year_range, selected_agencies):
        return create_agency_performance_chart(
            missions_df,
            _normalize_year_range(year_range, min_year, max_year),
            selected_agencies
        )

    return app

# Main Execution
# Default dataset location; override it with the MISSIONS_DATA_PATH
# environment variable or by passing data_path to main()
DEFAULT_MISSIONS_CSV = '/Users/naelamacbookair/desktop backup/self projects/space_mission_analysis/notebooks/data/processed/missions_cleaned_final.csv'


def main(data_path=None):
    """
    Load the mission data, build the dashboard and start the Dash server.

    Args:
        data_path (str, optional): CSV file to load. When omitted, the
            ``MISSIONS_DATA_PATH`` environment variable is used if set,
            otherwise ``DEFAULT_MISSIONS_CSV``.

    Errors are reported on stdout rather than raised, so running this cell
    never leaves a traceback in the notebook.
    """
    if data_path is None:
        data_path = os.environ.get('MISSIONS_DATA_PATH', DEFAULT_MISSIONS_CSV)

    try:
        # Load data
        missions_df = load_and_preprocess_data(data_path)

        # Create and run dashboard
        app = create_dashboard(missions_df)
        app.run(debug=True)  # Changed from run_server to run
    except KeyError as e:
        print(f"Error: {str(e)}")
        print("Please ensure your dataset contains all required columns: Launch_Date, Agency, Mission_Status")
    except Exception as e:
        print(f"Unexpected error: {str(e)}")

# Run the dashboard
if __name__ == '__main__':
    main()



