In [2]:
!pip install dash pandas

Defaulting to user installation because normal site-packages is not writeable


In [4]:
import pandas as pd
import dash
from dash import html, dcc
from dash.dependencies import Input, Output
import plotly.express as px

In [6]:
# Read the launch records and capture the payload extremes, which are used
# later as the default selection of the payload slider.
spacex_df = pd.read_csv("spacex_launch_dash.csv")
payload_mass = spacex_df['Payload Mass (kg)']
min_payload, max_payload = payload_mass.min(), payload_mass.max()

# Preview the first rows as the cell output.
spacex_df.head()

Unnamed: 0.1,Unnamed: 0,Flight Number,Launch Site,class,Payload Mass (kg),Booster Version,Booster Version Category
0,0,1,CCAFS LC-40,0,0.0,F9 v1.0 B0003,v1.0
1,1,2,CCAFS LC-40,0,0.0,F9 v1.0 B0004,v1.0
2,2,3,CCAFS LC-40,0,525.0,F9 v1.0 B0005,v1.0
3,3,4,CCAFS LC-40,0,500.0,F9 v1.0 B0006,v1.0
4,4,5,CCAFS LC-40,0,677.0,F9 v1.0 B0007,v1.0


In [8]:
# Instantiate the Dash application.
app = dash.Dash(__name__)

# Dropdown entries: an "All Sites" choice followed by one entry per launch site.
site_options = [{'label': 'All Sites', 'value': 'ALL'}]
site_options += [
    {'label': site, 'value': site}
    for site in spacex_df['Launch Site'].unique()
]

# Tick labels shown along the payload slider.
payload_marks = {0: '0', 2500: '2.5k', 5000: '5k', 7500: '7.5k', 10000: '10k'}

# Page layout: title, site selector, pie chart, payload slider, scatter chart.
app.layout = html.Div(children=[
    html.H1(
        'SpaceX Launch Records Dashboard',
        style={'textAlign': 'center', 'color': '#503D36', 'font-size': 40},
    ),

    # TASK 1: launch-site dropdown (defaults to all sites)
    dcc.Dropdown(
        id='site-dropdown',
        options=site_options,
        value='ALL',
        placeholder="Select a Launch Site",
        searchable=True,
    ),
    html.Br(),

    # TASK 2: pie chart
    html.Div(dcc.Graph(id='success-pie-chart')),
    html.Br(),

    # TASK 3: payload range slider (0-10,000 kg in 1,000 kg steps);
    # the default selection spans the payload range found in the data.
    html.P("Payload range (Kg):"),
    dcc.RangeSlider(
        id='payload-slider',
        min=0,
        max=10000,
        step=1000,
        marks=payload_marks,
        value=[int(min_payload), int(max_payload)],
    ),

    # TASK 4: scatter chart
    html.Div(dcc.Graph(id='success-payload-scatter-chart')),
])

@app.callback(
    Output('success-pie-chart', 'figure'),
    Input('site-dropdown', 'value')
)
def update_pie_chart(selected_site):
    """Build the pie chart for the site selected in the dropdown.

    Args:
        selected_site: Current value of the ``site-dropdown`` component:
            ``'ALL'``, a launch-site name, or ``None`` when the dropdown
            selection has been cleared.

    Returns:
        A Plotly Express pie figure. For ``'ALL'`` (or a cleared dropdown) it
        shows each site's share of the summed ``class`` column; for a single
        site it shows that site's Success vs Failure counts.
    """
    # A cleared dropdown delivers None; fall back to the all-sites view instead
    # of filtering on None (which yields an empty chart titled "... for None").
    if selected_site is None or selected_site == 'ALL':
        # class is 1 for success and 0 for failure, so the sum per site is
        # the number of successful launches.
        df_all = spacex_df.groupby('Launch Site', as_index=False)['class'].sum()
        fig = px.pie(df_all, names='Launch Site', values='class',
                     title='Total Successful Launches by Site')
    else:
        df_site = spacex_df[spacex_df['Launch Site'] == selected_site]
        # Count outcomes for the chosen site and give them readable labels.
        counts = (df_site['class'].value_counts()
                  .rename_axis('Outcome')
                  .reset_index(name='count'))
        counts['Outcome'] = counts['Outcome'].map({1: 'Success', 0: 'Failure'})
        fig = px.pie(counts, names='Outcome', values='count',
                     title=f'Success vs Failure for {selected_site}',
                     color='Outcome',
                     color_discrete_map={'Success': 'green', 'Failure': 'red'})
    fig.update_traces(textinfo='percent+label')
    return fig
@app.callback(
    Output('success-payload-scatter-chart', 'figure'),
    Input('site-dropdown', 'value'),
    Input('payload-slider', 'value')
)
def update_scatter(selected_site, payload_range):
    """Build the payload-vs-outcome scatter chart for the current selections.

    Args:
        selected_site: Current value of the ``site-dropdown`` component:
            ``'ALL'``, a launch-site name, or ``None`` when the dropdown
            selection has been cleared.
        payload_range: Two-element ``[low, high]`` value of the
            ``payload-slider`` component; both bounds are inclusive.

    Returns:
        A Plotly Express scatter figure of ``Payload Mass (kg)`` against
        ``class`` for the launches matching the filters.
    """
    low, high = payload_range

    # filter by payload range (inclusive on both ends)
    df = spacex_df[(spacex_df['Payload Mass (kg)'] >= low) &
                   (spacex_df['Payload Mass (kg)'] <= high)]

    # A cleared dropdown delivers None; treat it like 'ALL' rather than
    # filtering on None, which would leave an empty chart titled "... for None".
    if selected_site is None or selected_site == 'ALL':
        title = 'Payload vs Outcome for All Sites'
    else:
        df = df[df['Launch Site'] == selected_site]
        title = f'Payload vs Outcome for {selected_site}'

    # Colour by booster category when that column exists, otherwise by site.
    color_col = 'Booster Version Category' if 'Booster Version Category' in df.columns else 'Launch Site'

    fig = px.scatter(
        df,
        x='Payload Mass (kg)', y='class',
        color=color_col,
        hover_data=['Launch Site', 'Payload Mass (kg)', 'class'],
        title=title
    )
    # class only takes the values 0 and 1, so label just those two ticks.
    fig.update_yaxes(tickmode='array', tickvals=[0,1], ticktext=['Failure (0)','Success (1)'])
    return fig


# Start the app on port 8050 with debug mode switched on.
app.run(port=8050, debug=True)

In [38]:
# Per-site launch totals and success counts, plus the success rate
# (successes / total launches).
launch_outcomes = spacex_df.groupby('Launch Site')['class']
site_stats = launch_outcomes.agg(total_launches='count', successes='sum')
site_stats['success_rate'] = site_stats['successes'].div(site_stats['total_launches'])

# Rank the sites from highest to lowest success rate and show the table.
site_stats_sorted = site_stats.sort_values(by='success_rate', ascending=False)
print(site_stats_sorted)

# Report the first row after sorting, i.e. the site with the highest rate.
top_site = site_stats_sorted.index[0]
top_rate = site_stats_sorted['success_rate'].iloc[0]
print(f"Highest success rate: {top_site} — {top_rate:.1%}")

              total_launches  successes  success_rate
Launch Site                                          
KSC LC-39A                13         10      0.769231
CCAFS SLC-40               7          3      0.428571
VAFB SLC-4E               10          4      0.400000
CCAFS LC-40               26          7      0.269231
Highest success rate: KSC LC-39A — 76.9%


In [40]:
import pandas as pd

# 1) Define payload bins (adjust if you want finer/coarser bins)
bins  = [0, 1000, 2500, 5000, 7500, 10000]
labels = ["0–1k", "1–2.5k", "2.5–5k", "5–7.5k", "7.5–10k"]

# Work on an explicit copy of the rows that have a payload value, so adding
# the bin column neither touches spacex_df nor triggers SettingWithCopyWarning.
df = spacex_df[spacex_df['Payload Mass (kg)'].notna()].copy()
# include_lowest=True keeps 0 kg payloads in the first bin.
df['PayloadBin'] = pd.cut(df['Payload Mass (kg)'], bins=bins, labels=labels, include_lowest=True)

# 2) Compute totals and successes (class=1)
# PayloadBin is categorical; pass observed=False explicitly so every bin is
# kept (as the implicit default did) and the result does not change when
# pandas switches that default.
by_bin = (df.groupby('PayloadBin', observed=False)['class']
            .agg(total_launches='count', successes='sum'))
by_bin['success_rate'] = by_bin['successes'] / by_bin['total_launches']

# 3) (Optional) require a minimum number of launches to avoid “1 point = 100%” traps
min_n = 5
by_bin_filtered = by_bin[by_bin['total_launches'] >= min_n].sort_values('success_rate', ascending=False)

print("All sites — success rate by payload bin")
print(by_bin.sort_values('success_rate', ascending=False))
print("\nWith at least", min_n, "launches:")
print(by_bin_filtered)

# 4) If you want to do it per site as well:
# observed=False again keeps site/bin combinations with no launches; their
# success_rate is NaN (0 / 0).
per_site = (df.groupby(['Launch Site','PayloadBin'], observed=False)['class']
              .agg(total_launches='count', successes='sum'))
per_site['success_rate'] = per_site['successes'] / per_site['total_launches']

print("\nPer site (sample):")
print(per_site.sort_values(['Launch Site','success_rate'], ascending=[True,False]).head(20))

All sites — success rate by payload bin
            total_launches  successes  success_rate
PayloadBin                                         
7.5–10k                  5          3      0.600000
2.5–5k                  21         12      0.571429
1–2.5k                  11          5      0.454545
5–7.5k                   9          2      0.222222
0–1k                    10          2      0.200000

With at least 5 launches:
            total_launches  successes  success_rate
PayloadBin                                         
7.5–10k                  5          3      0.600000
2.5–5k                  21         12      0.571429
1–2.5k                  11          5      0.454545
5–7.5k                   9          2      0.222222
0–1k                    10          2      0.200000

Per site (sample):
                         total_launches  successes  success_rate
Launch Site  PayloadBin                                         
CCAFS LC-40  1–2.5k                   8          3     







In [42]:
# NOTE(review): at notebook scope `df` is the frame built in the payload-bin
# cell (rows of spacex_df with a non-null payload), not the filtered frame
# that is local to update_scatter. This looks like a debug line meant to be
# placed inside that callback, after its filtering step.
print("points:", len(df))

points: 56
