In [None]:
# Installing required packages
!pip install scikit-learn pandas numpy plotly -q

# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
import plotly.express as px

# Loading dataset
# The CSV is fetched over HTTP from the capstone repository on every run.
url = "https://raw.githubusercontent.com/pakizaim7/spacex-capstone/main/notebooks/spacex_launch_data_cleaned.csv"
df = pd.read_csv(url)

# Preparing dataset
# Ensuring payload_mass exists
# WARNING: when the column is absent, the values generated below are seeded
# random placeholders, not measurements. Any relationship a model appears to
# learn from them carries no real signal.
if 'payload_mass' not in df.columns:
    print("Column 'payload_mass' missing, creating dummy numeric values...")
    np.random.seed(42)
    df['payload_mass'] = np.random.randint(1000, 10001, size=len(df))

# Encoding 'success' as numeric if needed
if df['success'].dtype != 'int64' and df['success'].dtype != 'float64':
    df['success'] = df['success'].astype(int)

# Featuring selection (example: using payload_mass and class as features)
# NOTE(review): 'class' may encode the same launch outcome as 'success'; if so,
# using it as a feature leaks the target and inflates every score -- confirm
# against the dataset schema before trusting the metrics.
X = df[['payload_mass', 'class']]
y = df['success']

# Splitting dataset
# Stratify on the target so both outcomes are represented in the test set in
# the same proportion as in the full data; an unstratified split of an
# imbalanced target can produce a test set containing a single class.
# train_test_split raises if any class has fewer than 2 rows, so fall back to
# a plain random split in that case.
class_counts = y.value_counts()
can_stratify = len(class_counts) > 1 and class_counts.min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if can_stratify else None,
)

# Scaling features
# Fit the scaler on the training rows only, then reuse it for the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Training models

# Logistic Regression
log_model = LogisticRegression()
log_model.fit(X_train_scaled, y_train)
y_pred_log = log_model.predict(X_test_scaled)

# Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_scaled, y_train)
y_pred_rf = rf_model.predict(X_test_scaled)

# Support Vector Machine
svm_model = SVC()
svm_model.fit(X_train_scaled, y_train)
y_pred_svm = svm_model.predict(X_test_scaled)

# Evaluate models

def evaluate_model(name, y_true, y_pred, labels=(0, 1)):
    """Print a confusion matrix and classification report for one model.

    Parameters
    ----------
    name : str
        Display name printed in the section header.
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted target values, in the same order as ``y_true``.
    labels : sequence or None, default (0, 1)
        Class labels fixing the row/column order of the confusion matrix, so
        the matrix keeps one row and column per label even when a class is
        absent from ``y_true`` and ``y_pred``. Pass ``None`` to let
        scikit-learn infer the labels from the data instead.

    Returns
    -------
    None
        Everything is written to stdout.

    Raises
    ------
    ValueError
        Propagated from scikit-learn, e.g. when none of ``labels`` occurs in
        ``y_true``.
    """
    print(f"=== {name} ===")
    print("Confusion Matrix:")
    # Without explicit labels a single-class test set collapses the matrix to
    # 1x1, which hides the fact that one class was never evaluated.
    matrix_labels = None if labels is None else list(labels)
    print(confusion_matrix(y_true, y_pred, labels=matrix_labels))
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred))
    print("\n" + "="*50 + "\n")

# Report every trained classifier against the same held-out targets
model_predictions = {
    "Logistic Regression": y_pred_log,
    "Random Forest": y_pred_rf,
    "SVM": y_pred_svm,
}
for model_name, predictions in model_predictions.items():
    evaluate_model(model_name, y_test, predictions)

# Visualizing predictions for slide

# Example: Random Forest predictions vs payload_mass
# Test features with the true outcome and the Random Forest prediction
# attached as extra columns.
pred_df = X_test.assign(actual=y_test, pred_rf=y_pred_rf)

axis_labels = {'payload_mass': 'Payload Mass (kg)', 'actual': 'Launch Success'}
fig = px.scatter(
    pred_df,
    x='payload_mass',
    y='actual',
    color='pred_rf',
    title='Slide Example: Actual vs Predicted (Random Forest)',
    labels=axis_labels,
)
fig.show()


Column 'payload_mass' missing, creating dummy numeric values...
=== Logistic Regression ===
Confusion Matrix:
[[38]]

Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        38

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38



=== Random Forest ===
Confusion Matrix:
[[38]]

Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        38

    accuracy                           1.00        38
   macro avg       1.00      1.00      1.00        38
weighted avg       1.00      1.00      1.00        38



=== SVM ===
Confusion Matrix:
[[38]]

Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        38

    accuracy                           1.00        38
   macro avg 



In [None]:
#Slide 43

import plotly.graph_objects as go

# Predictions of each classifier, keyed by the name shown on the x-axis
predictions_by_model = {
    'Logistic Regression': y_pred_log,
    'Random Forest': y_pred_rf,
    'SVM': y_pred_svm,
}

# Tally how many test launches each model predicted as failure (0) / success (1)
pred_counts = pd.DataFrame({
    'Model': list(predictions_by_model),
    'Failures': [int((preds == 0).sum()) for preds in predictions_by_model.values()],
    'Successes': [int((preds == 1).sum()) for preds in predictions_by_model.values()],
})

# One bar trace per predicted outcome
fig_bar = go.Figure(data=[
    go.Bar(name=outcome, x=pred_counts['Model'], y=pred_counts[outcome])
    for outcome in ('Failures', 'Successes')
])

# Place the two outcome bars side by side for each model
layout_options = dict(
    barmode='group',
    title='Predicted Launch Outcomes by Model',
    xaxis_title='Model',
    yaxis_title='Count',
)
fig_bar.update_layout(**layout_options)

fig_bar.show()


In [None]:
#Slide 44

import plotly.figure_factory as ff
from sklearn.metrics import confusion_matrix

# Ensuring confusion matrix is always 2x2
# Rows are actual outcomes and columns are predicted outcomes, both in the
# order given by labels=[0,1] (failure first, then success).
cm = confusion_matrix(y_test, y_pred_rf, labels=[0,1])
labels = ['Failure', 'Success']

fig_cm = ff.create_annotated_heatmap(
    z=cm,
    x=labels,
    y=labels,
    colorscale='Viridis',
    showscale=True,
    hoverinfo='z',
    annotation_text=cm
)

fig_cm.update_layout(
    title='Slide 44: Random Forest Confusion Matrix',
    xaxis_title='Predicted',
    yaxis_title='Actual',
    # Heatmaps draw the first row at the bottom by default; reverse the y-axis
    # so the 'Failure' row sits on top, matching the printed matrix layout.
    yaxis_autorange='reversed'
)

fig_cm.show()
