# Case Study 3.1: Challenger Launch

In this classic challenge we try to assess whether the data available prior to Challenger going up in flames was sufficient to show that performing the launch at 30 degrees was a terrible idea.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_validate
from sklearn.linear_model import RidgeClassifier, LogisticRegression
from sklearn.pipeline import Pipeline

from interpret import show
from interpret.data import Marginal

In [None]:
# Read the observations and discard the row-id column, which is not a feature.
data = pd.read_csv('data/challenger-data.csv').drop(columns='Observation')

# Target and standardised single-feature design matrix used by every model below.
# NOTE(review): X is presumably the launch temperature and Y the failure
# indicator -- confirm against the data file.
y = data['Y']
scaler = StandardScaler()
X = scaler.fit_transform(data[['X']])

# We'll start by visualizing the marginal distribution between X and Y
# (on the raw, unscaled feature so the axis stays in the original units).
marginals = Marginal()
show(marginals.explain_data(X=data[['X']], y=y))

### Modeling Strategy

We fit this data with simple logistic regression and with its ridge variant. After plotting both, it is obvious that launching at 30 degrees is a terrible idea. In scikit-learn, only the `LogisticRegression` class — not the `RidgeClassifier` class — provides probabilistic predictions; using the logistic model, I can tell you that there was a roughly 80% chance that Challenger would explode when launching in these conditions.

In [None]:
# `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2, so passing it
# raises a TypeError on current releases. X has already been standardised by the
# StandardScaler, so no extra scaling is required here.
# The old normalize=True divided each centred column by its l2 norm, which for a
# standardised column is sqrt(n_samples); multiplying alpha by n_samples keeps the
# effective penalty approximately the same (exact on the full data set, slightly
# off inside each CV fold, whose training size is smaller).
ridge = RidgeClassifier(alpha=0.1 * len(X))

# n_jobs=-1 uses whatever cores are available instead of a hard-coded worker count.
cross_validate(estimator=ridge, X=X, y=y, n_jobs=-1, return_train_score=True)

In [None]:
# Fit the ridge classifier on the full data set, then display the learned
# slope and intercept (both expressed on the standardised X scale).
ridge.fit(X, y)
(ridge.coef_, ridge.intercept_)

In [None]:
def sigmoid(x, model):
    """Apply the logistic function to a fitted linear model's decision value.

    Parameters
    ----------
    x : array-like or scalar
        Feature value(s), on the same scale the model was fitted on.
    model : object
        Any object exposing ``intercept_`` and ``coef_`` attributes.

    Returns
    -------
    numpy.ndarray
        ``1 / (1 + exp(-(intercept_ + coef_ * x)))``; the shape follows NumPy
        broadcasting of ``coef_`` against ``x``.
    """
    decision_value = model.intercept_ + model.coef_ * x
    return 1 / (1 + np.exp(-decision_value))

# Grid of standardised X values over which the fitted curve is drawn.
rng = np.arange(-6, 3, .1)

# NOTE(review): this squashes the ridge decision value through a sigmoid purely
# for visualisation; RidgeClassifier does not model probabilities, so treat the
# curve as illustrative -- confirm this is the intent.
pred = sigmoid(rng, ridge).ravel()

fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(rng, pred)

In [None]:
# NOTE(review): this redraws the same observations and curve (`rng`, `pred`) as
# the cell directly before it -- candidate for removal.
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(rng, pred)

In [None]:
# Logistic regression with scikit-learn's default settings, fitted on the
# standardised feature; `fit` returns the estimator itself.
linear = LogisticRegression().fit(X, y)
def predict_risk(temp):
    """Predict the positive-class probability at one or more raw temperatures.

    Parameters
    ----------
    temp : float or 1-D array-like of float
        Temperature(s) in the original (unscaled) units of the ``X`` column.

    Returns
    -------
    numpy.ndarray
        Column 1 of ``linear.predict_proba`` -- one value per temperature,
        so shape ``(1,)`` for a scalar input. This is the probability of the
        second entry of ``linear.classes_`` (presumably ``Y == 1``; confirm).
    """
    # The scaler was fitted on a DataFrame with the column 'X'; passing a frame
    # with the same column name avoids scikit-learn's "X does not have valid
    # feature names" warning that a bare nested list triggers.
    on_the_day = pd.DataFrame({'X': np.atleast_1d(temp)})
    return linear.predict_proba(scaler.transform(on_the_day))[:, 1]

In [None]:
# Estimated risk at 30 degrees, the launch temperature this case study asks about.
predict_risk(temp=30)

In [None]:
# Same standardised grid as for the ridge plot, now evaluated with the logistic
# model's intercept and coefficient.
rng = np.arange(-6, 3, .1)
pred = sigmoid(rng, linear).ravel()

# Observations as points, fitted logistic curve as a line.
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(rng, pred)