In [55]:
from datasets import titanic_data

import statsmodels.api as sm
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
from scipy.special import expit
import pandas as pd
import numpy as np

from tools import roc

import plotly.express as px
from plotly.offline import init_notebook_mode
# Switch plotly into offline notebook mode before any figure is created.
# NOTE(review): connected=True presumably loads plotly.js from the network rather than
# embedding it in the notebook — confirm if the notebook must render offline.
init_notebook_mode(connected=True)

In [2]:
# titanic_data() hands back six objects: the full feature matrix and target,
# followed by what the names suggest is a train/test partition of the same data
# (presumably already preprocessed — verify in `datasets`).
(
    original_X,
    original_y,
    train_X,
    train_y,
    test_X,
    test_y,
) = titanic_data()

# Simple Logistic Regression

## Estimate test accuracy by 5-fold cross-validation

In [16]:
# Unpenalised logistic regression on the single `fare` feature.
# C=np.inf switches the regularisation off. It replaces penalty='none', which
# scikit-learn deprecated in 1.2 and rejects from 1.4 onwards, while still
# running on the older releases this notebook was written against.
model = LogisticRegression(C=np.inf)

# scikit-learn expects a 2-D feature matrix, so the fare column is reshaped to (n, 1).
fare_2d = original_X['fare'].values.reshape(-1, 1)

# cv=5 is spelled out so the fold count matches the section heading regardless of the
# library default. The value shown is the classifier's default score averaged over folds.
cross_val_score(model, fare_2d, original_y, cv=5).mean()

0.6645031699202812

## Fit Logistic Regression model

In [27]:
# statsmodels only estimates an intercept when the design matrix contains a
# constant column, so one is added next to `fare` before the model is built.
original_X_const = sm.add_constant(original_X['fare'])

# Maximum-likelihood logit of the target on [const, fare].
model = sm.Logit(endog=original_y, exog=original_X_const)
result = model.fit()

Optimization terminated successfully.
         Current function value: 0.627143
         Iterations 6


In [28]:
# Show the fit report for the single-feature logit: coefficient table
# (estimate, std err, z, p-value, 95% interval) plus log-likelihood statistics.
result.summary()

0,1,2,3
Dep. Variable:,target,No. Observations:,891.0
Model:,Logit,Df Residuals:,889.0
Method:,MLE,Df Model:,1.0
Date:,"Wed, 16 Jun 2021",Pseudo R-squ.:,0.05822
Time:,13:47:25,Log-Likelihood:,-558.78
converged:,True,LL-Null:,-593.33
Covariance Type:,nonrobust,LLR p-value:,9.427e-17

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.4519,0.072,-6.257,0.000,-0.593,-0.310
fare,0.7548,0.111,6.809,0.000,0.537,0.972


## Model plot

In [58]:
# Scatter of the observed 0/1 outcomes against fare.
# The columns are taken as plain arrays so they are paired positionally;
# Series.ravel() (used previously) is deprecated in recent pandas.
# NOTE(review): the positive class is taken to be survival, because the fit above
# gives `fare` a positive coefficient. The earlier 'probability of death' label
# contradicted that — confirm against titanic_data().
df = pd.DataFrame({
    'fare': original_X['fare'].to_numpy(),
    'probability of survival': np.ravel(original_y),
})
fig = px.scatter(df, x='fare', y='probability of survival')

# Overlay the fitted logistic curve. The coefficients come from the statsmodels
# fit (`result`) of the previous section. At this point `model` is the sm.Logit
# object, which has no coef_/intercept_, so the old expression only worked when a
# scikit-learn model happened to be left over from out-of-order execution.
fare_grid = np.linspace(-5, 10, 100)
intercept = result.params['const']
slope = result.params['fare']
survival_prob = expit(intercept + slope * fare_grid)

curve = pd.DataFrame({'fare': fare_grid, 'probability of survival': survival_prob})
fig.add_trace(px.line(curve, x='fare', y='probability of survival').data[0])

# Colour only the fitted curve; the selector leaves the scatter trace untouched.
# update_traces returns the figure, so the cell still displays it.
fig.update_traces(line_color='red', selector={'mode': 'lines'})

## Confusion matrix and ROC

In [59]:
# Fit the unpenalised single-feature classifier on the complete data set; the
# confusion matrix and ROC cells below evaluate this fitted estimator.
# C=np.inf disables regularisation and replaces penalty='none', which
# scikit-learn deprecated in 1.2 and rejects from 1.4 onwards.
model = LogisticRegression(C=np.inf)
# fit() returns the estimator itself, so `model` is the fitted classifier afterwards.
# The fare column is reshaped to (n, 1) because scikit-learn needs a 2-D matrix.
model = model.fit(original_X['fare'].values.reshape(-1, 1), original_y)

In [60]:
# In-sample predictions of the single-feature classifier.
fare_features = original_X['fare'].values.reshape(-1, 1)
predicted = model.predict(fare_features).ravel()

# For a binary target the confusion matrix is 2x2 with rows = true class and
# columns = predicted class, so unpacking row by row yields tn, fp / fn, tp.
(tn, fp), (fn, tp) = confusion_matrix(original_y, predicted)

f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}"

'TP: 82, TN: 511, FP: 38, FN: 260'

In [61]:
# Plot the ROC for the fitted single-feature classifier, evaluated on the same
# data it was fitted on. `roc` is the project helper imported from `tools`;
# the object it returns is rendered by calling .show() on it.
roc(model, original_X['fare'].values.reshape(-1, 1), original_y).show()

# Multiple Logistic Regression

## Estimate test accuracy by 5-fold cross-validation

In [76]:
# Unpenalised logistic regression on every column of original_X.
# C=np.inf switches the regularisation off. It replaces penalty='none', which
# scikit-learn deprecated in 1.2 and rejects from 1.4 onwards.
model = LogisticRegression(C=np.inf)

# cv=5 is explicit so the fold count matches the section heading regardless of the
# library default. The value shown is the classifier's default score averaged over folds.
cross_val_score(model, original_X, original_y, cv=5).mean()

0.78788525516289

## Fit Logistic Regression model

In [77]:
# statsmodels only estimates an intercept when the design matrix contains a
# constant column, so one is added to the full feature set first.
original_X_const = sm.add_constant(original_X)

# Maximum-likelihood logit of the target on the constant plus all features.
model = sm.Logit(endog=original_y, exog=original_X_const)

# The optimiser is allowed up to 3000 iterations; the recorded run needed 1271.
# NOTE(review): in the recorded summary the embarked_C / embarked_Q rows have
# standard errors around 1e6 and p-values of 1.0, which looks like a (near-)singular
# design for those columns — check how the embarked dummies are encoded.
result = model.fit(maxiter=3000)

Optimization terminated successfully.
         Current function value: 0.440810
         Iterations 1271


In [78]:
# Show the fit report for the multi-feature logit: one coefficient row per
# feature (estimate, std err, z, p-value, 95% interval) plus log-likelihood statistics.
result.summary()

0,1,2,3
Dep. Variable:,target,No. Observations:,891.0
Model:,Logit,Df Residuals:,882.0
Method:,MLE,Df Model:,8.0
Date:,"Wed, 16 Jun 2021",Pseudo R-squ.:,0.338
Time:,13:59:39,Log-Likelihood:,-392.76
converged:,True,LL-Null:,-593.33
Covariance Type:,nonrobust,LLR p-value:,1.073e-81

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.6486,0.092,-7.075,0.000,-0.828,-0.469
pclass,-0.9198,0.120,-7.665,0.000,-1.155,-0.685
sex,1.3034,0.096,13.611,0.000,1.116,1.491
age,-0.5086,0.102,-4.994,0.000,-0.708,-0.309
sibsp,-0.3586,0.120,-2.984,0.003,-0.594,-0.123
parch,-0.0730,0.095,-0.765,0.444,-0.260,0.114
fare,0.0985,0.118,0.833,0.405,-0.133,0.330
embarked_C,0.0780,3.22e+06,2.42e-08,1.000,-6.3e+06,6.3e+06
embarked_Q,0.0388,2.31e+06,1.68e-08,1.000,-4.53e+06,4.53e+06


## Confusion matrix and ROC

In [80]:
# Fit the unpenalised multi-feature classifier on the complete data set; the
# confusion matrix and ROC cells below evaluate this fitted estimator.
# C=np.inf disables regularisation and replaces penalty='none', which
# scikit-learn deprecated in 1.2 and rejects from 1.4 onwards.
model = LogisticRegression(C=np.inf)
# fit() returns the estimator itself, so `model` is the fitted classifier afterwards.
model = model.fit(original_X, original_y)

In [82]:
# In-sample predictions of the multi-feature classifier.
predicted = model.predict(original_X).ravel()

# For a binary target the confusion matrix is 2x2 with rows = true class and
# columns = predicted class, so unpacking row by row yields tn, fp / fn, tp.
(tn, fp), (fn, tp) = confusion_matrix(original_y, predicted)

f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}"

'TP: 240, TN: 471, FP: 78, FN: 102'

In [83]:
# Plot the ROC for the fitted multi-feature classifier, evaluated on the same
# data it was fitted on. `roc` is the project helper imported from `tools`;
# the object it returns is rendered by calling .show() on it.
roc(model, original_X, original_y).show()