In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import scipy.stats as ss
from scipy.interpolate import interp1d
from scipy.special import expit
from fractions import Fraction
from empiricaldist import Pmf, Cdf
import matplotlib.pyplot as plt
from collections import Counter
import statsmodels.formula.api as smfa

In [2]:
from data.utils import make_uniform, make_joint, plot_contour, transform

In [3]:
# Source CSV for the Challenger O-ring data set (fetched over the network).
link = 'https://raw.githubusercontent.com/CamDavidsonPilon/Probabilistic-Programming-and-Bayesian-Methods-for-Hackers/master/Chapter2_MorePyMC/data/challenger_data.csv'

# Load and tidy in one chain:
#   * shorten the 'Damage Incident' column name to 'Damage',
#   * drop the rows labelled 3 and 24
#     (presumably rows without a usable 0/1 damage value -- TODO confirm),
#   * cast 'Damage' to int.
data = (
    pd.read_csv(link, parse_dates=['Date'])
    .rename(columns={'Damage Incident': 'Damage'})
    .drop(labels=[3, 24])
    .astype({'Damage': int})
)
data.head(), data.shape

(        Date  Temperature  Damage
 0 1981-04-12           66       0
 1 1981-11-12           70       1
 2 1982-03-22           69       0
 4 1982-01-11           68       0
 5 1983-04-04           67       0,
 (23, 3))

In [4]:
# Center the temperatures: x == 0 corresponds to a temperature equal to
# `offset`, the column mean rounded to one decimal place.
temperature_mean = data['Temperature'].mean()
offset = round(temperature_mean, 1)
data['x'] = data['Temperature'].sub(offset)
# y mirrors the Damage column under the name used by the model formula.
data['y'] = data['Damage']

In [5]:
# Logistic regression of y on x, specified with a formula string.
formula = 'y ~ x'
logit_model = smfa.logit(formula, data=data)
# disp=False is passed to fit(); it is expected to silence the optimizer's
# convergence printout (see the statsmodels fit documentation).
results = logit_model.fit(disp=False)
results.params

Intercept   -1.115625
x           -0.232163
dtype: float64

In [6]:
# Display the full fit report (coefficients, standard errors, fit statistics).
results.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,23.0
Model:,Logit,Df Residuals:,21.0
Method:,MLE,Df Model:,1.0
Date:,"Fri, 15 Sep 2023",Pseudo R-squ.:,0.2813
Time:,18:07:30,Log-Likelihood:,-10.158
converged:,True,LL-Null:,-14.134
Covariance Type:,nonrobust,LLR p-value:,0.004804

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.1156,0.581,-1.921,0.055,-2.254,0.023
x,-0.2322,0.108,-2.145,0.032,-0.444,-0.020


In [7]:
# Fitted coefficients of the logistic model.
inter, slope = results.params['Intercept'], results.params['x']

# Integer values 53..82, shifted by `offset` so they share the centered
# scale of data['x'].
xs = np.arange(53, 83) - offset

# Linear predictor (log-odds) at each grid point.
log_odds = slope * xs + inter

# Convert log-odds to probabilities with the logistic function; this is the
# same as odds / (odds + 1) with odds = np.exp(log_odds).
ps = expit(log_odds)

In [8]:
# Show the log-odds and the corresponding probabilities over the xs grid.
log_odds, ps

(array([ 2.7382762 ,  2.50611346,  2.27395072,  2.04178797,  1.80962523,
         1.57746248,  1.34529974,  1.11313699,  0.88097425,  0.64881151,
         0.41664876,  0.18448602, -0.04767673, -0.27983947, -0.51200221,
        -0.74416496, -0.9763277 , -1.20849045, -1.44065319, -1.67281594,
        -1.90497868, -2.13714142, -2.36930417, -2.60146691, -2.83362966,
        -3.0657924 , -3.29795515, -3.53011789, -3.76228063, -3.99444338]),
 array([0.93924781, 0.92456929, 0.90669655, 0.88511521, 0.85931657,
        0.82884484, 0.79336013, 0.75271348, 0.70702407, 0.65674259,
        0.60268105, 0.54599114, 0.48808308, 0.43049313, 0.37472428,
        0.32209405, 0.27362105, 0.22996826, 0.19144422, 0.1580491 ,
        0.12954602, 0.10553894, 0.08554356, 0.06904407, 0.05553372,
        0.04454055, 0.03564141, 0.02846733, 0.02270329, 0.01808462]))