In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import chi2
from sklearn import metrics

In [None]:
import warnings
# Blanket filter: hides every warning category for the rest of the kernel
# session, so deprecation notices raised by later cells will not be shown.
warnings.filterwarnings('ignore')

In [None]:
# Plot styling: apply seaborn's default theme first, then override the
# figure size so that figures created afterwards default to 8x8 inches.
sns.set_theme()
plt.rcParams.update({'figure.figsize': [8,8]})

In [None]:
# Read the data set from a path relative to the notebook's working directory
# and display it.
throat = pd.read_csv("../datasets/throat.csv")
throat

In [None]:
# Mean of the SoreThroat column; later cells use it as the null-model
# probability — assumes SoreThroat is coded 0/1, TODO confirm.
pi_null = throat["SoreThroat"].mean()
pi_null

In [None]:
# Log-odds (logit) of pi_null, i.e. log(p / (1 - p)).
throat_null = np.log( pi_null / (1 - pi_null) )
throat_null

In [None]:
# Regress SoreThroat on Duration with a binomial-family GLM specified through
# the formula interface, then display the estimated coefficients.
throat_model = smf.glm(
    formula="SoreThroat ~ Duration",
    data=throat,
    family=sm.families.Binomial(),
)
throat_fit = throat_model.fit()
throat_fit.params

In [None]:
# Likelihood-ratio test of the fitted model against the null model: the drop
# in deviance is referred to a chi-square distribution.
# chi2.sf is the survival function (1 - cdf) evaluated directly, which keeps
# precision for very small p-values where `1 - cdf` would round to 0.
# The degrees of freedom are read from the fit instead of being hard-coded to
# 1, so the test stays correct if predictors are added to the formula.
chi2.sf(throat_fit.null_deviance - throat_fit.deviance, df=throat_fit.df_model)

In [None]:
# Duration at which the fitted probability equals 0.5: the linear predictor
# b0 + b1 * Duration is zero there, so Duration = -b0 / b1.
# Coefficients are looked up by label; integer keys on the label-indexed
# params Series rely on pandas' deprecated positional fallback.
transition = -throat_fit.params["Intercept"] / throat_fit.params["Duration"]
transition

In [None]:
# Fitted logistic curve over the observed SoreThroat values, with a dashed
# vertical line drawn at the transition duration.
ax = sns.regplot(x="Duration", y="SoreThroat", data=throat, ci=None, logistic=True)
ax.axvline(transition, linestyle='--')
ax.set_xlim(10, 140)
# plt.savefig("throat_fit.png")

In [None]:
# Deviance-based pseudo R^2: the fraction of the null deviance removed by the
# fitted model.
R2_M = 1 - (throat_fit.deviance / throat_fit.null_deviance )
R2_M

In [None]:
# Sum-of-squares pseudo R^2.  Three per-observation columns are added to
# `throat`:
#   Null_residual - observed outcome minus the null-model probability
#   Residual      - observed outcome minus the fitted probability
#   Difference    - fitted probability minus the null-model probability
throat["Null_residual"] = throat["SoreThroat"].sub(pi_null)
throat["Residual"] = throat["SoreThroat"].sub(throat_fit.fittedvalues)
throat["Difference"] = throat_fit.fittedvalues - pi_null
# Ratio of the squared fitted-vs-null differences to the squared null residuals.
R2_S = throat["Difference"].pow(2).sum() / throat["Null_residual"].pow(2).sum()
R2_S

In [None]:
# Store the fitted probabilities as a column so they can be plotted: one
# histogram of Fit_prob per SoreThroat value, in bins 0.2 wide.
throat["Fit_prob"] = throat_fit.fittedvalues
sns.displot( data=throat, x="Fit_prob", col="SoreThroat", binwidth=0.2)
# plt.savefig("throat_rd_hist.png")

In [None]:
# Average fitted probability within each SoreThroat group, as a one-column
# frame indexed by SoreThroat.
fit_avgs = throat.groupby("SoreThroat")["Fit_prob"].mean().to_frame("Fit_average")
fit_avgs

In [None]:
# Gap between the mean fitted probability of the SoreThroat == 1 group and
# that of the SoreThroat == 0 group.
R2_D = fit_avgs.loc[1, "Fit_average"] - fit_avgs.loc[0, "Fit_average"]
R2_D

In [None]:
# Full summary table for the fitted model, printed as plain text.
print( throat_fit.summary())

In [None]:
# Per-Duration summary: how many rows share each Duration value and the mean
# of SoreThroat among them.
# NOTE(review): "Percentage" is a mean, so presumably a fraction in [0, 1]
# rather than a percent — assumes SoreThroat is coded 0/1; confirm.
throat.groupby("Duration").agg(Number=("Duration","size"),
                               Percentage=("SoreThroat","mean"))

In [None]:
# Sum of the squared response residuals of the fitted model.
np.square(throat_fit.resid_response).sum()