In [7]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy import stats

In [8]:
# Step 1: Build a small toy dataset (10 students, one predictor, binary outcome)
# Note: the outcome is perfectly separated by the predictor here -- every
# student with hours_studied <= 3 has passed == 0 and every student with
# hours_studied >= 4 has passed == 1.
data = dict(
    hours_studied=list(range(1, 11)),
    passed=[0] * 3 + [1] * 7,
)

df = pd.DataFrame(data)

In [9]:
# Step 2: Assemble the response and the design matrix
# Response: the binary pass/fail indicator.
y = df['passed']
# Predictors: hours_studied plus a leading column of ones, so that the fitted
# model includes an intercept term.
X = sm.add_constant(df['hours_studied'], prepend=True)

In [10]:
# Step 3: Fit a logistic regression model
# `model` is the unfitted Logit specification; `result` holds the maximum
# likelihood fit that the later cells read p-values and log-likelihoods from.
model = sm.Logit(y, X)
result = model.fit()

# The optimizer can stop at its iteration limit without converging (for
# example when the outcome is perfectly separated by the predictors). The
# coefficients and standard errors of such a fit are not meaningful, so any
# Wald p-value derived from them must not be interpreted. Surface that
# explicitly instead of letting it pass silently.
fit_converged = getattr(result, 'mle_retvals', {}).get('converged', True)
if not fit_converged:
    print(
        "WARNING: logistic regression did not converge; coefficient estimates, "
        "standard errors and Wald p-values are unreliable "
        "(the outcome may be perfectly separated by the predictors)."
    )

         Current function value: 0.000000
         Iterations: 35




In [11]:
# Step 4: Wald test for the 'hours_studied' coefficient
# Null hypothesis: the coefficient equals zero. The p-value is read from the
# fitted result's per-coefficient p-value table, looked up by column name.
wald_test_p_value = result.pvalues.loc['hours_studied']

print("Wald Test p-value for 'hours_studied':", wald_test_p_value)

Wald Test p-value for 'hours_studied': 0.9996019298444231


In [12]:
# Step 5: Likelihood Ratio Test (LRT)
# Baseline for the comparison: an intercept-only model, i.e. the only
# regressor is a column of ones (disp=0 silences the optimizer printout).
null_model = sm.Logit(y, np.ones(y.shape[0])).fit(disp=0)

# LR statistic = 2 * (log-likelihood of full model - log-likelihood of null model).
# It is compared against a chi-square distribution with 1 degree of freedom,
# because the full model has exactly one more parameter than the null model.
lr_test_stat = 2 * (result.llf - null_model.llf)
lr_test_p_value = stats.chi2.sf(lr_test_stat, 1)  # upper-tail probability

print("Likelihood Ratio Test p-value:", lr_test_p_value)


Likelihood Ratio Test p-value: 0.000473487577224374
