In [8]:
import sys
import numpy as np
sys.path.append('..')
from stats_module import OLS, LinearModelTester

In [9]:
# Reproducible synthetic regression data: n observations of p features,
# each drawn from a standard normal under a fixed seed.
np.random.seed(0)
n, p = 20, 3
X = np.random.randn(n, p)

# The response is a linear signal in X (slopes 1, 2, 3) plus standard-normal noise.
# The noise is drawn right after X so the random stream is consumed in the same order.
true_slopes = np.array([1, 2, 3])
noise = np.random.randn(n)
y = X @ true_slopes + noise

# Fit the OLS estimator (intercept requested via include_intercept=True)
# and show the summary it reports for the training data.
model = OLS(include_intercept=True)
model.fit(X, y)

summary = model.summary(X,y)
print(summary)


{'coefficients': array([-0.41221599,  0.93623639,  2.38843176,  2.99360915]), 'r_squared': np.float64(0.9786838149519005)}


In [10]:
# Per-coefficient t-tests: entry i of H0 is the null value passed for coefficient i.
tester = LinearModelTester(model)
H0 = np.array([0, 1, 2, 3])
alpha = 0.05

results = tester.hypothesis_t_test(X, y, H0, alpha)
for result in results:
    # One print call per coefficient; sep="\n" puts each field on its own line.
    print(
        f"Coefficient {result['coefficient']}:",
        f"  Estimated: {result['beta_estimate']}",
        f"  Null value: {result['null_value']}",
        f"  t-stat: {result['t_stat']}",
        f"  p-value: {result['p_value']}",
        f"  Reject null: {result['reject_null']}",
        sep="\n",
    )

# Interval estimates for every coefficient, computed with the same alpha.
results = tester.confidence_interval(X, y, alpha)
for result in results:
    print(
        '--'*40,
        f"Coefficient {result['coefficient']}:",
        f"  Estimated: {result['beta_estimate']}",
        f"  Confidence interval: [{result['confidence_lower']}, {result['confidence_upper']}]",
        sep="\n",
    )

# Joint F-test of the linear restrictions H0: R @ beta = r.
# Row 1 of R picks out coefficient 2 on its own; row 2 adds coefficients 1 and 3.
R = np.array([[0, 0, 1, 0], [0, 1, 0, 1]])
r = [0, 0]

results = tester.hypothesis_F_test(X, y, R, r, alpha)
print(
    '--'*40,
    f"F-stat: {results['F_stat']}",
    f"p-value: {results['p_value']}",
    f"Reject null: {results['reject_null']}",
    sep="\n",
)



Coefficient 0:
  Estimated: -0.41221598820940447
  Null value: 0
  t-stat: -2.440361254246725
  p-value: 0.026682263977906073
  Reject null: True
Coefficient 1:
  Estimated: 0.9362363940133275
  Null value: 1
  t-stat: -0.46340266445100076
  p-value: 0.6493169758975523
  Reject null: False
Coefficient 2:
  Estimated: 2.388431760388897
  Null value: 2
  t-stat: 2.4816537047952063
  p-value: 0.024563519243711474
  Reject null: True
Coefficient 3:
  Estimated: 2.993609153939517
  Null value: 3
  t-stat: -0.03907216395463594
  p-value: 0.9693162326966738
  Reject null: False
--------------------------------------------------------------------------------
Coefficient 0:
  Estimated: -0.41221598820940447
  Confidence interval: [-0.7703018479599026, -0.0541301284589063]
--------------------------------------------------------------------------------
Coefficient 1:
  Estimated: 0.9362363940133275
  Confidence interval: [0.6445401725668481, 1.2279326154598067]
----------------------------------

In [11]:
# Draw a single new observation (one row, p features) from the same global
# NumPy random stream; it is the query point for the intervals computed later.
x_new = np.random.standard_normal(size=(1, p))

In [13]:
alpha = 0.05

# Interval around the fitted value m(x_new) at the new point.
result = tester.prediction_interval_m(X, y, x_new, alpha)
print(
    '--'*40,
    f"Prediction interval for new point m{x_new}:",
    f"  Estimated m(x_new): {result['mx_new_estimate']}",
    f"  Confidence interval: [{result['confidence_lower']}, {result['confidence_upper']}]",
    sep="\n",
)

# Interval for the response y_new at the same point; `result` is rebound here,
# so the block above must print before this call runs.
result = tester.prediction_interval_y(X, y, x_new, alpha)
print(
    '--'*40,
    f"Prediction interval for response of new point, y_new:",
    f"  Confidence interval: [{result['confidence_lower']}, {result['confidence_upper']}]",
    sep="\n",
)


--------------------------------------------------------------------------------
Prediction interval for new point m[[-1.16514984  0.90082649  0.46566244]]:
  Estimated m(x_new): 2.0425022606342313
  Confidence interval: [1.4122759816245984, 2.672728539643864]
--------------------------------------------------------------------------------
Prediction interval for response of new point, y_new:
  Confidence interval: [0.44011628066077035, 3.6448882406076923]
