<a href="https://colab.research.google.com/github/pranjalrawat007/Econometrics/blob/main/OLS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from numpy.random import normal, seed
from numpy import dot, shape, identity, round, sqrt, var, delete, diagonal, where, zeros, array, var
from numpy.linalg import inv, matrix_rank
from scipy import stats

# Generate Data
seed(1)
n, k = 100, 10
β = normal(10, 1, (k, 1)) 
X = normal(0, 1, (n, k))
ε = normal(0, 1, (n, 1))
y = dot(X, β) + ε
X[:, 0:3] = normal(0, 1, (n, 3)) # noise
print(X.shape, y.shape)

# Random Estimates
n, k = X.shape[0], X.shape[1]
b = normal(0, 1, (k, 1)) 
ŷ = dot(X, b)
e = y - ŷ
RSS = dot(e.T,e)
print(RSS)

# Least Squares
n, k = X.shape[0], X.shape[1]
A = inv(dot(X.T, X))
b = dot(A, dot(X.T, y))
ŷ = dot(X, b)
e = y - ŷ
RSS = dot(e.T, e)
P = dot(dot(X, dot(X.T, X)), X.T) #Projection
M = identity(X.shape[0]) - P # Annihilation
TSS = dot(y.T,y) # Total Sum of Squares
ESS = dot(ŷ.T, ŷ) # Explained Sum of Squares
s = sqrt(RSS/(n-k)) # Estimate of SE of Unobserved
b_V = (s ** 2) * inv(dot(X.T, X))
b_se = diagonal(s * sqrt(where(A<0,0,A))).reshape(-1,1) # Estimate of SE of b
t = b/b_se.reshape(-1,1)
R2_UC = ESS/TSS # done with no intercept, to avoid less than zero R2.
R2 =  1 - RSS/(var(y)*n) # done when intercept is included, good!

# Influence of a single row
ia = normal(0,1,(n,))
for i in range(n):
	x_ = X[i, :]
	ia[i] = dot(dot(x.T, A), x)

# Hypothesis testing under Normality assumption
# Individual tTests on Coefficients 

b_test = zeros((10,1)) # our guess of true Beta i.e Null Hyp
df = n-k # degree of freedom
tstat = (b - b_test)/b_se # t-statistic for Null Hyp
α = 0.05 # level of signifiance
c = stats.t.ppf(1-α/2, df) # t-critical values
where(abs(tstat)>c,1,0) # confirm or reject
lower_conf = b - b_se * c # Confidence intervals
upper_conf = b + b_se * c

# P-values - give true null, the prob of obtaining a more extreme t than currently obtained
cdf_bel = stats.t.cdf(abs(tstat), df) # prob of less extreme t
cdf_abv = 1 - cdf_bel # prob of more extreme t
p = cdf_abv*2 # as this is a two tailed test
round(p, 2)

# Wald F-Test for systemic hypothesis "Ho: dot(R,β) = r"
# our Ho: all coeffs are 0
R = identity(10)
r = zeros((1,10)).T
print(R)
print(r)

t1 = dot(R,b) - r
t2 = inv(dot(R, dot(b_varcov, R.T)))
Fstat = dot(t1.T, dot(t2,t1))/(matrix_rank(R))
cdf_bel = stats.f.cdf(Fstat, matrix_rank(R), n - k)
cdf_abv = 1 - cdf_bel # one tailed test
p = cdf_abv*2
print(p, Fstat)

# GLS
X_V = cov(X)

# Check
from statsmodels.api import OLS
model = sm.OLS(y, X)
result = model.fit()
result.summary()

R2

(100, 10) (100, 1)
[[78970.36727346]]


NameError: ignored