# Efficiency of OLS for linear models with correlated data

In [9]:
import numpy as np
from scipy import linalg
from scipy import stats

In [21]:
def make_covariates(n, cluster_covariates):
    """Return the design matrix for ``n`` clusters with identical covariates.

    The covariate values in ``cluster_covariates`` are repeated ``n`` times
    (one copy per cluster) and placed next to a leading column of ones that
    serves as the intercept.
    """
    repeated = np.tile(cluster_covariates, n)
    # Intercept column with the same shape and dtype as the covariate column.
    intercept = np.ones(repeated.shape, dtype=repeated.dtype)
    design = np.column_stack([intercept, repeated])
    return design

In [22]:
def make_exponential_correlation_matrix(m, rho):
    """Return the ``m`` x ``m`` exponential correlation matrix.

    Entry ``(i, j)`` equals ``rho ** abs(i - j)``, so the diagonal is 1 and
    the correlation decays geometrically with the distance between indices.

    Parameters
    ----------
    m : int
        Number of rows/columns of the matrix.
    rho : float
        Correlation between adjacent observations.

    Returns
    -------
    numpy.ndarray
        Symmetric float array of shape ``(m, m)``.
    """
    positions = np.arange(m)
    # Broadcasting a column against a row gives |i - j| for every pair at once,
    # replacing the element-by-element Python loop.
    lags = np.abs(positions[:, np.newaxis] - positions[np.newaxis, :])
    # float(rho) keeps the result floating point even for an integer rho.
    return np.power(float(rho), lags)


# Quick visual check of the construction.
make_exponential_correlation_matrix(5, 0.5)

array([[1.    , 0.5   , 0.25  , 0.125 , 0.0625],
       [0.5   , 1.    , 0.5   , 0.25  , 0.125 ],
       [0.25  , 0.5   , 1.    , 0.5   , 0.25  ],
       [0.125 , 0.25  , 0.5   , 1.    , 0.5   ],
       [0.0625, 0.125 , 0.25  , 0.5   , 1.    ]])

In [54]:
# Design: n clusters, each observed at the same covariate values.
n = 10
cluster_covariates = [-2., -1., 0., 1., 2.]
# Derive the cluster size from the covariates so the two cannot drift apart.
m = len(cluster_covariates)
X = make_covariates(n, cluster_covariates)
# Error correlation matrix: block diagonal with one exponential-correlation
# block (rho = 0.1) per cluster; the block is the same for every cluster.
sigma = linalg.block_diag(*([make_exponential_correlation_matrix(m, 0.1)] * n))

In [55]:
# Diagonal of (X' Sigma^{-1} X)^{-1}: the GLS coefficient variances
# (up to the error-variance scale, since sigma is a correlation matrix).
# Sigma^{-1} X is obtained by solving Sigma Z = X instead of forming the
# explicit inverse of sigma, which is cheaper and numerically more stable.
tmp1 = np.diag(linalg.inv(X.T.dot(linalg.solve(sigma, X))))

In [56]:
# Diagonal of the sandwich form (X'X)^{-1} X' Sigma X (X'X)^{-1}: the OLS
# coefficient variances when the errors have correlation matrix sigma.
# (X'X)^{-1} appears on both sides, so it is inverted once and reused.
xtx_inv = linalg.inv(X.T.dot(X))
tmp2 = np.diag(xtx_inv.dot(X.T.dot(sigma).dot(X)).dot(xtx_inv))

In [57]:
# Element-wise ratio tmp1 / tmp2 for each coefficient, shown to 3 decimals.
(tmp1 / tmp2).round(3)

array([0.998, 0.997])

In [47]:
# NOTE(review): hard-coded values; presumably a tmp1/tmp2-style variance ratio
# copied from an earlier run with different settings -- confirm the source.
0.01153846/0.012

0.9615383333333334

In [41]:
# NOTE(review): hard-coded values; presumably a tmp1/tmp2-style variance ratio
# copied from an earlier run with different settings -- confirm the source.
0.09309493/0.10741723

0.8666666418413508