In [1]:
import warnings; warnings.filterwarnings('ignore')
import numpy as np
import oxyba as ox; from importlib import reload; reload(ox);
from time import perf_counter, process_time

### Load Demo Dataset

In [2]:
# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, where the
# import below raises ImportError. Fall back to the OpenML copy of the dataset
# in that case (the fallback needs network access on first use).
try:
    from sklearn.datasets import load_boston
    tmp = load_boston()
except ImportError:
    from sklearn.datasets import fetch_openml
    tmp = fetch_openml(name="boston", version=1, as_frame=False)
num_obs = len(tmp.target)
# Cast explicitly: the OpenML loader may hand back object-typed arrays.
y = np.asarray(tmp.target, dtype=float)
# Design matrix: an intercept column of ones followed by feature columns 5 and 12
# (presumably RM and LSTAT in the Boston feature order -- TODO confirm).
X = np.c_[np.ones(shape=(num_obs, 1)), np.asarray(tmp.data[:, [5, 12]], dtype=float)]

### Matrix Inverse with QR Decomposition
The regression coefficients

$$
\hat{\beta} = (X^T X)^{-1} (X^T y)
$$

will be estimated by computing the inverse matrix $A^{-1} = (X^T X)^{-1}$ via the QR decomposition $A = Q R$

$$
A^{-1} = R^{-1} Q^T
$$

The advantage is that $R$ is a triangular matrix that can be easily inverted.

In [3]:
from numpy import dot
from numpy.linalg import qr
# QR-factor the matrix X'X; `q` and `r` are reused by the next cell.
# The last line displays `r`, the triangular factor.
q,r = qr(dot(X.T, X));
r

array([[  -7166.58642613,  -43760.55990978, -112995.6644441 ],
       [      0.        ,    -919.5901397 ,   12941.66875798],
       [      0.        ,       0.        ,     307.21651709]])

In [4]:
from numpy.linalg import inv
# Rebuild the inverse of X'X from its QR factors: A^{-1} = R^{-1} Q^T.
x2inv = dot(inv(r), q.T)
# Display rounded to 3 decimals for comparison with the direct inverse.
x2inv.round(3)

array([[ 0.328, -0.045, -0.003],
       [-0.045,  0.006,  0.   ],
       [-0.003,  0.   ,  0.   ]])

For comparison, the direct inverse:

In [5]:
# Reference result: invert X'X directly with numpy's `inv`, rounded the same way.
inv(dot(X.T, X)).round(3)

array([[ 0.328, -0.045, -0.003],
       [-0.045,  0.006,  0.   ],
       [-0.003,  0.   ,  0.   ]])

### Test Implementations
The first implementation is the same as described above.
It uses Numpy's [qr](https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.qr.html).

In [6]:
def func1_pretty(y, X):
    """Estimate OLS coefficients by inverting X'X through its QR factors.

    Computes ``beta = (X^T X)^{-1} (X^T y)`` where the inverse is obtained
    from the QR decomposition ``X^T X = Q R`` as ``R^{-1} Q^T``.

    Parameters
    ----------
    y : array_like
        Target values, one entry per row of ``X``.
    X : numpy.ndarray
        Design matrix (observations in rows, regressors in columns).

    Returns
    -------
    numpy.ndarray
        Estimated coefficients, one per column of ``X``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the triangular factor ``R`` is singular and cannot be inverted.
    """
    from numpy import dot
    from numpy.linalg import qr, inv

    # Factor the square matrix A = X'X into Q and the triangular factor R.
    q, r = qr(dot(X.T, X))
    # A^{-1} = R^{-1} Q^T
    x2inv = dot(inv(r), q.T)
    return dot(x2inv, dot(X.T, y))

In [7]:
def func1_faster(y,X):
    """QR-based OLS estimate using only the ``np`` namespace.

    Returns ``(X^T X)^{-1} (X^T y)``, with the inverse formed as
    ``R^{-1} Q^T`` from the QR decomposition of ``X^T X``.
    """
    import numpy as np
    xt = X.T
    q_mat, r_mat = np.linalg.qr(np.dot(xt, X))
    gram_inv = np.dot(np.linalg.inv(r_mat), q_mat.T)
    return np.dot(gram_inv, np.dot(xt, y))

The second implementation is statsmodels' `OLS.fit`, which has an option to apply QR decomposition.

In [8]:
def func2_pretty(y,X):
    """Fit an OLS model with statsmodels using its QR method.

    Builds ``OLS(y, X)``, fits it with ``method='qr'`` and returns the
    ``params`` attribute of the fit result.
    """
    from statsmodels.api import OLS
    model = OLS(y, X)
    fit_result = model.fit(method='qr')
    return fit_result.params

In [9]:
def func2_faster(y, X):
    """Return the ``params`` of a statsmodels OLS fit done with ``method='qr'``.

    Same computation as ``func2_pretty``, reached through the ``sm`` module
    alias instead of a direct ``OLS`` import.
    """
    import statsmodels.api as sm
    fitted = sm.OLS(y, X).fit(method='qr')
    return fitted.params

### Benchmarking

In [10]:
# Number of repeated calls per implementation in the timing loop.
trials = 50000
# Implementations to compare. Despite the name, this list holds the callables
# themselves: the four functions defined above plus two from `ox` (oxyba).
funcnames = [func1_pretty, func1_faster, 
             func2_pretty, func2_faster,
             ox.linreg_ols_qr,
             ox.linreg_ols_lu]

In [11]:
# Sanity check: call each implementation once on the same (y, X) and print its
# name next to the returned coefficients so the results can be compared.
for func in funcnames:
    beta = func(y,X);
    print(func.__name__, beta);

func1_pretty [-1.35827281  5.09478798 -0.64235833]
func1_faster [-1.35827281  5.09478798 -0.64235833]
func2_pretty [-1.35827281  5.09478798 -0.64235833]
func2_faster [-1.35827281  5.09478798 -0.64235833]
linreg_ols_qr [-1.35827281  5.09478798 -0.64235833]
linreg_ols_lu [-1.35827281  5.09478798 -0.64235833]


Check.

In [12]:
# Benchmark: time `trials` back-to-back calls of every implementation.
# "Clock" is the perf_counter difference, "CPU" the process_time difference.
print('{0:6s} {1:6s} {2:s}'.format('Clock', 'CPU', 'function name'))
for func in funcnames:
    # Start readings, taken as a pair right before the loop.
    sh,sc = perf_counter(), process_time();
    for i in range(trials):
        # For the functions defined in this notebook, each call also executes
        # the import statements inside the function body, so that cost is timed.
        beta = func(y,X); 
        if beta is None: print('error solving')
        # Reset the result before the next call.
        beta = None;
    # End readings, taken in the same order as the start readings.
    eh,ec = perf_counter(), process_time()
    print('{0:.4f} {1:.4f} {2:s}'.format(eh-sh, ec-sc, func.__name__))

Clock  CPU    function name
8.4616 7.9238 func1_pretty
7.2205 7.1730 func1_faster
30.1979 30.0159 func2_pretty
29.9465 29.7724 func2_faster
7.1734 7.1510 linreg_ols_qr
2.1842 2.1756 linreg_ols_lu


I picked `func1_faster` for `linreg_ols_qr`.