# Going from linear regression parameters to correlation

In [40]:
import numpy as np
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
%matplotlib inline

Let's generate some data.

In [19]:
# Simulate N observations: one Gaussian predictor (mean 0, sd 2) and a
# standard-normal target drawn independently of it.
N = 100
# Design matrix with a leading column of ones (intercept) followed by the predictor.
X = np.column_stack((np.ones(N), np.random.normal(0, 2, (N, 1))))
y = np.random.normal(0, 1, N)

# Ordinary least-squares fit; b[0] is the intercept, b[1] the predictor's slope.
b = np.linalg.lstsq(X, y, rcond=None)[0]

What is the correlation between the predictor and the target? 

In [20]:
# Pearson r between the predictor (column 1 of X; column 0 is the intercept
# column of ones) and the target, together with the p-value that scipy's
# pearsonr returns alongside it.
corr, p = pearsonr(X[:, 1], y)
print("Correlation: %.5f (p = %.3f)" % (corr, p))

Correlation: -0.15013 (p = 0.136)


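Now, we can also calculate the correlation directly from the estimated linear regression slope ($\hat{\beta}_{j}$), by rescaling it with the standard deviations of the predictor ($\sigma_{X_{j}}$) and of the target ($\sigma_{y}$):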

\begin{align}
r_{X_{j}, y} = \hat{\beta}_{j} \cdot \frac{\sigma_{X_{j}}}{\sigma_{y}}
\end{align}

which holds only for bivariate regression, i.e., a model with a single predictor (plus an intercept).

In [21]:
# Rescale the fitted slope b[1] by the ratio of standard deviations:
# r = beta * sd(X) / sd(y). Both .std() calls use NumPy's default ddof=0, and
# because only their ratio enters, the normalisation convention cancels out.
corr_from_beta = b[1] * (X[:, 1].std() / y.std())
print("Correlation calculated from parameter: %.5f" % corr_from_beta)

Correlation calculated from parameter: -0.15013


In [46]:
def semipartial_pearsonr(X, y, idx=0, residualize="y"):
    """Semipartial (part) Pearson correlation between one column of `X` and `y`.

    The columns of `X` other than `idx` are regressed out (ordinary least
    squares) of exactly ONE of the two variables; the resulting residual is
    then correlated with the other, untouched variable.

    Parameters
    ----------
    X : 2-D array
        Design matrix, used exactly as given. No intercept is added here, so
        include a column of ones if the mean should be controlled for.
    y : 1-D array
        Target values, one per row of `X`.
    idx : int, default 0
        Column of `X` whose correlation with `y` is computed.
    residualize : {"y", "X"}, default "y"
        Which variable the remaining columns are regressed out of.
        ``"y"`` (the default, and the historical behaviour of this function)
        correlates the residualized target with the raw column ``X[:, idx]``.
        ``"X"`` correlates the residualized column with the raw target; with
        an intercept among the remaining columns, the square of this value
        equals the gain in R^2 from adding column `idx` to the model.

    Returns
    -------
    float
        The correlation coefficient (first element of ``pearsonr``'s result).

    Raises
    ------
    ValueError
        If `residualize` is neither ``"y"`` nor ``"X"``.
    """
    if residualize not in ("y", "X"):
        raise ValueError('residualize must be "y" or "X", got %r' % (residualize,))

    # Boolean mask selecting every column except the one of interest.
    other_idx = np.ones(X.shape[1], dtype=bool)
    other_idx[idx] = False
    other_X = X[:, other_idx]
    this_X = X[:, idx]

    if residualize == "y":
        # Remove from y what the other columns can linearly explain.
        resid_y = y - other_X.dot(np.linalg.lstsq(other_X, y, rcond=None)[0])
        return pearsonr(resid_y, this_X)[0]

    # Remove from the column of interest what the other columns can explain.
    resid_X = this_X - other_X.dot(np.linalg.lstsq(other_X, this_X, rcond=None)[0])
    return pearsonr(resid_X, y)[0]

def partial_pearsonr(X, y, idx=0):
    """Partial Pearson correlation of column `idx` of `X` with `y`.

    Both `y` and the selected column are residualized first: each is
    regressed by least squares on the remaining columns of `X` and the fitted
    part is subtracted. The Pearson r between the two residual vectors is
    returned.
    """
    def _residualize(target, covariates):
        # Subtract the least-squares projection of `target` onto `covariates`.
        coefs = np.linalg.lstsq(covariates, target, rcond=None)[0]
        return target - covariates.dot(coefs)

    # Every column except `idx` acts as a covariate.
    is_covariate = np.ones(X.shape[1], dtype=bool)
    is_covariate[idx] = False
    covariates = X[:, is_covariate]

    y_resid = _residualize(y, covariates)
    x_resid = _residualize(X[:, idx], covariates)
    return pearsonr(y_resid, x_resid)[0]

In [48]:
# Two Gaussian predictors (mean 0, sd 2) with a leading intercept column of
# ones; N is reused from the earlier data-generation cell.
X = np.random.normal(0, 2, (N, 2))
X = np.hstack((np.ones((N, 1)), X))
# z-score the two predictor columns in place; the intercept column is left untouched.
X[:, 1:] = (X[:, 1:] - X[:, 1:].mean(axis=0)) / X[:, 1:].std(axis=0)
# Target drawn independently of the predictors.
y = np.random.normal(0, 1, N)

# For the first predictor (column 1), print in order: semipartial r, partial r,
# plain Pearson r, and its slope in the full least-squares fit.
print(semipartial_pearsonr(X, y, idx=1))
print(partial_pearsonr(X, y, idx=1))
print(pearsonr(X[:, 1], y)[0])
b = np.linalg.lstsq(X, y, rcond=None)[0]
print(b[1])

0.20251130419172506
0.20267002428413428
0.20506437871773314
0.1770180139750527
