In [79]:
from IPython.display import Image
import numpy as np

# Correlation

Correlation is a measure of how close two variables are to having a linear relationship.

In [80]:
# Display the "Correlation examples" figure hosted on Wikimedia Commons.
# The image is referenced by URL, so rendering it presumably requires network access.
Image(url='https://upload.wikimedia.org/wikipedia/commons/0/02/Correlation_examples.png')

## Pearson product moment correlation coefficient

$$\text{corr}(X,Y) = \frac{\text{cov}(X,Y)}{\sigma_X \sigma_Y} = \frac{\mathbb{E}[(X - \mu_X)(Y - \mu_Y)]}{\sigma_X \sigma_Y}$$

In [41]:
# Two independent Gaussian samples of 17 points each, with different scales and offsets.
# A locally seeded generator is used so that Restart & Run All reproduces the same
# sample (and therefore the same coefficients) on every run.
rng = np.random.default_rng(41)
X = 2.5 * rng.standard_normal(17) + 1.3
Y = 0.3 * rng.standard_normal(17) - 0.8

def corr(x, y):
    """Pearson correlation coefficient of two samples.

    Computed as the mean of the product of the centered values (the
    covariance normalised by N) divided by the product of ``np.std(x)``
    and ``np.std(y)``.

    Parameters
    ----------
    x, y : array_like
        Samples to compare; they must be broadcastable against each other.

    Returns
    -------
    The covariance divided by the product of the two standard deviations.
    """
    dx = x - np.mean(x)
    dy = y - np.mean(y)
    covariance = np.mean(dx * dy)
    spread = np.std(x) * np.std(y)
    return covariance / spread
    

# Compare the hand-written coefficient (c2) with NumPy's built-in one (c1);
# the two should agree up to floating-point rounding.
c2 = corr(X, Y)
c1 = np.corrcoef(X, Y)[0, 1]
print(c1, c2, sep="\n")

-0.037154431652671724
-0.03715443165267179


When $X$ and $Y$ are standardized vectors of length $N$ (zero mean and unit population standard deviation):

$$\text{corr}(X,Y) = \frac{\langle X, Y \rangle}{N}$$

In [78]:
# Fresh sample drawn from a locally seeded generator so that Restart & Run All
# reproduces the same numbers on every run.
rng = np.random.default_rng(78)
X = 2.5 * rng.standard_normal(17) + 1.3
Y = 0.3 * rng.standard_normal(17) - 0.8

# Standardize with the population standard deviation (np.std defaults to ddof=0);
# this normalisation is what makes <Xs, Ys> / N equal to the correlation.
Xs = (X - np.mean(X)) / np.std(X)
Ys = (Y - np.mean(Y)) / np.std(Y)

c1 = np.corrcoef(Xs, Ys)[0, 1]  # correlation of the standardized vectors
c2 = np.corrcoef(X, Y)[0, 1]    # correlation of the raw vectors
c3 = (Xs @ Ys) / len(X)         # inner-product formula

# Fail loudly if the three routes disagree beyond floating-point noise,
# instead of relying on a visual comparison of the printed values.
np.testing.assert_allclose([c1, c3], c2)

print(c1)
print(c2)
print(c3)

-0.10537373822472725
-0.10537373822472716
-0.10537373822472715
