# Joint vs Individual Linear Regression Loadings

First, let's establish some notation. Define $\texttt{linreg}$ so that, for example, $\texttt{linreg}(y,\, \{1, x_1, x_2\})$ denotes the vector of MLE coefficients (which, under Gaussian errors, coincide with the OLS coefficients) from a linear regression of $y$ on a constant (AKA "intercept"), $x_1$, and $x_2$. This vector has one entry per regressor, so in this case $(\beta_0,\, \beta_1,\, \beta_2)$ has dimension 3.

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

def gen_data(mean=(0, 0), std=(1, 1), corr=0, n=10_000_000, seed=42):
    """Simulate ``x1``, ``x2`` and noise, then build ``y = x1 + x2 + white_noise``.

    ``x1``, ``x2`` and ``white_noise`` are drawn jointly from a multivariate
    normal distribution. ``white_noise`` always has mean 0, standard
    deviation 1 and zero correlation with both ``x1`` and ``x2``.

    Parameters
    ----------
    mean : sequence of 2 numbers
        Means of ``x1`` and ``x2``.
    std : sequence of 2 numbers
        Standard deviations of ``x1`` and ``x2``.
    corr : number
        Correlation between ``x1`` and ``x2``.
    n : int
        Number of rows to draw.
    seed : int
        Seed for the random number generator; the same seed reproduces the
        same data.

    Returns
    -------
    pandas.DataFrame
        ``n`` rows with columns ``y``, ``x1``, ``x2``, ``x1+x2`` and
        ``white_noise``, in that order.
    """
    names = ["x1", "x2", "white_noise"]

    means = pd.Series([mean[0], mean[1], 0], index=names)
    stds = pd.Series([std[0], std[1], 1], index=names)

    # Diagonal matrix with the standard deviations on the diagonal.
    std_diag = pd.DataFrame(np.diag(stds.to_numpy()), index=names, columns=names)

    corr_matrix = pd.DataFrame(
        [
            [1, corr, 0],
            [corr, 1, 0],
            [0, 0, 1],
        ],
        index=names,
        columns=names,
    )

    # cov = D @ R @ D, with D the diagonal std matrix and R the correlations.
    cov = std_diag @ corr_matrix @ std_diag

    # Use a private legacy generator instead of np.random.seed(): it produces
    # the same stream for a given seed, but does not reseed NumPy's global RNG
    # behind the caller's back.
    rng = np.random.RandomState(seed)
    draws = rng.multivariate_normal(mean=means.to_numpy(), cov=cov.to_numpy(), size=n)

    df = pd.DataFrame(draws, columns=names)
    df["x1+x2"] = df["x1"] + df["x2"]
    df["y"] = df["x1+x2"] + df["white_noise"]
    return df[["y", "x1", "x2", "x1+x2", "white_noise"]]


def linreg(y, x, decimals=2):
    """Regress ``y`` on a constant plus ``x`` by OLS and return rounded coefficients.

    Parameters
    ----------
    y
        Dependent variable, passed to ``sm.OLS`` as ``endog``.
    x
        Regressor(s); ``sm.add_constant`` is applied to them before fitting.
    decimals : int
        Number of decimal places the fitted coefficients are rounded to.

    Returns
    -------
    The fitted model's ``params``, rounded with ``np.round``.
    """
    design = sm.add_constant(x)
    # hasconst=True tells statsmodels the design already includes the constant.
    fitted = sm.OLS(endog=y, exog=design, hasconst=True).fit()
    return np.round(fitted.params, decimals=decimals)

In [2]:
# Simulate the data with gen_data's defaults: zero means, unit standard
# deviations, zero correlation between x1 and x2, n=10,000,000 rows, seed=42.
df = gen_data()
# Regress y on a constant and x2 only (x1 is left out); the rounded
# coefficients are shown as the cell's output.
linreg(y=df["y"], x=df["x2"])

const    0.0
x2       1.0
dtype: float64

## Experiment 0

## Experiment 1

## Experiment 2

## Experiment 3