## Causal Inference with Factorial Design

In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [5]:
import numpy as np
import statsmodels.api as sm
from sklearn import preprocessing

### Set up FactorialModel class

In [6]:
class FactorialModel:
    """Simulator for a factorial experiment with ``k`` binary treatments.

    Every unit receives ``k`` Bernoulli(``p_t``) treatment indicators. The
    mean outcome is linear in the interaction expansion of those indicators
    (bias, main effects, and products of distinct treatments up to order
    ``degree``), using a ground-truth coefficient vector ``beta`` that is
    drawn once at construction time and partially zeroed out.

    Parameters
    ----------
    n : int
        Number of units drawn by each call to :meth:`sample`.
    p_t : float, default 0.5
        Treatment probability, passed to ``Generator.binomial``. Must lie in
        [0, 1].
    k : int, default 2
        Number of binary treatment factors.
    degree : int, default 2
        Highest interaction order included in the response surface.
    sigma : float, default 0.1
        Standard deviation of the additive Gaussian outcome noise (>= 0).
    sparsity : float, default 0.5
        Fraction of coefficients set to exactly zero; the count is
        ``int(n_features * sparsity)``. Must lie in [0, 1].
    beta_seed : int, default 42
        Seed of the generator used to draw ``beta`` and the zero mask.

    Attributes
    ----------
    xfm : PolynomialFeatures
        Interaction expansion fitted for ``k`` input columns.
    beta : numpy.ndarray
        float32 coefficient vector of length ``xfm.n_output_features_``.
    rng, mu, eps
        Set by :meth:`sample`: the sampling generator, the noiseless response
        surface, and the noise draw of the most recent sample.

    Raises
    ------
    ValueError
        If ``p_t`` or ``sparsity`` is outside [0, 1] or ``sigma`` is negative.
    """

    def __init__(
        self,
        n,
        p_t=0.5,
        k=2,
        degree=2,
        sigma=0.1,
        sparsity=0.5,
        beta_seed=42,
    ) -> None:
        # Fail at construction time instead of on the first sample() call.
        # np.asarray keeps array-valued arguments (which numpy would
        # broadcast) working with the same checks.
        p_arr = np.asarray(p_t)
        if np.any((p_arr < 0) | (p_arr > 1)):
            raise ValueError(f"p_t must be in [0, 1], got {p_t!r}")
        s_arr = np.asarray(sparsity)
        if np.any((s_arr < 0) | (s_arr > 1)):
            raise ValueError(f"sparsity must be in [0, 1], got {sparsity!r}")
        if np.any(np.asarray(sigma) < 0):
            raise ValueError(f"sigma must be non-negative, got {sigma!r}")

        self.n = n
        self.p_t = p_t
        self.k = k
        self.degree = degree
        self.sigma = sigma
        self.sparsity = sparsity
        self.beta_seed = beta_seed
        # Dedicated generator so the ground truth depends only on beta_seed,
        # never on the seed later passed to sample().
        self.rng_beta = np.random.default_rng(self.beta_seed)
        # Interaction expansion; fitting on a dummy row is enough to learn the
        # number of output columns for k inputs.
        self.xfm = preprocessing.PolynomialFeatures(
            degree=self.degree, interaction_only=True, include_bias=True
        )
        self.xfm.fit(np.zeros((1, self.k), dtype="float32"))
        n_features = self.xfm.n_output_features_
        # Draw dense coefficients first, then pick the zero mask, so the draw
        # order (and therefore beta for a given seed) is stable.
        self.beta = self.rng_beta.normal(0, 1, n_features).astype("float32")
        zero_indices = self.rng_beta.choice(
            n_features,
            size=int(n_features * self.sparsity),
            replace=False,
        )
        self.beta[zero_indices] = 0.0

    def sample(self, seed=None):
        """Draw one dataset of ``n`` units.

        Parameters
        ----------
        seed : int or None, default None
            Seed for the sampling generator; ``None`` gives a fresh,
            non-reproducible stream.

        Returns
        -------
        t : numpy.ndarray
            float32 array of shape ``(n, k)`` with 0/1 treatment indicators
            (not interaction-expanded).
        y : numpy.ndarray
            Array of length ``n``: response surface plus Gaussian noise.

        Notes
        -----
        Stores the generator, the noiseless surface and the noise draw on
        ``self.rng``, ``self.mu`` and ``self.eps``, overwriting those of any
        previous call.
        """
        self.rng = np.random.default_rng(seed)
        # Treatments are drawn before the noise; keep this order so a given
        # seed keeps producing the same dataset.
        t = self.rng.binomial(1, self.p_t, (self.n, self.k)).astype("float32")
        # Reuse the transformer fitted in __init__ rather than refitting it.
        T = self.xfm.transform(t)
        self.mu = T @ self.beta
        self.eps = self.rng.normal(0, self.sigma, size=self.n)
        y = self.mu + self.eps
        return t, y

### Initialize model parameters (betas)

In [7]:
# Simulation settings; later cells reuse these names.
n = 1000        # units per sampled dataset
k = 3           # number of binary treatment factors
degree = 3      # highest interaction order in the response surface
sigma = 0.1     # standard deviation of the outcome noise
sparsity = 0.5  # fraction of ground-truth coefficients forced to zero

# beta_seed pins the ground-truth coefficients drawn in the constructor.
fm = FactorialModel(
    n=n, k=k, degree=degree, sigma=sigma, sparsity=sparsity, beta_seed=42
)

In [8]:
# Ground-truth coefficients; entries removed by the sparsity mask show up as exact zeros.
print(fm.beta)

[ 0.        -1.0399841  0.         0.        -1.9510351  0.
  0.1278404 -0.3162426]


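With `k=3` treatments, `degree=3`, and `interaction_only=True`, the coefficients are ordered as: [bias, $\beta_{t_1}$, $\beta_{t_2}$, $\beta_{t_3}$, $\beta_{t_1, t_2}$, $\beta_{t_1, t_3}$, $\beta_{t_2, t_3}$, $\beta_{t_1, t_2, t_3}$]. In the output above, the non-zero ground-truth effects are therefore $\beta_{t_1}$, $\beta_{t_1, t_2}$, $\beta_{t_2, t_3}$, and $\beta_{t_1, t_2, t_3}$.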

### Create a sample dataset

In [9]:
# Draw one dataset; the fixed seed makes the treatment assignment and the noise reproducible.
# t holds the raw 0/1 treatment indicators (not interaction-expanded), y the noisy outcome.
t, y = fm.sample(seed=23)
print(t.shape, y.shape)

(1000, 3) (1000,)


### Fit OLS model with sample data

In [10]:
# Build the design matrix with the model's own fitted expansion, so the column
# order is guaranteed to line up with fm.beta instead of relying on a second
# transformer configured with hand-copied hyperparameters.
T = fm.xfm.transform(t)
assert T.shape == (n, fm.beta.shape[0]), "design matrix does not match beta"
print(T.shape)

(1000, 8)


In [11]:
# T already contains the bias column (include_bias=True), so no constant is added here.
m = sm.OLS(endog=y, exog=T)
results = m.fit()
print(results.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.994
Model:                            OLS   Adj. R-squared:                  0.994
Method:                 Least Squares   F-statistic:                 2.245e+04
Date:                Fri, 23 Feb 2024   Prob (F-statistic):               0.00
Time:                        20:24:35   Log-Likelihood:                 887.06
No. Observations:                1000   AIC:                            -1758.
Df Residuals:                     992   BIC:                            -1719.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0141      0.009      1.492      0.1