# Invariant Causal Prediction

## Synthetic example

In [1]:
import numpy as np
import pandas as pd
import icpy

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Two environments; the environment value is used below as the scale applied
# to the standard-normal noise that generates x_1.
environments = [0.1, 1]
POPULATION = 10000

# Every environment contributes the same number of rows, in list order.
samples_per_environment = POPULATION // len(environments)

data = pd.DataFrame()
data["environment"] = np.repeat(environments, samples_per_environment)
n_rows = len(data)

# Data-generating process: x_1 -> y -> x_2.
# Noise is drawn with one vectorized call per column instead of a row-wise
# DataFrame.apply, which runs a Python lambda once per row.
data["x_1"] = np.random.normal(0, 1, size=n_rows) * data["environment"]
data["y"] = 1 * data["x_1"] + np.random.normal(0, 1, size=n_rows)
data["x_2"] = data["y"] + np.random.normal(0, 1, size=n_rows)

# The environment label is kept as the last column of X so later cells can
# split it off with X[:, :-1] / X[:, -1].
X = data[["x_1", "x_2", "environment"]].values
y = data["y"].values

In [2]:
# Run ICP on the synthetic data: every column of X except the last is passed
# as a predictor, and the last column (the environment label) is passed as z.
icpy.invariant_causal_prediction(
    X=X[:, :-1],
    y=y,
    z=X[:, -1],
)

ICP(S_hat=array([0]), q_values=array([8.25318014e-20, 1.00000000e+00]), p_value=1.334259940447282)

## Confounding 

In [3]:
# Add a variable c_1 that enters the equations of both x_1 and y, then
# regenerate x_1 and y from it. Noise is drawn with one vectorized call per
# column instead of a row-wise DataFrame.apply.
data["c_1"] = np.random.normal(0, 1, size=len(data)) * data["environment"]
data["x_1"] = data["c_1"] + np.random.normal(0, 1, size=len(data)) * data["environment"]
data["y"] = 1 * data["x_1"] + 0.5 * data["c_1"] + np.random.normal(0, 1, size=len(data))
# NOTE(review): x_2 is not regenerated here, so it still derives from the y
# produced in the earlier cell rather than from this new y -- confirm that
# this is intentional.

In [4]:
# Target and design matrix for the run that includes c_1 as a predictor.
# The environment label stays in the last column of X.
y = data["y"].to_numpy()
X = data[["x_1", "x_2", "c_1", "environment"]].to_numpy()

In [5]:
# Run ICP with x_1, x_2 and c_1 as predictors; the last column of X
# (the environment label) is passed as z.
icpy.invariant_causal_prediction(
    X=X[:, :-1],
    y=y,
    z=X[:, -1],
)

ICP(S_hat=array([0, 2]), q_values=array([1.57677934e-122, 1.00000000e+000, 4.09591474e-003]), p_value=2.1304361682130377)

In [6]:
# Target and design matrix for the run that leaves c_1 out of the predictors.
# The environment label stays in the last column of X.
y = data["y"].to_numpy()
X = data[["x_1", "x_2", "environment"]].to_numpy()

In [7]:
# Run ICP with only x_1 and x_2 as predictors (c_1 is not in X); the last
# column of X (the environment label) is passed as z.
icpy.invariant_causal_prediction(
    X=X[:, :-1],
    y=y,
    z=X[:, -1],
)

ICP(S_hat=array([0, 1]), q_values=array([0.        , 0.00409591]), p_value=0.0040959147377446275)