In [1]:
import pandas as pd
import numpy as np
from causal_world import world_generator, show_parameters, sample_parameters, sample

In [2]:
np.random.seed(123)
world = world_generator(
    features=4,
    parameters_override={
        "t_effect": np.array([[10, 0, 0, 0, 0]]).T,
        "daily_samples": 10000
    }
)

In [3]:
data, world = world.get("sampler")()

In [4]:
data.head()

Unnamed: 0,T,x_1,x_2,x_3,x_4,Y
0,1.0,94.45,42.09,-163.6,-51.65,19.13
1,1.0,102.02,25.81,-147.16,-51.49,-12.66
2,1.0,103.17,29.29,-158.38,-50.41,-8.06
3,1.0,96.71,26.99,-158.74,-54.6,-5.03
4,0.0,114.16,26.23,-136.44,-45.06,-34.32


In [5]:
data, world = world.get("sampler")()

In [6]:
data.head()

Unnamed: 0,T,x_1,x_2,x_3,x_4,Y
0,0.0,93.61,23.03,-157.42,-60.4,-16.75
1,1.0,101.82,24.34,-140.58,-48.49,-16.12
2,0.0,94.01,28.95,-150.81,-65.07,-6.68
3,0.0,97.96,25.82,-140.27,-55.6,-18.37
4,0.0,107.16,31.7,-148.79,-48.54,-18.62


In [7]:
show_parameters(world)

time 
 1 

mean 
 [-108.56306033   99.73454466   28.29784981 -150.62947139  -57.8600252 ] 

covariance 
 [[46.65274563  5.82570355 -4.56908375 12.62744759 18.56452602]
 [ 5.82570355 49.88654907  7.24111127  8.20709825  3.51683289]
 [-4.56908375  7.24111127 33.89574108 -1.55437125 -3.10665618]
 [12.62744759  8.20709825 -1.55437125 63.73730212 -5.24548633]
 [18.56452602  3.51683289 -3.10665618 -5.24548633 28.37013779]] 

y_coef 
 [[-0.78680087]
 [ 1.53418437]
 [-0.04889153]
 [-0.34515174]] 

t_effect 
 [[10]
 [ 0]
 [ 0]
 [ 0]
 [ 0]] 

beta0 
 [-6.95981176] 

binary_treatment 
 True 

noise_tempering 
 [4.73343591] 

daily_samples 
 10000 



In [8]:
import statsmodels.formula.api as smf
result = smf.ols('Y ~ T', data=data).fit()
result.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-13.9949,0.134,-104.160,0.000,-14.258,-13.732
T,8.8142,0.200,44.090,0.000,8.422,9.206


In [9]:
result = smf.ols('Y ~ T+x_1+x_2+x_3+x_4+x_2', data=data).fit()
result.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-6.9589,0.002,-3884.465,0.000,-6.962,-6.955
T,9.9999,0.000,8.33e+04,0.000,10.000,10.000
x_1,-0.7868,8.74e-06,-9e+04,0.000,-0.787,-0.787
x_2,1.5342,1.06e-05,1.45e+05,0.000,1.534,1.534
x_3,-0.0489,7.69e-06,-6357.067,0.000,-0.049,-0.049
x_4,-0.3451,1.14e-05,-3.03e+04,0.000,-0.345,-0.345


### Random Sample

In [16]:
def treatment_fn(features):
    return np.random.binomial(1, 0.1, size=(features.shape[0],1))

In [18]:
data, world = world.get("sampler")(treatment_fn=treatment_fn)
data["T"].mean()
# random_sample = sate2.get("sampler")(n=daily_samples, treatment_fn=treatment_fn)

# random_sample.head()

0.098

In [19]:
random_sample.corr()

NameError: name 'random_sample' is not defined

## ATE

In [12]:
import statsmodels.formula.api as smf
result = smf.ols('Y ~ T', data=sample).fit()
result.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,2540.4419,17.459,145.510,0.000,2506.140,2574.744
T,48.9674,25.359,1.931,0.054,-0.856,98.791


In [13]:
result = smf.ols('Y ~ T+x_1+x_2+x_3+x_4+x_2', data=sample).fit()
result.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,576.1815,614.947,0.937,0.349,-632.052,1784.415
T,165.5372,26.294,6.296,0.000,113.875,217.199
x_1,4.8153,2.483,1.939,0.053,-0.063,9.694
x_2,-4.8963,1.700,-2.880,0.004,-8.236,-1.556
x_3,6.6579,1.643,4.052,0.000,3.430,9.886
x_4,-18.1774,2.144,-8.479,0.000,-22.390,-13.965


In [14]:
result = smf.ols('Y ~ T+x_1+x_2+x_3+x_4+x_2', data=random_sample).fit()
result.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,22.7475,682.800,0.033,0.973,-1318.803,1364.298
T,150.5875,26.204,5.747,0.000,99.102,202.073
x_1,1.9730,2.904,0.679,0.497,-3.733,7.679
x_2,-4.5622,1.829,-2.495,0.013,-8.155,-0.969
x_3,11.3375,1.997,5.677,0.000,7.413,15.262
x_4,-18.8083,2.467,-7.623,0.000,-23.656,-13.961


## CATE

In [15]:
result = smf.ols('Y ~ T*(x_1+x_2+x_3+x_4+x_2)', data=sample).fit()
result.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,1135.2239,847.244,1.340,0.181,-529.456,2799.904
T,-935.3820,1237.448,-0.756,0.450,-3366.741,1495.977
x_1,7.2362,3.391,2.134,0.033,0.574,13.898
x_2,-5.5801,2.259,-2.471,0.014,-10.018,-1.142
x_3,2.2210,2.317,0.959,0.338,-2.332,6.774
x_4,-17.5003,2.960,-5.913,0.000,-23.316,-11.685
T:x_1,-5.0779,4.973,-1.021,0.308,-14.849,4.693
T:x_2,1.6436,3.413,0.482,0.630,-5.062,8.349
T:x_3,8.9749,3.277,2.739,0.006,2.536,15.414


In [16]:
result = smf.ols('Y ~ T*(x_1+x_2+x_3+x_4+x_2)', data=random_sample).fit()
result.summary().tables[1]

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-578.5664,901.238,-0.642,0.521,-2349.334,1192.201
T,1269.1134,1340.709,0.947,0.344,-1365.134,3903.361
x_1,1.1599,3.808,0.305,0.761,-6.323,8.643
x_2,-0.9350,2.523,-0.371,0.711,-5.893,4.023
x_3,3.5371,2.651,1.334,0.183,-1.671,8.745
x_4,-21.2612,3.308,-6.426,0.000,-27.762,-14.761
T:x_1,1.3643,5.707,0.239,0.811,-9.849,12.578
T:x_2,-6.0505,3.582,-1.689,0.092,-13.088,0.987
T:x_3,16.3840,3.926,4.173,0.000,8.670,24.098


In [17]:
show_parameters(state1, "y_coef")
show_parameters(state1, "t_effect")

y_coef 
 [[-100]
 [   3]
 [  -2]
 [   4]
 [ -20]] 

t_effect 
 [[100]
 [  0]
 [ -2]
 [ 10]
 [  0]] 



## Evalation

In [18]:
from sklearn.model_selection import train_test_split
result = smf.ols('Y ~ T*(x_1+x_2+x_3+x_4+x_2)', data=random_sample).fit()

In [19]:
state3 = sate2.get("world")(3)
evaluation_set = state3.get("sampler")(n=daily_samples, treatment_fn=treatment_fn)

In [20]:
predictions = (evaluation_set
               .assign(lift=result.predict(evaluation_set.assign(T=1)).values -
                            result.predict(evaluation_set.assign(T=0)).values))

In [21]:
(predictions
 .assign(lift_band = pd.qcut(predictions["lift"], 5))
 .groupby(["lift_band", "T"])
 [["Y"]]
 .mean()
 .reset_index()
 .pivot(index='lift_band', columns='T', values='Y')
 .assign(true_lift = lambda d: d[1.0] - d[0.0])
)

T,0.0,1.0,true_lift
lift_band,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"(-293.35999999999996, 23.71]",2486.421277,2543.074906,56.653629
"(23.71, 100.885]",2526.553673,2583.875882,57.322209
"(100.885, 175.975]",2481.569574,2669.000189,187.430614
"(175.975, 270.065]",2535.781923,2731.205208,195.423285
"(270.065, 598.032]",2530.862653,2887.498431,356.635778


In [28]:
np.random.lognormal(6)

1967.4371907374166