# Demo for the DoWhy causal API
We show a simple example of adding a causal extension to any dataframe. 

In [3]:
import os, sys
sys.path.append(os.path.abspath("../../../"))

In [4]:
import dowhy.datasets
import dowhy.api

import numpy as np
import pandas as pd

from statsmodels.api import OLS

In [8]:
data = dowhy.datasets.linear_dataset(beta=5,
        num_common_causes=1,
        num_instruments = 0,
        num_samples=1000,
        treatment_is_binary=True)
df = data['df']
df['y'] = df['y'] + np.random.normal(size=len(df)) # Adding noise to data. Without noise, the variance in Y|X, Z is zero, and mcmc fails.
data['dot_graph'] = 'digraph { v ->y;X0-> v;X0-> y;}'
df

Unnamed: 0,W0,v0,y
0,-2.162280,True,3.221392
1,-0.414811,False,-1.699314
2,-1.723537,False,-1.503957
3,0.323579,False,0.981959
4,-1.628089,False,-2.090974
...,...,...,...
995,1.510481,True,6.481622
996,0.198010,False,-0.108632
997,-1.355205,False,0.244389
998,-0.153224,True,5.764859


In [7]:
# data['df'] is just a regular pandas.DataFrame
df.causal.do(x='v0',
                     variable_types={'v0': 'b', 'y': 'c', 'W0': 'c'},
                     outcome='y',
                     common_causes=['0']).groupby('v0').mean().plot(y='y', kind='bar')

INFO:dowhy.causal_model:Model to find the causal effect of treatment ['v0'] on outcome ['y']
INFO:dowhy.causal_identifier:Common causes of treatment and outcome:['X0']


WARN: Do you want to continue by ignoring any unobserved confounders? (use proceed_when_unidentifiable=True to disable this prompt) [y/n] y


INFO:dowhy.causal_identifier:Instrumental variables for treatment and outcome:[]
INFO:dowhy.do_sampler:Using WeightingSampler for do sampling.
INFO:dowhy.do_sampler:Caution: do samplers assume iid data.


ValueError: at least one array or dtype is required

In [None]:
df.causal.do(x={'v': 1}, 
              variable_types={'v': 'b', 'y': 'c', 'X0': 'c'}, 
              outcome='y',
              method='weighting', 
              common_causes=['X0'],
              proceed_when_unidentifiable=True).groupby('v').mean().plot(y='y', kind='bar')

In [None]:
cdf_1 = df.causal.do(x={'v': 1}, 
              variable_types={'v': 'b', 'y': 'c', 'X0': 'c'}, 
              outcome='y', 
              dot_graph=data['dot_graph'],
              proceed_when_unidentifiable=True)

cdf_0 = df.causal.do(x={'v': 0}, 
              variable_types={'v': 'b', 'y': 'c', 'X0': 'c'}, 
              outcome='y', 
              dot_graph=data['dot_graph'],
              proceed_when_unidentifiable=True)

In [None]:
cdf_0

In [None]:
cdf_1

## Comparing the estimate to Linear Regression
First, estimating the effect using the causal data frame, and the 95% confidence interval.

In [None]:
(cdf_1['y'] - cdf_0['y']).mean()

In [None]:
1.96*(cdf_1['y'] - cdf_0['y']).std() / np.sqrt(len(df))

Comparing to the estimate from OLS.

In [None]:
model = OLS(df['y'], df[['X0', 'v']])
result = model.fit()
result.summary()