### Import Libraries

In [1]:
%load_ext autoreload

%autoreload 2
# Libraries for data processing and math 
import pandas as pd
import numpy as np

# Library for causal estimation 
import dowhy
from dowhy import CausalModel
import econml
from sklearn.preprocessing import MinMaxScaler
from causal_estimate import ate_estimate_refutation 

# Library for file path manipulation 
import os

# Set seed to control randomness
np.random.seed(156)

In [2]:
# Declare Causal Graph for all time series data
causal_graph = """
digraph {
days; 
ACTV; 
TEMP; 
U[label="Unobserved Confounders"];
HUM; 
WSPD; 
PRES; 
NO2; 
O3; 
PM2.5; 
PM10; 
SO2; 
CO; 
Case; 
days->ACTV; days->TEMP; days->Case; 
U->ACTV; U->TEMP; U->PRES; U->O3; U->HUM; U->WSPD; 
ACTV->NO2; ACTV->Case; ACTV->PM2.5; ACTV->PM10; ACTV->SO2; ACTV->CO; 
PRES->NO2; PRES->O3; PRES->PM2.5; PRES->PM10; PRES->SO2; PRES->C0; 
TEMP->PRES; TEMP->HUM; 
HUM->PRES; HUM->N02; HUM->O3; HUM->PM2.5; HUM->PM10; HUM->SO2; HUM->CO;
WSPD->N02; WSPD->O3; WSPD->PM2.5; WSPD->PM10; WSPD->SO2; WSPD->CO; 
NO2->O3; 
}
"""

### Read Data

In [3]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_1.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.570104,0.502793,0.993318,0.0,0.869388,0.910256,0.425481,0.382456,0.240291,0.310345,0.491803,0.0
1,0.012987,0.509862,0.477654,1.0,0.0,0.869388,0.717949,0.414663,0.266667,0.156553,0.258621,0.368852,0.063063
2,0.025974,0.441704,0.469274,0.997773,0.062016,0.889796,0.589744,0.485577,0.245614,0.203883,0.275862,0.327869,0.099099
3,0.038961,0.312268,0.446927,0.979955,0.062016,0.922449,0.371795,0.485577,0.133333,0.131068,0.241379,0.213115,0.216216
4,0.051948,0.21367,0.430168,0.975501,0.124031,0.938776,0.262821,0.512019,0.101754,0.128641,0.275862,0.196721,0.216216


### Causal Refutation for Cluster 1: RCC

In [None]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD']
curr_treatments = []
ate_linear2 = []
ate_forest2 = []
for treatment in treatments:
    try:
        est_forest, ref_forest = ate_estimate_refutation(treatment='PRES', data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        curr_treatments.append(treatment)
        ate_linear2.append(first)
        ate_forest2.append(second)
    except Exception as e:
        continue 
for idx in range(len(curr_treatments)):
    print(f'Treatment: {curr_treatments[idx]}')
    print('-----------------------------')
    print(f'ATE Linear: {ate_linear2[idx]}')
    print(f'ATE Forest: {ate_forest2[idx]}')

In [4]:
est, ref = ate_estimate_refutation(treatment='PRES', data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')

In [5]:
print(est)

*** Causal Estimate ***

## Identified estimand
Estimand type: nonparametric-ate

### Estimand : 1
Estimand name: backdoor
Estimand expression:
   d                                                 
───────(Expectation(Case|HUM,CO,ACTV,WSPD,TEMP,days))
d[PRES]                                              
Estimand assumption 1, Unconfoundedness: If U→{PRES} and U→Case then P(Case|PRES,HUM,CO,ACTV,WSPD,TEMP,days,U) = P(Case|PRES,HUM,CO,ACTV,WSPD,TEMP,days)

## Realized estimand
b: Case~PRES+HUM+CO+ACTV+WSPD+TEMP+days+PRES*ACTV
Target units: ate

## Estimate
Mean value: 0.07008893758689774
### Conditional Estimates
__categorical__ACTV
(-0.001, 0.166]    0.086774
(0.166, 0.319]     0.079717
(0.319, 0.477]     0.070445
(0.477, 0.634]     0.062457
(0.634, 1.0]       0.050898
dtype: float64


In [6]:
print(ref)

Refute: Add a Random Common Cause
Estimated effect:0.07008893758689774
New effect:0.06756068743849529

