### Import Libraries

In [3]:
%load_ext autoreload

%autoreload 2
# Libraries for data processing and math 
import pandas as pd
import numpy as np

# Library for causal estimation 
import dowhy
from dowhy import CausalModel
import econml
from sklearn.preprocessing import MinMaxScaler
from causal_estimate import ate_estimate_refutation 

# Library for file path manipulation 
import os

# Set seed to control randomness
np.random.seed(156)

In [4]:
# Declare Causal Graph for all time series data
causal_graph = """
digraph {
days; 
ACTV; 
TEMP; 
U[label="Unobserved Confounders"];
HUM; 
WSPD; 
PRES; 
NO2; 
O3; 
PM2.5; 
PM10; 
SO2; 
CO; 
Case; 
days->ACTV; days->TEMP; days->Case; 
U->ACTV; U->TEMP; U->PRES; U->O3; U->HUM; U->WSPD; 
ACTV->NO2; ACTV->Case; ACTV->PM2.5; ACTV->PM10; ACTV->SO2; ACTV->CO; 
PRES->NO2; PRES->O3; PRES->PM2.5; PRES->PM10; PRES->SO2; PRES->C0; 
TEMP->PRES; TEMP->HUM; 
HUM->PRES; HUM->N02; HUM->O3; HUM->PM2.5; HUM->PM10; HUM->SO2; HUM->CO;
WSPD->N02; WSPD->O3; WSPD->PM2.5; WSPD->PM10; WSPD->SO2; WSPD->CO; 
NO2->O3; 
}
"""

### Read Data for Cluster 1 Overall

In [5]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_1.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.570104,0.502793,0.993318,0.0,0.869388,0.910256,0.425481,0.382456,0.240291,0.310345,0.491803,0.0
1,0.012987,0.509862,0.477654,1.0,0.0,0.869388,0.717949,0.414663,0.266667,0.156553,0.258621,0.368852,0.063063
2,0.025974,0.441704,0.469274,0.997773,0.062016,0.889796,0.589744,0.485577,0.245614,0.203883,0.275862,0.327869,0.099099
3,0.038961,0.312268,0.446927,0.979955,0.062016,0.922449,0.371795,0.485577,0.133333,0.131068,0.241379,0.213115,0.216216
4,0.051948,0.21367,0.430168,0.975501,0.124031,0.938776,0.262821,0.512019,0.101754,0.128641,0.275862,0.196721,0.216216


### Causal Refutation for Cluster 1 Overall: RCC

In [4]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.07008893758689767
New effect:0.06756068743849547

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.05713599417437626
New effect:-0.05728149785928478

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.1543099983712363
New effect:-0.152395836503529

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.20837767057637896
New effect:-0.20316430127324947

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.2809283302013028
New effect:-0.28188928051733614

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0

### Causal Refutation for Cluster 1 Overall: PT

In [5]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.07008893758689767
New effect:-5.551115123125783e-17
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.05713599417437626
New effect:-6.38378239159465e-16
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.1543099983712363
New effect:-2.7755575615628914e-16
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.20837767057637896
New effect:-1.3877787807814457e-16
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.2809283302013028
New effect:-2.7755575615628914e-17
p value:0.0

Treatment: O3
-----------------------------
OLS RCC Treatm

### Read Data for Cluster 1 Spreading

In [12]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_1_spread.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.696947,0.489362,0.992481,0.0,0.859155,1.0,0.480315,0.458333,0.240291,0.321429,0.555556,0.0
1,0.0,1.0,0.489362,0.616541,0.186047,0.380282,0.671533,0.102362,0.35307,0.509709,0.642857,0.488889,0.0
2,0.0,0.582318,0.446809,0.917293,0.0,0.859155,0.781022,0.220472,0.524123,0.436893,0.0,0.555556,0.0
3,0.0,0.690603,0.0,0.526316,0.0,0.84507,0.934307,0.161417,0.379386,0.429612,0.642857,0.488889,0.0
4,0.0,0.701333,0.446809,0.571429,0.534884,0.0,0.715328,0.409449,0.649123,0.822816,0.535714,0.422222,0.0


### Causal Refutation for Cluster 1 Spreading: RCC

In [13]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.09033774598507255
New effect:0.08349846152332097

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.08369025142680481
New effect:0.0970167657697118

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.2883820671934073
New effect:-0.29563810463038054

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.40842802472796047
New effect:-0.41186368302544635

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.6902091896369899
New effect:-0.6843774309116296

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.3

### Causal Refutation for Cluster 1 Spreading: PT

In [14]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.09033774598507255
New effect:2.7755575615628914e-15
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.08369025142680481
New effect:2.7755575615628914e-16
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.2883820671934073
New effect:-5.551115123125783e-16
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.40842802472796047
New effect:5.551115123125783e-17
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.6902091896369899
New effect:-2.7755575615628914e-16
p value:0.0

Treatment: O3
-----------------------------
OLS RCC Treatment

### Read Data for Cluster 1 Postpeak

In [15]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_1_postpeak.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.154162,0.435754,0.65625,0.0,0.942857,0.487179,0.637019,0.173684,0.141772,0.37931,0.213115,0.49505
1,0.015625,0.16631,0.441341,0.6875,0.0,0.942857,0.474359,0.608173,0.157895,0.159494,0.344828,0.213115,0.60396
2,0.03125,0.152003,0.424581,0.665179,0.062016,0.967347,0.320513,0.550481,0.082456,0.101266,0.275862,0.163934,0.534653
3,0.046875,0.145876,0.391061,0.767857,0.062016,0.971429,0.24359,0.526442,0.091228,0.108861,0.275862,0.180328,0.574257
4,0.0625,0.134365,0.374302,0.785714,0.062016,0.983673,0.192308,0.502404,0.115789,0.124051,0.275862,0.180328,0.425743


### Causal Refutation for Cluster 1 Postpeak: RCC

In [16]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.09820273489625936
New effect:0.09776673661459574

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.04530647131225454
New effect:-0.04545241597709136

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.007980779002190977
New effect:-0.008024829637250666

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.16486950881736065
New effect:-0.16512405932416627

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.18458368911534512
New effect:-0.18624225694020874

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated eff

### Causal Refutation for Cluster 1 Postpeak: PT

In [17]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.09820273489625936
New effect:5.551115123125783e-17
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.04530647131225454
New effect:3.885780586188048e-16
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.007980779002190977
New effect:-3.3306690738754696e-16
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.16486950881736065
New effect:0.0
p value:1.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.18458368911534512
New effect:1.942890293094024e-16
p value:0.0

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: 

### Read Data for Cluster 2 Overall

In [18]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_2.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.771312,0.31746,0.73913,1.9081960000000002e-17,0.956373,0.533865,0.259179,0.286727,0.330045,0.393939,0.295238,0.0
1,0.012987,0.702972,0.299603,0.820652,0.03519062,0.95509,0.450199,0.359611,0.250253,0.297758,0.339394,0.252381,0.0
2,0.025974,0.630765,0.242063,0.757246,0.04692082,0.966638,0.40239,0.37149,0.225937,0.265471,0.357576,0.2,0.0
3,0.038961,0.514089,0.162698,0.721014,0.04692082,0.977759,0.414343,0.384449,0.395137,0.441256,0.660606,0.228571,0.017964
4,0.051948,0.431731,0.119048,0.576087,0.04692082,0.988024,0.466135,0.37149,0.529889,0.559641,0.733333,0.27619,0.023952


### Causal Refutation for Cluster 2 Overall: RCC

In [19]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.030364101048289134
New effect:0.030255029587572584

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.011411525994509389
New effect:0.011459588532184591

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.0014221138843769354
New effect:0.0014411882680604206

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.0483790808560258
New effect:-0.048371553650728524

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.04004488590274043
New effect:-0.03995043789594396

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated e

### Causal Refutation for Cluster 2 Overall: PT

In [20]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.030364101048289134
New effect:-2.7755575615628914e-17
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.011411525994509389
New effect:-1.6306400674181987e-16
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.0014221138843769354
New effect:2.7755575615628914e-17
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.0483790808560258
New effect:1.0408340855860843e-17
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.04004488590274043
New effect:0.0
p value:1.0

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refu

### Read Data for Cluster 2 Spreading

In [21]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_2_spread.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.859248,0.357143,0.724138,0.0,0.957192,0.528226,0.178947,0.286727,0.330045,0.393939,0.27451,0.0
1,0.0,0.864816,0.464286,0.666667,0.45977,0.949486,0.334677,0.484211,0.08612,0.133632,0.193939,0.098039,0.0
2,0.0,0.643376,0.772321,0.724138,0.091954,0.946918,0.358871,0.526316,0.14387,0.144395,0.048485,0.186275,0.0
3,0.0,0.50742,0.825893,0.678161,0.264368,0.928938,0.576613,0.294737,0.195542,0.176682,0.10303,0.156863,0.0
4,0.0,0.748671,0.772321,1.0,0.0,0.903253,0.237903,0.431579,0.101317,0.125561,0.121212,0.156863,0.0


### Causal Refutation for Cluster 2 Spreading: RCC

In [22]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.09017843218493397
New effect:0.08898558581155286

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.037235143534653806
New effect:0.03774187402133976

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.007320396562896722
New effect:-0.007008880658995777

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.15540378630553078
New effect:-0.1547789102590444

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.08895434138143865
New effect:-0.08933393121333688

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effec

### Causal Refutation for Cluster 2 Spreading: PT

In [23]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.09017843218493397
New effect:-6.245004513516506e-17
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.037235143534653806
New effect:-3.3306690738754696e-16
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.007320396562896722
New effect:-5.689893001203927e-16
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.15540378630553078
New effect:2.0816681711721685e-17
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.08895434138143865
New effect:-1.3183898417423734e-16
p value:0.0

Treatment: O3
-----------------------------
OLS RCC Tre

### Read Data for Cluster 2 Postpeak

In [24]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_2_postpeak.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.262461,0.0,0.445652,0.11437,0.986266,0.34555,0.360691,0.254545,0.266332,0.431818,0.235294,0.08642
1,0.016393,0.256937,0.055777,0.568841,0.046921,0.981974,0.445026,0.397408,0.347475,0.355109,0.545455,0.305882,0.037037
2,0.032787,0.249815,0.135458,0.630435,0.02346,0.973391,0.471204,0.460043,0.416162,0.457286,0.515152,0.376471,0.037037
3,0.04918,0.282388,0.187251,0.742754,0.067449,0.964807,0.623037,0.477322,0.638384,0.703518,0.575758,0.588235,0.012346
4,0.065574,0.312527,0.278884,0.807971,0.067449,0.957082,0.565445,0.537797,0.666667,0.735343,0.507576,0.588235,0.012346


### Causal Refutation for Cluster 2 Postpeak: RCC

In [25]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.025386135653179483
New effect:0.025388418058453592

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.019189470560790088
New effect:0.01923383355879963

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.02909727969576052
New effect:0.02911541263133227

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.013312073088215071
New effect:-0.01333795456952786

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.014187679101234247
New effect:-0.014055511374795892

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated eff

### Causal Refutation for Cluster 2 Postpeak: PT

In [26]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.025386135653179483
New effect:4.5102810375396984e-17
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.019189470560790088
New effect:6.938893903907228e-17
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.02909727969576052
New effect:5.551115123125783e-17
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.013312073088215071
New effect:-2.42861286636753e-17
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.014187679101234247
New effect:-6.938893903907228e-17
p value:0.0

Treatment: O3
-----------------------------
OLS RCC Treatme

### Read Data for Cluster 3 Overall

In [27]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_3.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.594985,0.478723,0.748092,0.412371,0.45898,0.495798,0.244211,0.423745,0.412568,0.103321,0.302326,0.0
1,0.012987,0.569882,0.505319,0.776718,0.324742,0.462306,0.483193,0.275789,0.446911,0.428962,0.097786,0.325581,0.0
2,0.025974,0.516293,0.510638,0.778626,0.295533,0.476718,0.411765,0.36,0.529923,0.48725,0.114391,0.372093,0.0
3,0.038961,0.435802,0.514184,0.80916,0.185567,0.501109,0.323529,0.442105,0.573359,0.508197,0.125461,0.395349,0.0
4,0.051948,0.34796,0.503546,0.832061,0.185567,0.514412,0.210084,0.513684,0.531853,0.452641,0.114391,0.364341,0.000979


### Causal Refutation for Cluster 3 Overall: RCC

In [28]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.010384902101555661
New effect:0.010391602733908755

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.03145032107940496
New effect:0.03133909156351193

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.016726384395160734
New effect:0.016732280820490636

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.008824081170037188
New effect:0.008824521335154715

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.011679833822159173
New effect:-0.011666370843327476

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated eff

### Causal Refutation for Cluster 3 Overall: PT

In [29]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.010384902101555661
New effect:-5.204170427930421e-18
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.03145032107940496
New effect:0.0
p value:1.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.016726384395160734
New effect:-4.336808689942018e-18
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.008824081170037188
New effect:1.3010426069826053e-17
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.011679833822159173
New effect:9.80118763926896e-17
p value:0.0

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: 

### Read Data for Cluster 3 Spreading

In [30]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_3_spread.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.589402,0.505618,0.65445,0.412371,0.43649,0.491525,0.262195,0.423745,0.412568,0.1,0.336538,0.0
1,0.0,0.58698,0.550562,0.497382,0.082474,0.533487,0.669492,0.390244,0.322394,0.407104,0.422222,0.394231,0.0
2,0.0,0.573983,0.58427,0.685864,0.237113,0.817552,0.288136,0.472561,0.157336,0.221311,0.088889,0.221154,0.0
3,0.0,0.473801,0.797753,0.685864,0.082474,0.852194,0.199153,0.481707,0.1139,0.142077,0.033333,0.192308,0.0
4,0.0,0.58384,0.707865,0.60733,0.082474,0.540416,0.377119,0.417683,0.316602,0.31694,0.055556,0.192308,0.0


### Causal Refutation for Cluster 3 Spreading: RCC

In [31]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.017529413136504336
New effect:0.017525011285654573

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.07483488419635859
New effect:0.07478070762638214

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.03278764563221474
New effect:0.03281103879285063

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.008607627084153493
New effect:0.008583535040381942

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.006812367559506838
New effect:0.006880412814411003

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:

### Causal Refutation for Cluster 3 Spreading: PT

In [32]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.017529413136504336
New effect:1.3877787807814457e-17
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.07483488419635859
New effect:7.632783294297951e-17
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.03278764563221474
New effect:4.85722573273506e-17
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.008607627084153493
New effect:2.0816681711721685e-17
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.006812367559506838
New effect:0.0
p value:1.0

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Use

### Read Data for Cluster 3 Postpeak

In [33]:
# Load the data and train the XGBoost models
root = os.path.dirname(os.getcwd())
data_dir = os.path.join(root, 'data')
### Cluster 1 Overall ###
c1_overall_path = os.path.join(data_dir, 'time_cluster_1_spread.csv')
c1_overall = pd.read_csv(c1_overall_path)
all_vars = ['days', 'ACTV', 'TEMP', 'HUM', 'WSPD', 'PRES', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO', 'Case']
cluster1_df = c1_overall[all_vars]
# Normalize the data 
scaler = MinMaxScaler()
normalized = scaler.fit_transform(cluster1_df.values)
cluster1_df_norm = pd.DataFrame(normalized)
cluster1_df_norm.columns = cluster1_df.columns
cluster1_df_norm.head()

Unnamed: 0,days,ACTV,TEMP,HUM,WSPD,PRES,NO2,O3,PM2.5,PM10,SO2,CO,Case
0,0.0,0.696947,0.489362,0.992481,0.0,0.859155,1.0,0.480315,0.458333,0.240291,0.321429,0.555556,0.0
1,0.0,1.0,0.489362,0.616541,0.186047,0.380282,0.671533,0.102362,0.35307,0.509709,0.642857,0.488889,0.0
2,0.0,0.582318,0.446809,0.917293,0.0,0.859155,0.781022,0.220472,0.524123,0.436893,0.0,0.555556,0.0
3,0.0,0.690603,0.0,0.526316,0.0,0.84507,0.934307,0.161417,0.379386,0.429612,0.642857,0.488889,0.0
4,0.0,0.701333,0.446809,0.571429,0.534884,0.0,0.715328,0.409449,0.649123,0.822816,0.535714,0.422222,0.0


### Causal Refutation for Cluster 3 Postpeak: RCC

In [34]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='RCC')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.09033774598507255
New effect:0.09124528496493622

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:0.08369025142680481
New effect:0.07925059140890045

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.2883820671934073
New effect:-0.2671417574030767

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.40842802472796047
New effect:-0.4167602780547716

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.6902091896369899
New effect:-0.6428956196197994

Treatment: O3
-----------------------------
OLS RCC Treatment Effect: Refute: Add a Random Common Cause
Estimated effect:-0.35

### Causal Refutation for Cluster 3 Postpeak: PT

In [35]:
# ATE refutation
treatments = ['PRES', 'TEMP', 'HUM', 'WSPD', 'NO2', 'O3', 'PM2.5', 'PM10', 'SO2', 'CO']
curr_treatments = []
#est_linear2 = []
#ate_forest2 = []
for treatment in treatments:
    try:
        est_ols, ref_ols = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
                                causal_graph=causal_graph, model_type='linear', refutation_type='PT')
#         est_forest, ref_forest = ate_estimate_refutation(treatment=treatment, data=cluster1_df_norm, outcome='Case',
#                                 causal_graph=causal_graph, model_type='nonlinear', refutation_type='RCC')
        print(f'Treatment: {treatment}')
        print('-----------------------------')
#         print(f'OLS Treatment Effect: {est_ols}')
        print(f'OLS RCC Treatment Effect: {ref_ols}')
#         print(f'Forest Treatment Effect: {est_forest}')
#         print(f'Forest RCC Treatment Effect: {ref_forest}')
    except Exception as e:
        print(e)
        continue 

  return np.sqrt(eigvals[0]/eigvals[-1])
  return self.params / self.bse
  cond2 = cond0 & (x <= _a)


Treatment: PRES
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.09033774598507255
New effect:2.7755575615628914e-15
p value:0.0

Treatment: TEMP
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:0.08369025142680481
New effect:2.7755575615628914e-16
p value:0.0

Treatment: HUM
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.2883820671934073
New effect:-5.551115123125783e-16
p value:0.0

Treatment: WSPD
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.40842802472796047
New effect:5.551115123125783e-17
p value:0.0

Treatment: NO2
-----------------------------
OLS RCC Treatment Effect: Refute: Use a Placebo Treatment
Estimated effect:-0.6902091896369899
New effect:-2.7755575615628914e-16
p value:0.0

Treatment: O3
-----------------------------
OLS RCC Treatment