In [None]:
import sys
# Prepend the relative directory ../pycre/ to the module search path.
# A relative entry is resolved against the kernel's current working directory,
# so this notebook must be run from a folder that has ../pycre/ next to it.
sys.path.insert(0, "../pycre/")

from sklearn.ensemble import GradientBoostingClassifier

# NOTE(review): `cre`, `parsers` and `dataset` are presumably the modules made
# importable by the path entry above -- confirm.
# `CRE`, used in the cells below, is not imported by name anywhere in the
# visible cells, so it presumably comes from this wildcard import.
from cre import *
from parsers import get_parser  # not referenced in the visible cells
from dataset import dataset_generator

# Binary Outcome

### 2 Rules

Ground Truth: 
- **ATE**:  0.0
- **x1>0 & x2<=0**: -1.0
- **x5>0 & x6<=0**:  1.0

In [None]:
# Generate a synthetic dataset with a binary outcome (binary_out=True).
# The four returned objects are unpacked as X, y, z and ite
# (presumably covariates, outcome, treatment and ground-truth individual
# treatment effects -- confirm against dataset_generator).
X, y, z, ite = dataset_generator(
    N=2000,
    M=2,
    binary_out=True,
)

In [None]:
# Example 1: Baseline
# CRE() is built without arguments, i.e. with whatever defaults the class defines,
# then fitted on the dataset generated above and its result is displayed.
model = CRE()
model.fit(X, y, z)
model.visualize()

In [None]:
# Example 2: tuned configuration for the binary outcome
# - use a different learner for outcome estimation (a classifier)
# - relax filtering
# - increase the number of samples in bootstrapping
# - round the rules' thresholds to 0 decimals
model = CRE(
    learner_y=GradientBoostingClassifier(),  # outcome learner (classification)
    t_corr=0.8,                              # relax filtering
    B=10,                                    # number of bootstrap samples
    decimal=0,                               # round thresholds to 0 decimals
)
model.fit(X, y, z)
# Author's observation on the rendered output: perfect discovery!
# (No seed is passed in the visible cells, so the result may differ between runs.)
model.visualize()

In [None]:
# Keep the model's output on X in `ite_pred` (held in memory only; nothing is
# written to disk). Presumably these are the estimated individual treatment
# effects -- confirm against CRE.eval.
ite_pred = model.eval(X)

### 2 Rules - variant with unobserved confounders

Ground Truth: 
- **ATE**:  0.0
- **x1>0 & x2<=0**: -1.0
- **x5>0 & x6<=0**:  1.0

In [None]:
# Generate a synthetic dataset with a binary outcome and confounding="lin".
X, y, z, ite = dataset_generator(
    N=2000,
    M=2,
    binary_out=True,
    confounding="lin",
)

# Split the covariates by column position:
# - W keeps the columns at positions 2 and 3 (presumably the confounders that
#   this section treats as "unobserved" -- confirm against dataset_generator);
# - X is overwritten with positions 0, 1, 4, 5, 6, 7 and 8 only.
# NOTE(review): any column at position 9 or beyond is dropped from X as well;
# confirm this is intended and not only the removal of W's columns.
W = X.iloc[:, [2, 3]]
X = X.iloc[:, [0, 1, 4, 5, 6, 7, 8]]

In [None]:
# Same configuration as Example 2, fitted on the reduced X only:
# W is NOT passed to fit() here.
model = CRE(
    learner_y=GradientBoostingClassifier(),
    t_corr=0.8,
    B=10,
    decimal=0,
)
model.fit(X, y, z)
# Author's observation on the rendered output: perfect discovery!
model.visualize()

In [None]:
# Same configuration again, but this time W is handed to fit() as a fourth
# positional argument (the author's note: "also unconfounding", i.e. the
# held-out columns are presumably used to adjust for confounding -- confirm
# against CRE.fit).
model = CRE(
    learner_y=GradientBoostingClassifier(),
    t_corr=0.8,
    B=10,
    decimal=0,
)
model.fit(X, y, z, W)
# Author's observation on the rendered output: perfect discovery!
model.visualize()

# Continuous Outcome

### 2 Rules

Ground Truth: 
- **ATE**:  0.0
- **x1>0 & x2<=0**: -2.0
- **x5>0 & x6<=0**:  2.0

In [None]:
# Generate a synthetic dataset with a non-binary outcome (binary_out=False)
# and effect_size=2.
X, y, z, ite = dataset_generator(
    N=2000,
    M=2,
    effect_size=2,
    binary_out=False,
)

In [None]:
# Example 3: Baseline
# CRE() is built without arguments, i.e. with whatever defaults the class defines,
# then fitted on the continuous-outcome dataset generated above.
model = CRE()
model.fit(X, y, z)
model.visualize()

In [None]:
# Example 4: tuned configuration for the continuous outcome
# - increase the number of samples in bootstrapping
# - increase heterogeneity in bootstrapping (by reducing the subsample)
# - relax filtering
# - round the rules' thresholds to 0 decimals
model = CRE(
    B=50,            # number of bootstrap samples
    t_corr=0.8,      # relax filtering
    subsample=0.05,  # smaller subsample -> more heterogeneous bootstraps
    decimal=0,       # round thresholds to 0 decimals
)
model.fit(X, y, z)
# Author's observation on the rendered output: perfect discovery!
# (No seed is passed in the visible cells, so the result may differ between runs.)
model.visualize()

In [None]:
# Keep the model's output on X in `ite_pred` (held in memory only; nothing is
# written to disk). Presumably these are the estimated individual treatment
# effects -- confirm against CRE.eval.
ite_pred = model.eval(X)

### 4 Rules

Ground Truth: 
- **ATE**:  0.0
- **x1>0 & x2<=0**: -10.0
- **x5>0 & x6<=0**:  10.0
- **x4>0**: -5.0
- **x5<=0 & x7>0 & x8<=0**: 20.0

In [None]:
# Generate a larger synthetic dataset (N=5000) with M=4, effect_size=5 and a
# non-binary outcome (binary_out=False).
X, y, z, ite = dataset_generator(
    N=5000,
    M=4,
    effect_size=5,
    binary_out=False,
)

In [None]:
# Example 5: Baseline
# CRE() is built without arguments, i.e. with whatever defaults the class defines,
# then fitted on the 4-rule dataset generated above.
model = CRE()
model.fit(X, y, z)
model.visualize()

In [None]:
# Example 6: tuned configuration for the 4-rule scenario
# - increase the number of samples in bootstrapping
# - relax filtering
# - stronger rules selection
# - increase heterogeneity in bootstrapping (by reducing the subsample)
# - round the rules' thresholds to 0 decimals
# - allow discovery of longer rules
model = CRE(
    B=50,            # number of bootstrap samples
    t_corr=0.8,      # relax filtering
    t_ss=0.9,        # stronger rules selection
    subsample=0.05,  # smaller subsample -> more heterogeneous bootstraps
    decimal=0,       # round thresholds to 0 decimals
    max_depth=3,     # allow longer rules
)
model.fit(X, y, z)
# Author's observation on the rendered output: better discovery
# (compared with the baseline of Example 5).
model.visualize()

In [None]:
# Keep the model's output on X in `ite_pred` (held in memory only; nothing is
# written to disk). Presumably these are the estimated individual treatment
# effects -- confirm against CRE.eval.
ite_pred = model.eval(X)