In [1]:
import numpy as np
import torch

In [2]:
from sklearn.gaussian_process.kernels import RBF

In [3]:
from matplotlib import pyplot as plt

In [4]:
from tqdm.auto import tqdm

In [5]:
import kcmc
from kcmc.estimators import confounding_robust_estimator, hajek, ipw
from kcmc.data_binary import generate_data, evaluate_policy, estimate_p_t
from kcmc.data_continuous import generate_data, evaluate_policy, estimate_p_t
from kcmc.data_real import generate_data, estimate_p_t

In [8]:
Y, T, X = generate_data()

In [9]:
Y.shape

torch.Size([667])

### Data

In [54]:
import csv 
import os.path

EXAMPLE_PARAMS = {
    'D': 200,
    'lambd': 1.5, 
    'gamma': 0.01,
    'alpha': 0.05,
    'sigma2': 0.01,
    'kernel': RBF(),
    'hard_kernel_const': False,
    'normalize_p_t': False,
    'f_divergence': 'total variation', 
    'hajek_const': False,
    'kernel_const': False,
    'quantile_const': False,
    'regressor_const': False,
    'tan_box_const': False,
    'lr_box_const': False,
    'f_const': False,
}

def run_policy_evaluation_experiment(
    log_file, params, policy, data_type='synthetic binary', sample_size=1000, n_seeds=1, seed0=0
):
    assert data_type in ['synthetic binary', 'synthetic continuous', 'real binary']
    assert set(params.keys()) == set(EXAMPLE_PARAMS.keys())
    for seed in range(seed0, seed0 + n_seeds):
        Y, T, X, p_t = get_data(data_type, sample_size, seed)
        try:
            lower_bound = confounding_robust_estimator(Y, T, X, p_t, policy, **params).data.numpy()
            upper_bound = -confounding_robust_estimator(-Y, T, X, p_t, policy, **params).data.numpy()
        except:
            print(f"Encountered error for data_type={data_type}, sample_size={sample_size}, params={params}. Skipping the experiment.")
            continue
        log_csv(log_file, data_type, policy.__name__, lower_bound, upper_bound, params, min(sample_size, T.shape[0]), seed)

def get_data(data_type, sample_size, seed):
    if 'synthetic' in data_type:
        if 'binary' in data_type:
            from kcmc.data_binary import generate_data, evaluate_policy, estimate_p_t
        elif 'continuous' in data_type:
            from kcmc.data_continuous import generate_data, evaluate_policy, estimate_p_t
        else:
            raise ValueError
        np.random.seed(seed)
        Y, T, X, U, e_x, e_xu = generate_data(sample_size)
        p_t = estimate_p_t(X, T)
    elif 'real' in data_type:
        from kcmc.data_real import generate_data, estimate_p_t
        Y, T, X = generate_data()
        p_t = estimate_p_t(X, T)
    return Y, T, X, p_t

def log_csv(log_file, data_type, policy_name, lower_bound, upper_bound, params, sample_size, seed):
    if not os.path.exists(log_file):
        # make a column name
        columns = ['data_type', 'policy_name', 'sample_size', 'seed', 'lower_bound', 'upper_bound', *params.keys()]
        with open(log_file, 'a') as file:
            writer = csv.writer(file)
            writer.writerow(columns)  
    # log data by appending to the csv file
    fields=[data_type, policy_name, sample_size, seed, lower_bound, upper_bound, *params.values()]
    with open(log_file, 'a') as file:
        writer = csv.writer(file)
        writer.writerow(fields)

In [58]:
log_file='logs/toy_experiments_log.csv'
params = EXAMPLE_PARAMS.copy()
#params['hajek_const'] = True
#params['tan_box_const'] = True
params['lr_box_const'] = True
params['quantile_const'] = True
params['kernel'] = RBF()
run_policy_evaluation_experiment(log_file, params, toy_policy, data_type='synthetic continuous', n_seeds=1, sample_size=100)



Notes on data types:

- Only use torch tensor for `Y` and `r`
- For other data, use numpy array

In [37]:
n = 1000
np.random.seed(0)
Y, T, X, U, e_x, e_xu = generate_data(n)
p_t = estimate_p_t(X, T)

In [13]:
#Y, T, X = generate_data()
#p_t = estimate_p_t(X, T)

### Log data

In [39]:
import csv   
fields=['first','second','third']
with open('experiments_log.csv', 'a') as f:
    writer = csv.writer(f)
    writer.writerow(fields)

### Define Toy Policy

In [10]:
beta_e_x = np.asarray([0, .75, -.5, 0, -1])

def toy_policy(X, T):
    n = X.shape[0]
    T = torch.as_tensor(T)
    z = torch.as_tensor(X) @ torch.as_tensor(beta_e_x)
    e_x = torch.exp(z) / (1 + torch.exp(z))
    return (1. - T) * e_x + T * (1. - e_x)

# def zero_policy(X):
#    return np.zeros(X.shape[0], dtype=int)

In [56]:
def wrap_continuous_policy(policy):
    def wrapped_policy(X, T=None, return_sample=False, requires_grad=False): 
        policy_dist = policy(X)
        if return_sample:
            return policy_dist.rsample() if requires_grad else policy_dist.sample()
        else:
            return torch.exp(policy_dist.log_prob(torch.as_tensor(T)))
    return wrapped_policy

beta_e_x = np.asarray([0, .75, -.5, 0, -1])

@wrap_continuous_policy
def toy_policy(X):
    z = torch.as_tensor(X) @ torch.as_tensor(beta_e_x)
    mu_t = torch.exp(z) / (1 + torch.exp(z))
    a, b = 3 * mu_t + 1, 3 * (1 - mu_t) + 1
    return torch.distributions.beta.Beta(a, b)

### Ground Truth

In [8]:
evaluate_policy(toy_policy, n=1000000)

tensor(3.8210, dtype=torch.float64)

### IPW estimator

In [9]:
est_ipw = ipw(Y, T, X, p_t, toy_policy)

In [10]:
est_ipw

tensor(4.1126, dtype=torch.float64)

### Hajek estimator

In [11]:
est_hajek = hajek(Y, T, X, p_t, toy_policy)

In [14]:
est_hajek

tensor(6988.2551, dtype=torch.float64)

### Implement Confounding Robust Inference

In [15]:
kcmc.estimators.f_divergences

['KL',
 'inverse KL',
 'Jensen-Shannon',
 'squared Hellinger',
 'Pearson chi squared',
 'Neyman chi squared',
 'total variation']

In [57]:
"est" in "estimator"

True

In [None]:
est, w = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.01,
    alpha=0.05,
    hard_kernel_const=False,
    f_divergence='total variation', 
    hajek_const=True,
    kernel_const=False,
    quantile_const=False,
    tan_box_const=True,
    lr_box_const=False,
    f_const=False,
    return_w=True
)

In [None]:
est

In [None]:
est_hajek

#### Select Kernel for KCMC

In [12]:
n = 1000
np.random.seed(0)
Y, T, X, U, e_x, e_xu = generate_data(n)
p_t = estimate_p_t(X, T)



In [None]:
# Guessing kernel with approximate solution
_, w_guess = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy, lambd=1.5,
    hajek_const=True, tan_box_const=True, return_w=True,
)
e_guess = p_t * w_guess - 1
gp_kernel = kcmc.estimators.fit_gp_kernel(e_guess, T, X)
sigma2 = gp_kernel.k1.noise_level
kernel = gp_kernel.k2

est = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.5,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=False,
    normalize_p_t=False,
    f_divergence='KL', 
    hajek_const=False,
    kernel_const=False,
    quantile_const=False,
    tan_box_const=False,
    lr_box_const=False,
    f_const=False,
)

In [None]:
est

### Quantile Balancing with KernelPCA

est = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.5,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=False,
    f_divergence='KL', 
    hajek_const=False,
    kernel_const=False,
    quantile_const=True,
    tan_box_const=True,
    lr_box_const=False,
    f_const=False,
)

est

In [14]:
# Guessing kernel with approximate solution
_, w_guess = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5,
    alpha=0.05,
    sigma2=0.1,
    kernel=RBF(),
    hard_kernel_const=False,
    kernel_const=True,
    lr_box_const=True,
    return_w=True,
)
e_guess = p_t * w_guess - 1
gp_kernel = kcmc.estimators.fit_gp_kernel(e_guess, T, X)
sigma2 = gp_kernel.k1.noise_level
kernel = gp_kernel.k2

In [15]:
sigma2

0.09932265319225153

In [30]:
est = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.5,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=True,
    normalize_p_t=False,
    f_divergence='KL',
    kernel_const=True,
    quantile_const=False,
    regressor_const=False,
    lr_box_const=True,
    f_const=False,
)

In [31]:
est

tensor(3.7812, dtype=torch.float64)

In [27]:
est

tensor(3.8054, dtype=torch.float64)

In [21]:
est, w = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.5,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=False,
    normalize_p_t=False,
    f_divergence='KL',
    kernel_const=False,
    quantile_const=True,
    lr_box_const=True,
    f_const=False,
    return_w=True,
)

In [22]:
est

tensor(3.7142, dtype=torch.float64)

In [53]:
est, w = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=50,
    lambd=2.0, 
    gamma=0.01,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=True,
    normalize_p_t=False,
    f_divergence='KL',
    kernel_const=True,
    quantile_const=True,
    lr_box_const=True,
    f_const=False,
    return_w=True,
)

In [54]:
est

tensor(3.5237, dtype=torch.float64)

In [23]:
sigma2=0.1 
kernel=None

In [55]:
est, w = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.01,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=False,
    normalize_p_t=False,
    f_divergence='KL',
    hajek_const=False,
    kernel_const=False,
    quantile_const=True,
    regressor_const=False,
    lr_box_const=True,
    f_const=False,
    return_w=True,
)

In [56]:
est

tensor(3.0735, dtype=torch.float64)

In [61]:
est, w = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=2.0, 
    gamma=0.01,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=False,
    normalize_p_t=False,
    f_divergence='KL',
    kernel_const=True,
    quantile_const=False,
    lr_box_const=True,
    f_const=False,
    return_w=True,
)

In [62]:
est

tensor(2.7427, dtype=torch.float64)

In [29]:
est, w = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.01,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=False,
    normalize_p_t=False,
    f_divergence='KL',
    kernel_const=True,
    quantile_const=False,
    lr_box_const=True,
    f_const=True,
    return_w=True,
)

In [30]:
est

tensor(3.6462, dtype=torch.float64)

In [31]:
est, w = confounding_robust_estimator(
    Y, T, X, p_t, toy_policy,
    D=200,
    lambd=1.5, 
    gamma=0.01,
    alpha=0.05,
    sigma2=sigma2,
    kernel=kernel,
    hard_kernel_const=False,
    normalize_p_t=False,
    f_divergence='KL',
    kernel_const=False, # doesn't have influence alone!
    quantile_const=True,
    lr_box_const=True,
    f_const=True,
    return_w=True,
)

In [32]:
est

tensor(3.9044, dtype=torch.float64)

In [25]:
w_guess = w
e_guess = p_t * w_guess - 1
gp_kernel = kcmc.estimators.fit_gp_kernel(e_guess, T, X)
sigma2 = gp_kernel.k1.noise_level
kernel = gp_kernel.k2

In [26]:
sigma2

0.16560784904777315

### Compare Policy Learning Performance

In [32]:
def base_policy(X, T):
    n = X.shape[0]
    return 0.5 * torch.ones(n, dtype=float)

def LR_policy(X, T, beta):
    n = X.shape[0]
    T = torch.as_tensor(T)
    X = np.concatenate([np.ones([n, 1]), X], axis=1)
    p = torch.sigmoid(torch.tensor(X) @ beta)
    return (1 - T) * p + T * (1 - p)

In [33]:
class nnPolicy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.nn = torch.nn.Sequential(
            torch.nn.Linear(5, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1),
            torch.nn.Sigmoid(),
        )
        self.nn[-2].weight.data[:] = 0.
        self.nn[-2].bias.data[:] = 0.

    def forward(self, X, T):
        X = torch.as_tensor(X, dtype=torch.float32)
        T = torch.as_tensor(T)
        p = self.nn(X)[:, 0]
        return (1 - T) * p + T * (1 - p)

In [70]:
@wrap_continuous_policy
def base_policy(X):
    mu_t = 0.5 * torch.ones(X.shape[0])
    a, b = 10 * mu_t + 1, 10 * (1 - mu_t) + 1
    return torch.distributions.beta.Beta(a, b)

def LR_policy(X, beta):
    X = np.concatenate([np.ones([X.shape[0], 1]), X], axis=1)
    mu_t = torch.sigmoid(torch.tensor(X) @ beta)
    a, b = 10 * mu_t + 1, 10 * (1 - mu_t) + 1
    return torch.distributions.beta.Beta(a, b)

In [64]:
class nnPolicy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.nn = torch.nn.Sequential(
            torch.nn.Linear(5, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 32),
            torch.nn.ReLU(),
            torch.nn.Linear(32, 1),
            torch.nn.Sigmoid(),
        )
        self.nn[-2].weight.data[:] = 0.
        self.nn[-2].bias.data[:] = 0.
    
    def forward(self, X):
        X = torch.as_tensor(X, dtype=torch.float32)
        mu_t = torch.sigmoid(self.nn(X)[:, 0])
        a, b = 10 * mu_t + 1, 10 * (1 - mu_t) + 1
        return torch.distributions.beta.Beta(a, b)

In [65]:
evaluate_policy(base_policy, n=100000)

tensor(3.7413, dtype=torch.float64)

In [66]:
nn_policy = wrap_continuous_policy(nnPolicy())
evaluate_policy(nn_policy, n=100000)

tensor(3.7534, dtype=torch.float64)

#### Best Policy

In [72]:
train_steps = 101
beta_best = torch.zeros(6, requires_grad=True, dtype=float)
best_policy = wrap_continuous_policy(lambda X: LR_policy(X, beta_best))
optimizer = torch.optim.RMSprop(lr=1e-1, params=[beta_best])
pbar = tqdm(range(train_steps))
for i in pbar:
    value = evaluate_policy(best_policy, n=100000, requires_grad=True)
    (- value).backward()
    optimizer.step()
    optimizer.zero_grad()
    pbar.set_description(f"Value: {value}")

  0%|          | 0/101 [00:00<?, ?it/s]

In [None]:
train_steps = 101
beta_best = torch.zeros(6, requires_grad=True, dtype=float)
best_policy = lambda X, T: LR_policy(X, T, beta_best)
optimizer = torch.optim.RMSprop(lr=1e-1, params=[beta_best])
pbar = tqdm(range(train_steps))
for i in pbar:
    value = evaluate_policy(best_policy, n=100000)
    (- value).backward()
    optimizer.step()
    optimizer.zero_grad()
    pbar.set_description(f"Value: {value}")

#### Just Run Hajek optimization

In [55]:
train_steps = 101
beta_hajek = torch.zeros(6, requires_grad=True, dtype=float)
hajek_policy = lambda X, T: LR_policy(X, T, beta_hajek)
optimizer = torch.optim.RMSprop(lr=1e-1, params=[beta_hajek])
pbar = tqdm(range(train_steps))
for i in pbar:
    value = hajek(Y, T, X, p_t, hajek_policy)
    (- value).backward()
    optimizer.step()
    optimizer.zero_grad()
    pbar.set_description(f"Value: {value}")

  0%|          | 0/101 [00:00<?, ?it/s]

In [56]:
evaluate_policy(hajek_policy, n=100000)

tensor(4.6822, dtype=torch.float64, grad_fn=<MeanBackward0>)

### Just run min-max optimzation

In [57]:
def evaluate_confounding_robust_hajek(policy):
    est = confounding_robust_estimator(
        Y, T, X, p_t, policy, 
        lambd=1.5, hajek_const=True, tan_box_const=True,
    )
    return est

In [58]:
def evaluate_confounding_robust_kernel(policy):
    est = confounding_robust_estimator(
        Y, T, X, p_t, policy, 
        D=200, lambd=1.5, alpha=0.05, 
        sigma2=sigma2, kernel=kernel,
        kernel_const=True,
        tan_box_const=True,
    )
    return est

In [60]:
# Hajek policy min-max value
train_steps = 101
beta_hajek = torch.zeros(6, requires_grad=True, dtype=float)
hajek_policy = lambda X, T: LR_policy(X, T, beta_hajek)
optimizer = torch.optim.SGD(lr=1e-1, params=[beta_hajek])
pbar = tqdm(range(train_steps))
for i in pbar:
    hajek_value = evaluate_confounding_robust_hajek(hajek_policy)
    (- hajek_value).backward()
    optimizer.step()
    optimizer.zero_grad()
    pbar.set_description(f"Value: {hajek_value}")

  0%|          | 0/101 [00:00<?, ?it/s]

In [61]:
evaluate_policy(hajek_policy, n=100000)

tensor(4.4034, dtype=torch.float64, grad_fn=<MeanBackward0>)

In [62]:
evaluate_confounding_robust_kernel(hajek_policy)

tensor(4.0741, dtype=torch.float64, grad_fn=<MeanBackward0>)

In [63]:
# Kernel policy min-max value
train_steps = 101
beta_kernel = torch.zeros(6, requires_grad=True, dtype=float)
kernel_policy = lambda X, T: LR_policy(X, T, beta_kernel)
optimizer = torch.optim.RMSprop(lr=1e-1, params=[beta_kernel])
pbar = tqdm(range(train_steps))
for i in pbar:
    kernel_value = evaluate_confounding_robust_kernel(kernel_policy)
    (- kernel_value).backward()
    optimizer.step()
    optimizer.zero_grad()
    pbar.set_description(f"Value: {kernel_value}")

  0%|          | 0/101 [00:00<?, ?it/s]

In [64]:
evaluate_policy(kernel_policy, n=100000)

tensor(4.6240, dtype=torch.float64, grad_fn=<MeanBackward0>)

In [65]:
evaluate_confounding_robust_kernel(kernel_policy)

tensor(4.4347, dtype=torch.float64, grad_fn=<MeanBackward0>)

### Conclusions
- The difference in performance is sample-dependent
  - If the sample is well confounded, then kernel + hajek tends to be better than hajek
- IPW estimator is good enough in most cases, so we might not actually need confounding-robust methods
- Still, kernel + hajek method offers better lower-bound of the policy value, so it must have some use cases.

### Make a Table for Slide

In [None]:
pi1 = lambda x: 0.5
pi2 = toy_policy
pi3 = best_policy
pi4 = hajek_policy
pi5 = kernel_policy
pi6 = ipw_policy

In [None]:
table = {}
for i, pi in tqdm.tqdm(enumerate([pi1, pi2, pi3, pi4, pi5, pi6])):
    table[f"$\\pi_{i+1}$"] = row = {}
    row['$V_\\text{inf}$'] = float(evaluate_policy(pi, n=100000))
    prob = pi(X) * torch.tensor(T) + (1 - pi(X)) * torch.tensor(1 - T)
    r = torch.tensor(Y) * prob
    row['$\\hat V_\\text{inf}^\\Hajek$'] = float(confoundingRobustHajek(r, T, a, b))
    row['$\\hat V_\\text{inf}^\\kernel$'] = float(confoundingRobustKernel(r, T, X, a, b, p_t))
    row['$\\hat V_\\text{inf}^\\text{IPW}$'] = float(IPW(r, p_t))

In [None]:
table_copy = table.copy()
table.pop("$\\pi_6$")

In [None]:
print("\\begin{tabular}{ | m{4em} || " + "m{3em} " * len(table) + "| }")
print("\\hline")
header = "{}\t" + "&{}\t" * len(table) + "\\\\"
print(header.format("", *table.keys()))
print("\\hline")

row_names = table["$\\pi_1$"].keys()
for row in row_names:
    s = "{}\t" + "&{:1.3f}\t" * len(table) + "\\\\"
    print(s.format(row, *[col[row] for col in table.values()]))
    print("\\hline")
print("\\end{tabular}")


### Estimate Propensity Score (Conditional Density Estimation)
- This requires consistent nominal propensity score (p_obs(t|x))
- For discrete t, just run kernel logistic regressionregret_kernel

#### Discrete Case

In [None]:
from sklearn.linear_model import LogisticRegressionCV
# from sklearn.metrics import roc_auc_score, make_scorer
# roc_auc_score = make_scorer(roc_auc_score, greater_is_better=True, needs_proba=True)

model = LogisticRegressionCV().fit(X, T)
e_xx = model.predict_proba(X)[:, 1]

np.mean(np.abs(e_xx - e_x))

In [None]:
import optuna
from sklearn.decomposition import KernelPCA
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

def objective(trial):
    gamma = trial.suggest_float('gamma', 1e-2, 1e+2, log=True)
    lambd = trial.suggest_float('lambd', 1e-3, 1e+3, log=True)
    model = Pipeline([
        ('kpca', KernelPCA(n_components=100, kernel='rbf', gamma=gamma)),
        ('LogReg', LogisticRegression(C=1/lambd)),
    ])   
    cv_scores = cross_val_score(model, X, y=T, cv=4)
    return cv_scores.mean()

In [None]:
study = optuna.create_study(direction='maximize')
study.optimize(func=objective, n_trials=100)

In [None]:
study.best_params

In [None]:
Z = KernelPCA(n_components=200, kernel='rbf', gamma=0.01).fit_transform(X)
model = LogisticRegression(C=5).fit(Z, T)
e_xx = model.predict_proba(Z)[:, 1]

np.mean(np.abs(e_xx - e_x))

In [None]:
np.mean(1 / e_x * T), np.mean(1 / (1 - e_x) * (1 - T)), # both should be 1

In [None]:
np.mean(1 / e_xx * T), np.mean(1 / (1 - e_xx) * (1 - T)), # both should be 1

#### Continuous Case (Maybe on the next notebook)

In [None]:
from statsmodels.nonparametric.kernel_density import KDEMultivariateConditional

In [None]:
EPS = 1e-6

def benchmark_bandwidth(bw, T, X, cv=5):
    n = T.shape[0]
    indep_type='c' * X.shape[1]
    dep_type='u'
    split = (np.arange(n) % 5)
    cross_entropy = 0
    for i in range(cv):
        train = (split != i)
        test = (split == i)
        model = KDEMultivariateConditional(T[train, None], X[train, :], dep_type='c', indep_type='c' * X.shape[1], bw=bw)
        cross_entropy += np.mean(model.pdf(T[test, None], X[test, :]) + EPS) / cv
    return cross_entropy

In [None]:
best_scale = 0
best_score = - float('inf')
for scale in [0.1, 0.2, 0.3, 0.5, 0.7, 1, 1.5, 2, 3, 5]:
    bw = scale * np.concatenate([[Y.std()], X.std(axis=0)]) / Y.shape[0] ** (1 / 7)
    score = benchmark_bandwidth(bw, Y, X)
    if score > best_score:
        best_scale = scale
        best_score = score

In [None]:
best_scale, best_score

In [None]:
bw0 = best_scale * np.concatenate([[Y.std()], X.std(axis=0)]) / Y.shape[0] ** (1 / 7)
model = KDEMultivariateConditional(Y[:, None], X, dep_type='c', indep_type='c' * X.shape[1], bw=bw0)

In [None]:
e_xx = model.pdf(Y[:, None], X)
e_xx

In [None]:
plt.hist(e_xx)

In [None]:
model.bw

In [None]:
[np.mean(1. / e_xx * ((i <= Y) & (Y <= i + 1))) for i in range(7)]  # Should go to 1

In [None]:
# Shouldn't we use loo estimate for propensity? (For reducing the sampling bias)

In [None]:
model_loocv = KDEMultivariateConditional(Y[:500, None], X[:500], dep_type='c', indep_type='c' * X.shape[1], bw='cv_ml')
model_loocv = KDEMultivariateConditional(Y[:, None], X, dep_type='c', indep_type='c' * X.shape[1], bw=model_loocv.bw)
model_loocv.pdf(T[:, None], X)

In [None]:
model_loocv.bw

In [None]:
e_xx = model_loocv.pdf(Y[:, None], X)
e_xx

In [None]:
[np.mean(1. / e_xx * ((i <= Y) & (Y <= i + 1))) for i in range(7)]  # Should go to 1

In [None]:
Y, T, X, U, e_x, e_xu = generate_data(2000)

In [None]:
# Maybe better to use gaussian process regression
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ConstantKernel, WhiteKernel, RBF
from scipy.stats import norm

kernel = WhiteKernel() + ConstantKernel() * RBF()
model = GaussianProcessRegressor(kernel=kernel).fit(X, Y)
mu, sigma = model.predict(X, return_std=True)
e_xx = norm.pdf(Y, loc=mu, scale=sigma)

In [None]:
plt.hist(e_xx)

In [None]:
[np.mean(1. / e_xx * ((i <= Y) & (Y <= i + 1))) for i in range(7)]  # Should go to 1