In [None]:
import numpy as np
from numpy.linalg import inv
from numpy import linalg as LA
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
from scipy.optimize import curve_fit
import pandas as pd
import scipy.stats as st
import pickle

import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.utils.data
import torch.optim as optim
import torch.backends.cudnn as cudnn
import os
import os.path
import argparse
from torch.autograd import Variable
import statsmodels.api as sm
import time
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

In [None]:
def round_p(value, round_digits):
    """Round ``value`` to ``round_digits`` decimals and return it as a fixed-point string."""
    rounded = round(value, round_digits)
    return f"{rounded:.{round_digits}f}"

def generate_x(d_c, n):
    """Draw an ``(n, d_c)`` matrix of features, i.i.d. uniform on [0, 1)."""
    return np.random.uniform(low=0, high=1, size=[n, d_c])

def generate_t(t_combo, t_dist, n):
    """Sample ``n`` rows of ``t_combo``, picking row ``k`` with probability ``t_dist[k]``."""
    n_options = np.shape(t_dist)[0]
    drawn_rows = np.random.choice(n_options, n, p=t_dist)
    return t_combo[drawn_rows,]


def powerset(s):
    """Yield every subset of ``s`` as a list, enumerated in binary-counter order.

    Subset number ``code`` contains the element at position ``pos`` exactly
    when bit ``pos`` of ``code`` is set, so the empty list comes first and the
    full list comes last.
    """
    size = len(s)
    for code in range(2 ** size):
        yield [item for pos, item in enumerate(s) if (code >> pos) & 1]

def calculate_mse(loader, is_gpu, net):
    """Compute the mean squared error of ``net`` over all samples in ``loader``.

    Args:
        loader: iterable of ``(inputs, labels)`` batches, e.g. a
            ``torch.utils.data.DataLoader``.
        is_gpu (bool): if True, move each batch to CUDA before the forward pass.
        net: callable model. It may return either the prediction tensor
            directly (as ``FNN_asig.forward`` does) or a ``(b, outputs)`` pair,
            in which case the second element is used as the prediction.
    Returns:
        0-dim torch.Tensor: sum of squared errors divided by the sample count.
    """
    cnt = 0
    total_loss = 0

    # Evaluation only: disable autograd so the accumulated loss does not keep
    # every batch's computation graph alive.
    with torch.no_grad():
        for inputs, labels in loader:
            if is_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()
            result = net(inputs)
            # The original code unpacked ``b, outputs = net(inputs)``; accept
            # both that pair form and a plain prediction tensor.
            outputs = result[1] if isinstance(result, (tuple, list)) else result

            cnt += labels.size(0)
            # torch.sum reduces in one call instead of iterating the tensor
            # element by element with the Python builtin ``sum``.
            total_loss += torch.sum((outputs - labels) ** 2)

    return total_loss / float(cnt)

def plot_dev(loader, is_gpu, net):
    """Print the MSE of ``net`` over ``loader`` and scatter predictions against labels.

    Args:
        loader: iterable of ``(inputs, labels)`` batches.
        is_gpu (bool): if True, move each batch to CUDA before the forward pass.
        net: callable model returning one prediction per sample.
    """
    n_seen = 0
    squared_error = 0
    actual, predicted = [], []

    for inputs, labels in loader:
        if is_gpu:
            inputs, labels = inputs.cuda(), labels.cuda()
        inputs, labels = Variable(inputs), Variable(labels)
        outputs = net(inputs)

        # Collect per-sample values directly into flat lists.
        actual.extend(labels.tolist())
        predicted.extend(outputs.tolist())

        n_seen += labels.size(0)
        squared_error += sum((outputs-labels)**2)

    print('MSELoss= ', squared_error/float(n_seen))
    plt.scatter(actual, predicted)
    plt.xlabel('real y')
    plt.ylabel('pred y')
    plt.show()
    

def _str2bool(text):
    """Parse a command-line boolean such as 'true'/'false', '1'/'0', 'yes'/'no'.

    ``type=bool`` must not be used with argparse: ``bool('False')`` is True
    because any non-empty string is truthy.
    """
    if isinstance(text, bool):
        return text
    lowered = str(text).strip().lower()
    if lowered in ('true', 't', 'yes', 'y', '1'):
        return True
    if lowered in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (text,))


parser = argparse.ArgumentParser()
# hyperparameters settings
parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
parser.add_argument('--wd', type=float, default=5e-4, help='weight decay')#lr/(c+wd)
parser.add_argument('--epochs', type=int, default=50,
                    help='number of epochs to train')
parser.add_argument('--batch_size_train', type=int,
                    default=1000, help='training set input batch size')
parser.add_argument('--batch_size_test', type=int,
                    default=1000, help='test set input batch size')
parser.add_argument('--is_gpu', type=_str2bool, default=False,
                    help='whether training using GPU')
# Clear the process arguments so that a later ``parser.parse_args()`` call sees
# no command-line options and falls back to the defaults above.
import sys
sys.argv=['']
del sys


In [None]:
# --- problem size ---
m = 4    # number of experiments
d_c = 4  # number of features
sample_size = m + 100

# --- training / estimation settings ---
lr = 0.05
reg_term = 0.0005
reg_loss = 0  # alternative value tried: 0.001
train_epochs = 2000
test_thres = 0.3
n_cnver_test = 10000

# --- column labels: x1..x{d_c} for features, t0..t{m} for treatment indicators ---
feature_list = [f"x{k + 1}" for k in range(d_c)]
t_list = [f"t{k}" for k in range(m + 1)]

In [None]:
def powerset(s):
    """Yield every subset of ``s`` as a list, in binary-counter order.

    NOTE: this redefines the ``powerset`` helper from the earlier helper cell;
    the two definitions behave the same.
    """
    for selector in range(1 << len(s)):
        chosen = []
        for position, element in enumerate(s):
            # Bit ``position`` of the selector decides membership.
            if selector & (1 << position):
                chosen.append(element)
        yield chosen
def calculate_mse(loader, is_gpu, net):
    """Compute the mean squared error of ``net`` over all samples in ``loader``.

    Args:
        loader: iterable of ``(inputs, labels)`` batches, e.g. a
            ``torch.utils.data.DataLoader``.
        is_gpu (bool): if True, move each batch to CUDA before the forward pass.
        net: callable model returning one prediction per sample.
    Returns:
        0-dim torch.Tensor: sum of squared errors divided by the sample count.
    """
    cnt = 0
    total_loss = 0

    # Evaluation only: disable autograd so the accumulated loss does not keep
    # every batch's computation graph alive.
    with torch.no_grad():
        for inputs, labels in loader:
            if is_gpu:
                inputs = inputs.cuda()
                labels = labels.cuda()
            outputs = net(inputs)

            cnt += labels.size(0)
            # torch.sum reduces in one call instead of iterating the tensor
            # element by element with the Python builtin ``sum``.
            total_loss += torch.sum((outputs - labels) ** 2)

    return total_loss / float(cnt)

# Enumerate every on/off combination of the m treatments. Each row of
# t_star_all is an indicator vector of length m+1 whose first entry is always 1.
all_combo = list(powerset(list(np.arange(1, m+1))))
t_star_all = []
for active in all_combo:
    indicator = np.zeros(m+1)
    indicator[0] = 1
    indicator[active] = 1
    t_star_all.append(indicator)
t_star_all = np.int16(t_star_all)

# Observed treatment distribution: uniform over the 2**m combinations.
n_combos = 2**m
t_dist_obs = (1/n_combos)*np.ones(n_combos)

In [None]:
class FNN_asig(nn.Module):
    """Feedforward network whose output is a feature-dependent linear index in the treatments.

    The first ``d_c`` input columns are features and the remaining ``m + 1``
    columns are treatment indicators. ``layer1`` maps the features to ``m + 1``
    coefficients, which are multiplied element-wise with the treatment columns,
    summed per sample and scaled by the single weight of ``layer3``.
    """

    def __init__(self, d_c, m):
        """Create the network.

        Args:
            d_c (int): number of feature columns at the start of each input row.
            m (int): number of treatments; the input carries ``m + 1``
                treatment columns after the features.
        """
        super(FNN_asig, self).__init__()
        # Keep the split point on the instance so forward() does not depend on
        # a module-level ``d_c`` that may differ from the constructor argument.
        self.d_c = d_c
        self.layer1 = nn.Sequential(
            nn.Linear(d_c, 10, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(10, m + 1)  # one coefficient per treatment column
        )
        self.layer3 = nn.Linear(1, 1, bias=False)

    def forward(self, x):
        """Return a 1-D tensor with one prediction per input row."""
        b = self.layer1(x[:, :self.d_c])  # coefficients from the feature columns
        u = torch.sum(b * x[:, self.d_c:], 1)  # weight the treatment columns and sum per row
        u = u.unsqueeze(1)
        u = self.layer3(u)
        return torch.reshape(u, (-1,))

    
def get_activation(name,activation):
    """Build a forward hook that stores a module's detached output in ``activation[name]``."""
    def hook(model, input, output):
        # Detach so the cached tensor carries no autograd history.
        activation.update({name: output.detach()})
    return hook

def generate_y_true(coef, c, d, x, t, n,std):
    """Simulate ``n`` noisy outcomes from a scaled logistic response.

    For row ``i`` the outcome is ``c * sigmoid(x[i] @ coef @ t[i]) + d`` plus a
    normal noise term with mean 0 and standard deviation ``std``.

    Returns:
        tuple: ``(y, y_error)`` - the outcomes and the noise that was added.
    """
    noise = np.random.normal(0, std, n)
    outcomes = np.zeros(n)
    for row, eps in enumerate(noise):
        index = x[row].dot(coef).dot(t[row])
        outcomes[row] = c / (1 + np.exp(-index)) + d + eps
    return outcomes, noise
def generate_y_true_1(coef, x, t,c):
    """Return the noise-free response ``c * sigmoid(x @ coef @ t)``."""
    linear_index = x.dot(coef).dot(t)
    return c / (1 + np.exp(-linear_index))


def generate_true_ate(coef, c, m):
    """Estimate the mean noise-free response for every treatment combination.

    One feature sample of 100000 rows is drawn once and reused for all
    combinations. The result maps ``str(t)`` (the printed indicator vector,
    first entry always 1) to the sample mean of ``generate_y_true_1``.
    """
    covariates = generate_x(d_c, 100000)
    mean_by_treatment = {}
    for active in powerset(list(np.arange(1, m+1))):
        indicator = np.zeros(m+1)
        indicator[0] = 1
        indicator[active] = 1
        response = generate_y_true_1(coef, covariates, indicator, c)
        mean_by_treatment[str(indicator)] = np.mean(response)
    return mean_by_treatment


In [None]:
# Screen random coefficient draws: report those whose best achievable gain over
# the baseline is positive and at most 1, while the worst combination loses
# more than five times that gain.
for period in tqdm(range(10000)):
    coef = np.random.uniform(-0.3, 0.5, [d_c, m+1])
    c_true = np.random.uniform(10, 20)
    a = generate_true_ate(coef, c_true, m)
    max_value = max(a.values())
    # The first key is the baseline (the empty treatment subset comes first).
    base = a[str(list(a.keys())[0])]
    if max_value == base or max_value - base > 1:
        continue
    worst_to_best = (min(a.values())- base)/(max_value - base)
    if worst_to_best < -5:
        print(max_value - base,worst_to_best)

In [None]:
def train_fnn_asig(trainloader, testloader, n_epochs=6, min_epochs=6):
    """Train a fresh ``FNN_asig`` with Adam on MSE loss (plus an optional L1 penalty).

    Args:
        trainloader: iterable of ``(inputs, labels)`` training batches.
        testloader: iterable of ``(inputs, labels)`` batches used only for the
            early-stopping check.
        n_epochs (int): maximum number of passes over ``trainloader``. The
            default of 6 matches the previously hard-coded epoch count.
        min_epochs (int): early stopping is considered only for epoch indices
            strictly greater than this value. With the defaults the check can
            never fire, exactly as before, so the test MSE is not evaluated.
    Returns:
        FNN_asig: the trained network.

    The network size, learning rate, L1 weight and stopping threshold come
    from the module-level ``d_c``, ``m``, ``lr``, ``reg_loss`` and
    ``test_thres``.
    """
    is_gpu = False
    net = FNN_asig(d_c, m)
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=1e-4)
    criterion = nn.MSELoss()

    for epoch in range(n_epochs):
        for inputs, labels in trainloader:
            optimizer.zero_grad()
            outputs = net(inputs)
            l1_norm = sum(p.abs().sum() for p in net.parameters())
            loss = criterion(outputs, labels) + reg_loss * l1_norm
            loss.backward()
            optimizer.step()

        # Evaluate the test MSE only when it can actually trigger a stop; the
        # per-epoch train/test MSE was otherwise computed and thrown away.
        if epoch > min_epochs and calculate_mse(testloader, is_gpu, net) < test_thres:
            break
    return net


In [None]:
def _predict_in_batches(net, features, batch_size=1000):
    """Run ``net`` over the rows of ``features`` without autograd; return float64 predictions."""
    inputs = torch.Tensor(features)
    chunks = []
    with torch.no_grad():
        for batch in torch.split(inputs, batch_size):
            chunks.append(net(batch).reshape(-1).detach().cpu().numpy().astype(np.float64))
    return np.concatenate(chunks) if chunks else np.zeros(0)


def inference(X_inference, T_inference_full, Y_inference, X_model, T_model, Y_model):
    """Fit the network on the model fold and return debiased predictions on the inference fold.

    Args:
        X_inference: feature matrix of the inference fold, one row per sample.
        T_inference_full: observed treatment indicator rows of the inference
            fold (``m + 1`` columns, including the leading column).
        Y_inference: observed outcomes of the inference fold.
        X_model, T_model, Y_model: the same three arrays for the fold used to
            train the network (split 70/30 into train and test parts).
    Returns:
        numpy.ndarray: shape ``(len(t_star_all), n_inference)``. Row ``k``
        holds, for every inference sample, the network prediction under
        treatment ``t_star_all[k]`` minus the correction
        ``t_star . Lambda^{-1} . (2 * (pred_observed - y) * T_observed)``.
    Raises:
        numpy.linalg.LinAlgError: if the regularized Hessian cannot be
            inverted. Previously this was printed and skipped, which left the
            per-sample lists misaligned.

    Uses the module-level ``parser``, ``t_star_all``, ``t_dist_obs``,
    ``reg_term`` and ``m``.
    """
    # =========================
    # Fit the network on the model fold
    # =========================
    X_train, X_test, T_train_full, T_test_full, Y_train, Y_test = train_test_split(
        X_model, T_model, Y_model, test_size=0.3, random_state=42)

    # Network input = features followed by the treatment indicator columns.
    X_train_full = np.hstack([X_train, T_train_full]).astype(np.float32)
    X_test_full = np.hstack([X_test, T_test_full]).astype(np.float32)

    opt = parser.parse_args()
    trainset = torch.utils.data.TensorDataset(torch.Tensor(X_train_full), torch.Tensor(Y_train))
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=opt.batch_size_train, shuffle=True)
    testset = torch.utils.data.TensorDataset(torch.Tensor(X_test_full), torch.Tensor(Y_test))
    testloader = torch.utils.data.DataLoader(testset, batch_size=opt.batch_size_test, shuffle=False)

    net = train_fnn_asig(trainloader, testloader)

    # =========================
    # Quantities that do not depend on the target treatment
    # =========================
    T_observed = np.asarray(T_inference_full, dtype=np.float64)
    real_y = np.asarray(Y_inference, dtype=np.float64).reshape(-1)
    x_observed = np.hstack([X_inference, T_inference_full]).astype(np.float32)

    # Predictions under the treatments that were actually observed.
    pred_y_loss = _predict_in_batches(net, x_observed)

    # Hessian term: sum_k p_k * 2 * t_k t_k^T over the treatment design. It
    # involves only the design and its distribution, so it is identical for
    # every sample and is inverted once instead of once per sample.
    design = np.asarray(t_star_all, dtype=np.float64)
    weights = np.asarray(t_dist_obs, dtype=np.float64)
    lambda_ = 2.0 * (design.T * weights) @ design
    lambda_inv = inv(lambda_ + reg_term * np.eye(m + 1))

    # Row i: Lambda^{-1} applied to the loss gradient 2 * (pred_i - y_i) * T_i.
    loss_grad = 2.0 * (pred_y_loss - real_y)
    lambda_inv_loss_prime = loss_grad[:, None] * (T_observed @ lambda_inv.T)

    # =========================
    # Debiased prediction for every treatment combination
    # =========================
    results_list = []
    n_cols = x_observed.shape[1]
    for t_star in design:
        # Overwrite the last m treatment columns with the target combination;
        # the leading treatment column keeps its observed value.
        x_counterfactual = x_observed.copy()
        x_counterfactual[:, n_cols - m:] = t_star[1:]
        pred_y = _predict_in_batches(net, x_counterfactual)
        results_list.append(pred_y - lambda_inv_loss_prime @ t_star)

    return np.array(results_list)

In [None]:
# Monte Carlo experiment: for each noise level ``std`` run ``Exp_time``
# independent replications and save the debiased predictions together with the
# true mean outcomes of every treatment combination.
Exp_time = 1000
for std in [3,4,5]:
    #std = 5
    optimal_ate = []      # one dict of true mean outcomes per replication
    results_list_N = []   # one (n_combinations, sample_size) array per replication
    for period in tqdm(range(Exp_time)):
        #coef = np.random.normal(1, 5, [d_c, m+1])
        # Fresh data-generating parameters for this replication.
        coef = np.random.uniform(-0.3, 0.5, [d_c, m+1])
        c_true = np.random.uniform(10, 20)
        a = generate_true_ate(coef, c_true, m)
        optimal_ate.append(a)
        # Generate samples
        X_sample =  generate_x(d_c, sample_size)
        T_sample = generate_t(t_star_all, t_dist_obs, sample_size)
        # ``std`` is the standard deviation of the outcome noise; the offset d is 0.
        Y_sample, Y_error = generate_y_true(coef, c_true, 0, X_sample, T_sample, sample_size,std)

        # Split data into training, testing  and inference sets
        # (two equal halves here; the train/test split happens inside inference()).
        X_model, X_inference, T_model, T_inference, Y_model, Y_inference = train_test_split(X_sample, T_sample, Y_sample, test_size=0.5, random_state=42)
        
        # Cross-fitting: each half is used once to fit the network and once
        # for inference, then the two result blocks are joined along the
        # sample axis.
        results1 = inference(X_inference, T_inference, Y_inference, X_model, T_model, Y_model)
        results2 = inference(X_model, T_model, Y_model, X_inference, T_inference, Y_inference)
        results = np.concatenate((results1, results2), axis = 1)
        #print(results.shape)
        results_list_N.append(results)
    results_array_N = np.array(results_list_N)
    # File names encode the number of experiments m and the noise level std.
    np.save('Misspecification_DML_linear_results_array_{}_{}.npy'.format(m,std), results_array_N)
    # optimal_ate is a list of dicts; the evaluation cell reloads this file
    # with allow_pickle=True.
    np.save('Misspecification_DML_linear_optimal_ate_{}_{}.npy'.format(m,std), optimal_ate)

In [None]:
#test
from scipy.stats import norm,ttest_ind

# Settings selecting which saved experiment to evaluate.
m = 4
std = 5

# Baseline treatment vector: only the leading entry is switched on.
base_t = np.zeros(m+1)
base_t[0] = 1

# Rebuild the treatment design in the same order used when the results were
# generated (powerset order, empty subset first).
all_combo = list(powerset(list(np.arange(1, m+1))))
t_star_all = []
for i in all_combo:
    t = np.zeros(m+1)
    t[0] = 1
    t[i] = 1
    t_star_all.append(t)
t_star_all = np.int16(t_star_all)


alpha = 0.05
Z = norm.ppf(1 - alpha/2)
results_array_N = np.load('Misspecification_DML_linear_results_array_{}_{}.npy'.format(m,std))
# NOTE: allow_pickle=True executes pickle on load; only use it on files written
# by this notebook, never on untrusted input.
optimal_ate_list = np.load('Misspecification_DML_linear_optimal_ate_{}_{}.npy'.format(m,std), allow_pickle=True)

# One column per loaded replication (was hard-coded to 1000), one row per
# decision rule evaluated below.
cost = np.zeros((3, results_array_N.shape[0]))

# Evaluate three decision rules on every saved replication j and store, in
# cost[rule, j], the true gain of the chosen treatment vector relative to the
# baseline, divided by the best achievable gain:
#   cost[0, j] - switch on every single treatment that tests significantly positive
#   cost[1, j] - same rule applied to shrunken estimates
#   cost[2, j] - pick the single best significant combination among all of them
for j in range(results_array_N.shape[0]):
    # F maps str(treatment vector) -> true mean outcome for this replication
    # (presumably the dict built by generate_true_ate - verify against the saved file).
    F = optimal_ate_list[j]
    #optimal_ate =  max(F.values()) - min(F.values())
    # Best achievable improvement over the baseline treatment vector.
    optimal_ate =  max(F.values()) - F[str(base_t)]

    # Row 0 of the results corresponds to the first entry of t_star_all.
    base = results_array_N[j][0]
    N1 = base.shape[0]

    decision_dml = np.zeros(m+1)
    decision_dml[0] = 1

    # Indices of the combinations with exactly one treatment switched on
    # (the leading entry plus one more, hence a row sum of 2).
    decision_experiments = []
    for idx,i in enumerate(t_star_all):
        if np.sum(i) == 2:
            decision_experiments.append(idx)
    #print(decision_experiments)

    # ---- Rule for cost[2, j]: compare every combination against the baseline ----
    tao_hat_nonlinear = np.zeros(len(t_star_all))
    p_value_list_nonlinear = np.zeros(len(t_star_all))
    for idx,i in enumerate(t_star_all):
        group1 = results_array_N[j][idx]
        diff_mean = group1.mean() - base.mean()
        # When both groups are constant the t-test is skipped and the p-value
        # is set to 0.
        if np.std(group1) == 0 and np.std(base) == 0:
            p_value = 0
        else:
            # equal_var=False selects Welch's unequal-variance t-test.
            t_stat, p_value = ttest_ind(group1, base, equal_var = False) 
        tao_hat_nonlinear[idx] = diff_mean
        p_value_list_nonlinear[idx] = p_value
    
    # Candidates: significant at level alpha and with a positive estimated effect.
    decision3 = np.intersect1d(np.argwhere(p_value_list_nonlinear<alpha), np.argwhere(tao_hat_nonlinear>0))
    if decision3.size == 0:
        # No candidate: fall back to the first combination in t_star_all.
        decision_nonlinear = np.array(t_star_all[0]).astype(float)
    else:
        # Otherwise take the candidate with the largest estimated effect.
        idx = np.argmax(tao_hat_nonlinear[decision3])
        decision_nonlinear = np.array(t_star_all[decision3[idx]]).astype(float)
    if optimal_ate == 0:
        # Nothing to gain: score 1 if only the leading entry is on, else 0.
        if np.sum(decision_nonlinear) == 1:
            cost[2,j] = 1
        else:
            cost[2,j] = 0
    else:
        cost[2,j] = (F[str(decision_nonlinear)] - F[str(base_t)])/optimal_ate   



    # ---- Rule for cost[0, j]: test each single-treatment combination separately ----
    tao_hat = np.zeros(m)
    variance = np.zeros(m)
    p_value_list = np.zeros(m)
    for idx,i in enumerate(decision_experiments):
        group1 = results_array_N[j][i]
        diff_mean = group1.mean() - base.mean()
        if np.std(group1) == 0 and np.std(base) == 0:
            p_value = 0
        else:
            t_stat, p_value = ttest_ind(group1, base, equal_var = False) 
        tao_hat[idx] = diff_mean
        p_value_list[idx] = p_value
        # N1 times the sum of the two squared standard errors.
        variance[idx] = N1*(group1.var(ddof=1) / len(group1) + base.var(ddof=1) / len(base))
    
        
    decision1 = np.intersect1d(np.argwhere(p_value_list<alpha), np.argwhere(tao_hat>0))
    # NOTE(review): position i in decision_experiments is mapped to treatment
    # entry i + 1; this assumes the single-treatment combinations appear in
    # treatment order within t_star_all - confirm against the design.
    for i in decision1:
        decision_dml[i + 1] = 1
    if optimal_ate == 0:
        if np.sum(decision_dml) == 1:
            cost[0,j] = 1
        else:
            cost[0,j] = 0
    else:
        cost[0,j] = (F[str(decision_dml)] - F[str(base_t)])/optimal_ate   

    # ---- Rule for cost[1, j]: shrink the estimates toward their mean, then re-test ----
    tao_0 = np.mean(tao_hat)
    numerator = np.mean(variance)
    denumerator = np.mean((tao_hat - tao_0)**2) - numerator/N1
    
    # Guard against division by zero in the shrinkage strength.
    if tao_0 == 0 or denumerator == 0:
        beta = 0
    else:
        beta = numerator/denumerator + Z*np.sqrt(N1*numerator)/tao_0
    
    # Negative shrinkage strength is clipped to zero.
    beta = max(0,beta)
    #print(beta)
    # Shrinkage weight in (0, 1]; theta == 1 means no shrinkage.
    theta = N1/(N1+beta)
    tao_shunken_hat = np.zeros(m)
    p_value_list_shrunken = np.zeros(m)
    for idx,i in enumerate(decision_experiments):
        # Treated group: convex combination of its values and the mean effect;
        # baseline group: scaled by theta only.
        group_11 = theta*results_array_N[j][i] + (1-theta)*tao_0
        group_00 = theta*base
        if np.std(group_11) == 0 and np.std(group_00) == 0:
            p_value = 0
        else:
            t_stat, p_value = ttest_ind(group_11, group_00, equal_var = False)

        diff_mean1 = group_11.mean() - group_00.mean()
        tao_shunken_hat[idx] = diff_mean1
        p_value_list_shrunken[idx] = p_value
    
    decision_dml_shrunken = np.zeros(m + 1)
    decision_dml_shrunken[0] = 1
    decision2 = np.intersect1d(np.argwhere(p_value_list_shrunken<alpha), np.argwhere(tao_shunken_hat>0))
    for i in decision2:
        decision_dml_shrunken[i+1] = 1
    if optimal_ate == 0:
        if np.sum(decision_dml_shrunken) == 1:
            cost[1,j] = 1
        else:
            cost[1,j] = 0
    else:
        cost[1,j] = (F[str(decision_dml_shrunken)] -  F[str(base_t)])/optimal_ate    
#np.save("DML_nonlinear_cost.npy", cost)