In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from scipy.stats import norm
from copy import deepcopy
from scipy.stats import t as t_dist
import numpy as np
from scipy.stats import ttest_ind

## 1 Boxplot in basic setting 

In [None]:
# Monte Carlo comparison of three decision rules in the basic Normal-Normal setting.
# In every result array: row 0 = IHT (Welch t-test per experiment),
# row 1 = DPTR (t-test on shrunken outcomes), row 2 = Bayesian (normal posterior).
SEED = 0 # seed for the global NumPy RNG so that reruns reproduce the same figures
N_SIM = 1000 # number of Monte Carlo replications
K = 100 # number of Experiments
N = 10 # number of samples in each experiment
Z = norm.ppf(0.975) # critical value for 95% confidence interval
std = 3 # standard deviation of noise
anchor_mean = 1 # mean of the anchor treatment effect

cost = np.zeros((3, N_SIM)) # Optimality Ratio
accuracy = np.zeros((3, N_SIM))
recall = np.zeros((3, N_SIM))
FPR = np.zeros((3, N_SIM))
precision = np.zeros((3, N_SIM))

np.random.seed(SEED)

# Simulation
for time_seq in tqdm(range(N_SIM)):

    # Generate the true ATEs
    true_tao = np.random.normal(anchor_mean, 3, K)
    is_positive = true_tao > 0
    is_negative = true_tao < 0
    # Best achievable gain: launch exactly the experiments with a positive true effect
    optimal_cost = np.sum(true_tao[is_positive])

    # Generate the data: 5 of the N units of every experiment are treated
    X = np.zeros((K, N))
    for i in range(K):
        X[i, np.random.choice(N, 5, replace=False)] = 1
    # One vectorised draw of the i.i.d. noise instead of K*N scalar draws
    Y = true_tao[:, None] * X + np.random.normal(0, std, size=(K, N))

    # IHT: difference in means and Welch t-test, experiment by experiment
    tao_hat = np.zeros(K)
    variance = np.zeros(K)
    p_value_list = np.zeros(K)
    for k in range(K):
        group_1 = Y[k, X[k, :] == 1]
        group_0 = Y[k, X[k, :] == 0]
        p_value_list[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
        tao_hat[k] = group_1.mean() - group_0.mean()
        # N times the squared standard error of the difference in means
        variance[k] = N * (group_1.var(ddof=1) / len(group_1) + group_0.var(ddof=1) / len(group_0))

    # Moment estimates shared by the Bayesian rule and DPTR
    tao_0 = np.mean(tao_hat)
    numerator = np.mean(variance)
    # Spread of the estimates minus the average sampling variance
    denumerator = np.mean((tao_hat - tao_0)**2) - numerator/N

    # Bayesian Decision Making
    bayesian_beta = np.zeros(K)
    if denumerator <= 0:
        # Non-positive prior-variance estimate: no shrinkage (theta = 1)
        posteri_mean = tao_hat.copy()
        posteri_var = 1 / (N / variance)
    else:
        bayesian_beta = np.maximum(variance / denumerator, 0)
        shrink = N / (N + bayesian_beta)
        posteri_mean = tao_hat * shrink + (1 - shrink) * tao_0
        posteri_var = 1 / (1 / denumerator + N / variance)
    # Posterior probability of a positive effect, for all experiments at once
    prob = norm.sf(0, loc=posteri_mean, scale=np.sqrt(posteri_var))
    decision3 = np.flatnonzero(prob > 1 - 0.025)

    # DPTR
    beta = numerator/denumerator + Z*np.sqrt(N*numerator)/tao_0
    beta = max(0, beta)
    theta = N/(N+beta)
    tao_shunken_hat = np.zeros(K)
    p_value_list_shrunken = np.zeros(K)
    Y_shunken = theta * Y # new array; Y itself is left untouched
    for k in range(K):
        # Shifting the treated group by (1-theta)*tao_0 makes the difference in means
        # equal to theta*tao_hat[k] + (1-theta)*tao_0
        group_1 = Y_shunken[k, X[k, :] == 1] + (1-theta)*tao_0
        group_0 = Y_shunken[k, X[k, :] == 0]
        p_value_list_shrunken[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
        tao_shunken_hat[k] = group_1.mean() - group_0.mean()

    # Launch an experiment when the test is significant and the estimate is positive
    decision1 = np.flatnonzero((p_value_list < 0.05) & (tao_hat > 0))
    decision2 = np.flatnonzero((p_value_list_shrunken < 0.05) & (tao_shunken_hat > 0))

    # Optimality ratio and classification metrics of each rule
    for method, decision in enumerate((decision1, decision2, decision3)):
        launched = np.zeros(K, dtype=bool)
        launched[decision] = True
        true_pos = np.sum(launched & is_positive)
        false_pos = np.sum(launched & is_negative)
        true_neg = np.sum(~launched & is_negative)

        cost[method, time_seq] = np.sum(true_tao[decision]) / optimal_cost
        accuracy[method, time_seq] = (true_pos + true_neg) / K
        recall[method, time_seq] = true_pos / np.sum(is_positive)
        FPR[method, time_seq] = false_pos / np.sum(is_negative)
        # Precision is undefined when nothing is launched; store NaN explicitly
        # instead of relying on a silent 0/0.
        n_launched = true_pos + false_pos
        precision[method, time_seq] = true_pos / n_launched if n_launched > 0 else np.nan

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st


def mean_ci(data_array):
    """Summarise each series by its mean and its empirical 2.5%-97.5% percentile band.

    NaN entries are ignored, so a replication in which a metric is undefined
    (e.g. precision when nothing was selected) does not turn the whole summary
    into NaN. For NaN-free input the result is the same as with ``np.mean`` /
    ``np.percentile``.

    Note: the band is taken from the percentiles of the data themselves; it is
    not a confidence interval for the mean.

    Parameters
    ----------
    data_array : iterable of array-like
        One series of values per metric.

    Returns
    -------
    means, lowers, uppers : np.ndarray
        ``means[i]`` is the mean of series ``i``; ``lowers[i]`` is the mean minus
        the 2.5th percentile and ``uppers[i]`` is the 97.5th percentile minus the
        mean, i.e. offsets in the layout used for ``yerr=[lowers, uppers]``.
    """
    means = []
    lowers = []
    uppers = []
    for d in data_array:
        mean = np.nanmean(d)
        ci_low, ci_high = np.nanpercentile(d, [2.5, 97.5])
        means.append(mean)
        lowers.append(mean - ci_low)
        uppers.append(ci_high - mean)
    return np.array(means), np.array(lowers), np.array(uppers)

# Per-replication improvement of DPTR (row 1) over IHT (row 0) for every metric.
# Specificity equals 1 - FPR, so its improvement is FPR[0] - FPR[1].
data = [
    cost[1,:] - cost[0,:],
    accuracy[1,:] - accuracy[0,:],
    recall[1,:] - recall[0,:],
    FPR[0,:] - FPR[1,:],
    precision[1,:] - precision[0,:]
]

# Same improvements for the Bayesian rule (row 2) over IHT (row 0).
data1 = [
    cost[2,:] - cost[0,:],
    accuracy[2,:] - accuracy[0,:],
    recall[2,:] - recall[0,:],
    FPR[0,:] - FPR[2,:],
    precision[2,:] - precision[0,:]
]

# Means and percentile-band offsets, one entry per metric
mean_dtr, err_low_dtr, err_up_dtr = mean_ci(data)
mean_bayes, err_low_bayes, err_up_bayes = mean_ci(data1)

# x-axis: one slot per metric, the two methods drawn side by side
x = np.arange(5)
width = 0.2

fig, ax = plt.subplots(figsize=(8, 6))
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

ax.errorbar(x - width/2, mean_dtr, yerr=[err_low_dtr, err_up_dtr], fmt='o',
            capsize=8, markersize=6, color='#495373', ecolor='#495373', label='DPTR', linewidth=2)

ax.errorbar(x + width/2, mean_bayes, yerr=[err_low_bayes, err_up_bayes], fmt='o',
            capsize=8, markersize=6, color='#E3738B', ecolor='#E3738B', label='Bayesian', linewidth=2)

ax.set_xticks(x)
ax.set_xticklabels(['OR', 'Accuracy', 'Recall', 'Specificity', 'Precision'], fontsize=15)

ax.legend(fontsize=12, loc='upper right')
ax.set_ylim(-0.4, 0.8)
ax.tick_params(axis='y', labelsize=15)
ax.grid(axis='y', linestyle='--', alpha=0.6)

# Finalise the layout before saving so the file on disk matches the displayed figure
plt.tight_layout()
plt.savefig('Scenario1_no_feature_basic.png', dpi=300, bbox_inches='tight')
plt.show()

## 2 Cost calculation

### 2.1 Change with significance level for basic setting 

In [None]:
# Optimality Ratio of IHT (row 0), DPTR (row 1) and the Bayesian rule (row 2)
# as the significance level alpha varies, in the basic Normal-Normal setting.
SEED = 0 # seed for the global NumPy RNG so that reruns reproduce the same figure
N_SIM = 1000 # number of Monte Carlo replications per significance level
K = 100 # number of Experiments
N = 10 # number of samples in each experiment
std = 3 # standard deviation of noise
anchor_mean = 1 # mean of the anchor treatment effect
alpha_list = [0.05,0.1,0.15,0.2,0.25] # significance levels to compare

cost = np.zeros((3, len(alpha_list), N_SIM)) # Optimality Ratio

np.random.seed(SEED)

for index1, alpha in enumerate(alpha_list):
    Z = norm.ppf(1 - alpha/2) # two-sided critical value at level alpha
    for time_seq in tqdm(range(N_SIM)):

        # Generate the true ATEs
        true_tao = np.random.normal(anchor_mean, 3, K)
        # Best achievable gain: launch exactly the experiments with a positive true effect
        optimal_cost = np.sum(true_tao[true_tao > 0])

        # Generate the data: 5 of the N units of every experiment are treated
        X = np.zeros((K, N))
        for i in range(K):
            X[i, np.random.choice(N, 5, replace=False)] = 1
        # One vectorised draw of the i.i.d. noise instead of K*N scalar draws
        Y = true_tao[:, None] * X + np.random.normal(0, std, size=(K, N))

        # DM: difference in means and Welch t-test, experiment by experiment
        tao_hat = np.zeros(K)
        variance = np.zeros(K)
        p_value_list = np.zeros(K)
        for k in range(K):
            group_1 = Y[k, X[k, :] == 1]
            group_0 = Y[k, X[k, :] == 0]
            p_value_list[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_hat[k] = group_1.mean() - group_0.mean()
            # N times the squared standard error of the difference in means
            variance[k] = N * (group_1.var(ddof=1) / len(group_1) + group_0.var(ddof=1) / len(group_0))

        # Moment estimates shared by the Bayesian rule and DPTR
        tao_0 = np.mean(tao_hat)
        numerator = np.mean(variance)
        # Spread of the estimates minus the average sampling variance
        denumerator = np.mean((tao_hat - tao_0)**2) - numerator/N

        # Bayesian Decision Making
        bayesian_beta = np.zeros(K)
        if denumerator <= 0:
            # Non-positive prior-variance estimate: no shrinkage (theta = 1)
            posteri_mean = tao_hat.copy()
            posteri_var = 1 / (N / variance)
        else:
            bayesian_beta = np.maximum(variance / denumerator, 0)
            shrink = N / (N + bayesian_beta)
            posteri_mean = tao_hat * shrink + (1 - shrink) * tao_0
            posteri_var = 1 / (1 / denumerator + N / variance)
        # Posterior probability of a positive effect, for all experiments at once
        prob = norm.sf(0, loc=posteri_mean, scale=np.sqrt(posteri_var))
        decision3 = np.flatnonzero(prob > 1 - alpha/2)

        # DPTR
        beta = numerator/denumerator + Z*np.sqrt(N*numerator)/tao_0
        beta = max(0, beta)
        theta = N/(N+beta)
        tao_shunken_hat = np.zeros(K)
        p_value_list_shrunken = np.zeros(K)
        Y_shunken = theta * Y # new array; Y itself is left untouched
        for k in range(K):
            # Shifting the treated group by (1-theta)*tao_0 makes the difference in means
            # equal to theta*tao_hat[k] + (1-theta)*tao_0
            group_1 = Y_shunken[k, X[k, :] == 1] + (1-theta)*tao_0
            group_0 = Y_shunken[k, X[k, :] == 0]
            p_value_list_shrunken[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_shunken_hat[k] = group_1.mean() - group_0.mean()

        # Launch an experiment when the test is significant and the estimate is positive
        decision1 = np.flatnonzero((p_value_list < alpha) & (tao_hat > 0))
        decision2 = np.flatnonzero((p_value_list_shrunken < alpha) & (tao_shunken_hat > 0))

        # Realised gain of each rule relative to the best achievable gain
        for method, decision in enumerate((decision1, decision2, decision3)):
            cost[method, index1, time_seq] = np.sum(true_tao[decision]) / optimal_cost
      
            
            
            

In [None]:
# Mean Optimality Ratio of each method against the significance level.
x = [0.05,0.1,0.15,0.2,0.25]
# Line style of each row of `cost`, in row order
method_styles = [
    dict(color='#8CA5EA', linestyle='--', marker='o', label="IHT"),
    dict(color='#495373', marker="s", label="DPTR"),
    dict(color='#E3738B', marker="s", label="Bayesian", linestyle='--'),
]

plt.figure(figsize=(8,6))
plt.xlabel(r'Significance level $\alpha$', fontsize=15)
plt.ylabel('Optimality Ratio (OR)', fontsize=15)

# Average over the replication axis and draw one line per method
for method_cost, style in zip(cost, method_styles):
    plt.plot(x, method_cost.mean(axis=1), **style)

plt.xticks(x, fontsize=15)
plt.yticks(fontsize=15)
plt.legend(fontsize=15)
plt.grid()
plt.savefig('performance_compare_with_alpha.png', dpi=300, bbox_inches='tight')
plt.show()

### 2.2 Change with anchor mean - Uniform-Uniform setting 

In [None]:
# Optimality Ratio of IHT (row 0), DPTR (row 1) and the Bayesian rule (row 2)
# as the anchor mean varies, with uniform true effects and uniform noise.
SEED = 0 # seed for the global NumPy RNG so that reruns reproduce the same figure
N_SIM = 1000 # number of Monte Carlo replications per anchor mean
K = 100 # number of Experiments
N = 10 # number of samples in each experiment
Z = norm.ppf(0.975) # critical value for 95% confidence interval
std = 3 # standard deviation of noise
anchor_mean_list = [1,2,3,4,5] # anchor means to compare

cost = np.zeros((3, len(anchor_mean_list), N_SIM)) # Optimality Ratio

# A uniform law on [-h, h] has variance h**2/3, so h = sqrt(12*var)/2
tao_half_width = np.sqrt(9*12)/2 # true effects: variance 9
noise_half_width = np.sqrt(std**2*12)/2 # noise: variance std**2

np.random.seed(SEED)

for index1, anchor_mean in enumerate(anchor_mean_list):
    for time_seq in tqdm(range(N_SIM)):

        # Generate the true ATEs using a uniform distribution
        true_tao = np.random.uniform(anchor_mean - tao_half_width, anchor_mean + tao_half_width, K)
        # Best achievable gain: launch exactly the experiments with a positive true effect
        optimal_cost = np.sum(true_tao[true_tao > 0])

        # Generate the data: 5 of the N units of every experiment are treated
        X = np.zeros((K, N))
        for i in range(K):
            X[i, np.random.choice(N, 5, replace=False)] = 1
        # One vectorised draw of the i.i.d. noise instead of K*N scalar draws
        Y = true_tao[:, None] * X + np.random.uniform(-noise_half_width, noise_half_width, size=(K, N))

        # DM: difference in means and Welch t-test, experiment by experiment
        tao_hat = np.zeros(K)
        variance = np.zeros(K)
        p_value_list = np.zeros(K)
        for k in range(K):
            group_1 = Y[k, X[k, :] == 1]
            group_0 = Y[k, X[k, :] == 0]
            p_value_list[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_hat[k] = group_1.mean() - group_0.mean()
            # N times the squared standard error of the difference in means
            variance[k] = N * (group_1.var(ddof=1) / len(group_1) + group_0.var(ddof=1) / len(group_0))

        # Moment estimates shared by the Bayesian rule and DPTR
        tao_0 = np.mean(tao_hat)
        numerator = np.mean(variance)
        # Spread of the estimates minus the average sampling variance
        denumerator = np.mean((tao_hat - tao_0)**2) - numerator/N

        # Bayesian Decision Making
        bayesian_beta = np.zeros(K)
        if denumerator <= 0:
            # Non-positive prior-variance estimate: no shrinkage (theta = 1)
            posteri_mean = tao_hat.copy()
            posteri_var = 1 / (N / variance)
        else:
            bayesian_beta = np.maximum(variance / denumerator, 0)
            shrink = N / (N + bayesian_beta)
            posteri_mean = tao_hat * shrink + (1 - shrink) * tao_0
            posteri_var = 1 / (1 / denumerator + N / variance)
        # Posterior probability of a positive effect, for all experiments at once
        prob = norm.sf(0, loc=posteri_mean, scale=np.sqrt(posteri_var))
        decision3 = np.flatnonzero(prob > 1 - 0.025)

        # DPTR
        beta = numerator/denumerator + Z*np.sqrt(N*numerator)/tao_0
        beta = max(0, beta)
        theta = N/(N+beta)
        tao_shunken_hat = np.zeros(K)
        p_value_list_shrunken = np.zeros(K)
        Y_shunken = theta * Y # new array; Y itself is left untouched
        for k in range(K):
            # Shifting the treated group by (1-theta)*tao_0 makes the difference in means
            # equal to theta*tao_hat[k] + (1-theta)*tao_0
            group_1 = Y_shunken[k, X[k, :] == 1] + (1-theta)*tao_0
            group_0 = Y_shunken[k, X[k, :] == 0]
            p_value_list_shrunken[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_shunken_hat[k] = group_1.mean() - group_0.mean()

        # Launch an experiment when the test is significant and the estimate is positive
        decision1 = np.flatnonzero((p_value_list < 0.05) & (tao_hat > 0))
        decision2 = np.flatnonzero((p_value_list_shrunken < 0.05) & (tao_shunken_hat > 0))

        # Realised gain of each rule relative to the best achievable gain
        for method, decision in enumerate((decision1, decision2, decision3)):
            cost[method, index1, time_seq] = np.sum(true_tao[decision]) / optimal_cost

In [None]:
# Mean Optimality Ratio of each method against the anchor mean (uniform setting).
x = [1,2,3,4,5]
# Line style of each row of `cost`, in row order
method_styles = [
    dict(color='#8CA5EA', linestyle='--', marker='o', label="IHT"),
    dict(color='#495373', marker="s", label="DPTR"),
    dict(color='#E3738B', marker="s", label="Bayesian", linestyle='--'),
]

plt.figure(figsize=(8,6))
plt.xlabel(r'The value $\tau_0$', fontsize=15)
plt.ylabel('Optimality Ratio (OR)', fontsize=15)

# Average over the replication axis and draw one line per method
for method_cost, style in zip(cost, method_styles):
    plt.plot(x, method_cost.mean(axis=1), **style)

plt.xticks(x, fontsize=15)
plt.yticks(fontsize=15)
plt.legend(fontsize=15)
plt.grid()
plt.savefig('performance_compare_with_tau0_uniform.png', dpi=300, bbox_inches='tight')
plt.show()

### 2.3 Change with anchor mean - Normal-Normal Setting

In [None]:
# Optimality Ratio of IHT (row 0), DPTR (row 1) and the Bayesian rule (row 2)
# as the anchor mean varies, with normal true effects and normal noise.
SEED = 0 # seed for the global NumPy RNG so that reruns reproduce the same figure
N_SIM = 1000 # number of Monte Carlo replications per anchor mean
K = 100 # number of experiments
N = 10 # number of samples in each experiment
Z = norm.ppf(0.975) # critical value for 95% confidence interval
std = 3 # standard deviation of noise
anchor_mean_list = [1,2,3,4,5] # anchor means to compare

cost = np.zeros((3, len(anchor_mean_list), N_SIM)) # Optimality Ratio

np.random.seed(SEED)

for index1, anchor_mean in enumerate(anchor_mean_list):
    for time_seq in tqdm(range(N_SIM)):

        # Generate the true ATEs
        true_tao = np.random.normal(anchor_mean, 3, K)
        # Best achievable gain: launch exactly the experiments with a positive true effect
        optimal_cost = np.sum(true_tao[true_tao > 0])

        # Generate the data: 5 of the N units of every experiment are treated
        X = np.zeros((K, N))
        for i in range(K):
            X[i, np.random.choice(N, 5, replace=False)] = 1
        # One vectorised draw of the i.i.d. noise instead of K*N scalar draws
        Y = true_tao[:, None] * X + np.random.normal(0, std, size=(K, N))

        # DM: difference in means and Welch t-test, experiment by experiment
        tao_hat = np.zeros(K)
        variance = np.zeros(K)
        p_value_list = np.zeros(K)
        for k in range(K):
            group_1 = Y[k, X[k, :] == 1]
            group_0 = Y[k, X[k, :] == 0]
            p_value_list[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_hat[k] = group_1.mean() - group_0.mean()
            # N times the squared standard error of the difference in means
            variance[k] = N * (group_1.var(ddof=1) / len(group_1) + group_0.var(ddof=1) / len(group_0))

        # Moment estimates shared by the Bayesian rule and DPTR
        tao_0 = np.mean(tao_hat)
        numerator = np.mean(variance)
        # Spread of the estimates minus the average sampling variance
        denumerator = np.mean((tao_hat - tao_0)**2) - numerator/N

        # Bayesian Decision Making
        bayesian_beta = np.zeros(K)
        if denumerator <= 0:
            # Non-positive prior-variance estimate: no shrinkage (theta = 1)
            posteri_mean = tao_hat.copy()
            posteri_var = 1 / (N / variance)
        else:
            bayesian_beta = np.maximum(variance / denumerator, 0)
            shrink = N / (N + bayesian_beta)
            posteri_mean = tao_hat * shrink + (1 - shrink) * tao_0
            posteri_var = 1 / (1 / denumerator + N / variance)
        # Posterior probability of a positive effect, for all experiments at once
        prob = norm.sf(0, loc=posteri_mean, scale=np.sqrt(posteri_var))
        decision3 = np.flatnonzero(prob > 1 - 0.025)

        # DPTR
        beta = numerator/denumerator + Z*np.sqrt(N*numerator)/tao_0
        beta = max(0, beta)
        theta = N/(N+beta)
        tao_shunken_hat = np.zeros(K)
        p_value_list_shrunken = np.zeros(K)
        Y_shunken = theta * Y # new array; Y itself is left untouched
        for k in range(K):
            # Shifting the treated group by (1-theta)*tao_0 makes the difference in means
            # equal to theta*tao_hat[k] + (1-theta)*tao_0
            group_1 = Y_shunken[k, X[k, :] == 1] + (1-theta)*tao_0
            group_0 = Y_shunken[k, X[k, :] == 0]
            p_value_list_shrunken[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_shunken_hat[k] = group_1.mean() - group_0.mean()

        # Launch an experiment when the test is significant and the estimate is positive
        decision1 = np.flatnonzero((p_value_list < 0.05) & (tao_hat > 0))
        decision2 = np.flatnonzero((p_value_list_shrunken < 0.05) & (tao_shunken_hat > 0))

        # Realised gain of each rule relative to the best achievable gain
        for method, decision in enumerate((decision1, decision2, decision3)):
            cost[method, index1, time_seq] = np.sum(true_tao[decision]) / optimal_cost
    
            
            
            

In [None]:
# Mean Optimality Ratio of each method against the anchor mean (normal setting).
x = [1,2,3,4,5]
# Line style of each row of `cost`, in row order
method_styles = [
    dict(color='#8CA5EA', linestyle='--', marker='o', label="IHT"),
    dict(color='#495373', marker="s", label="DPTR"),
    dict(color='#E3738B', marker="s", label="Bayesian", linestyle='--'),
]

plt.figure(figsize=(8,6))
plt.xlabel(r'The value of $\tau_0$', fontsize=15)
plt.ylabel('Optimality Ratio (OR)', fontsize=15)

# Average over the replication axis and draw one line per method
for method_cost, style in zip(cost, method_styles):
    plt.plot(x, method_cost.mean(axis=1), **style)

plt.xticks(x, fontsize=15)
plt.yticks(fontsize=15)
plt.legend(fontsize=15)
plt.grid()
plt.savefig('performance_compare_with_tau0.png', dpi=300, bbox_inches='tight')
plt.show()

### 2.4 Change with number of products

In [None]:
# Optimality Ratio of IHT (row 0), DPTR (row 1) and the Bayesian rule (row 2)
# as the number of experiments K varies, in the basic Normal-Normal setting.
SEED = 0 # seed for the global NumPy RNG so that reruns reproduce the same figure
N_SIM = 1000 # number of Monte Carlo replications per value of K
N = 10 # number of samples in each experiment
Z = norm.ppf(0.975) # critical value for 95% confidence interval
std = 3 # standard deviation of noise
anchor_mean = 1 # mean of the anchor treatment effect
K_list = [20,40,60,80,100] # numbers of experiments to compare

cost = np.zeros((3, len(K_list), N_SIM)) # Optimality Ratio

np.random.seed(SEED)

for index1, K in enumerate(K_list):
    for time_seq in tqdm(range(N_SIM)):

        # Generate the true ATEs
        true_tao = np.random.normal(anchor_mean, 3, K)
        # Best achievable gain: launch exactly the experiments with a positive true effect
        optimal_cost = np.sum(true_tao[true_tao > 0])

        # Generate the data: 5 of the N units of every experiment are treated
        X = np.zeros((K, N))
        for i in range(K):
            X[i, np.random.choice(N, 5, replace=False)] = 1
        # One vectorised draw of the i.i.d. noise instead of K*N scalar draws
        Y = true_tao[:, None] * X + np.random.normal(0, std, size=(K, N))

        # DM: difference in means and Welch t-test, experiment by experiment
        tao_hat = np.zeros(K)
        variance = np.zeros(K)
        p_value_list = np.zeros(K)
        for k in range(K):
            group_1 = Y[k, X[k, :] == 1]
            group_0 = Y[k, X[k, :] == 0]
            p_value_list[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_hat[k] = group_1.mean() - group_0.mean()
            # N times the squared standard error of the difference in means
            variance[k] = N * (group_1.var(ddof=1) / len(group_1) + group_0.var(ddof=1) / len(group_0))

        # Moment estimates shared by the Bayesian rule and DPTR
        tao_0 = np.mean(tao_hat)
        numerator = np.mean(variance)
        # Spread of the estimates minus the average sampling variance
        denumerator = np.mean((tao_hat - tao_0)**2) - numerator/N

        # Bayesian Decision Making
        bayesian_beta = np.zeros(K)
        if denumerator <= 0:
            # Non-positive prior-variance estimate: no shrinkage (theta = 1)
            posteri_mean = tao_hat.copy()
            posteri_var = 1 / (N / variance)
        else:
            bayesian_beta = np.maximum(variance / denumerator, 0)
            shrink = N / (N + bayesian_beta)
            posteri_mean = tao_hat * shrink + (1 - shrink) * tao_0
            posteri_var = 1 / (1 / denumerator + N / variance)
        # Posterior probability of a positive effect, for all experiments at once
        prob = norm.sf(0, loc=posteri_mean, scale=np.sqrt(posteri_var))
        decision3 = np.flatnonzero(prob > 1 - 0.025)

        # DPTR
        beta = numerator/denumerator + Z*np.sqrt(N*numerator)/tao_0
        beta = max(0, beta)
        theta = N/(N+beta)
        tao_shunken_hat = np.zeros(K)
        p_value_list_shrunken = np.zeros(K)
        Y_shunken = theta * Y # new array; Y itself is left untouched
        for k in range(K):
            # Shifting the treated group by (1-theta)*tao_0 makes the difference in means
            # equal to theta*tao_hat[k] + (1-theta)*tao_0
            group_1 = Y_shunken[k, X[k, :] == 1] + (1-theta)*tao_0
            group_0 = Y_shunken[k, X[k, :] == 0]
            p_value_list_shrunken[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_shunken_hat[k] = group_1.mean() - group_0.mean()

        # Launch an experiment when the test is significant and the estimate is positive
        decision1 = np.flatnonzero((p_value_list < 0.05) & (tao_hat > 0))
        decision2 = np.flatnonzero((p_value_list_shrunken < 0.05) & (tao_shunken_hat > 0))

        # Realised gain of each rule relative to the best achievable gain
        for method, decision in enumerate((decision1, decision2, decision3)):
            cost[method, index1, time_seq] = np.sum(true_tao[decision]) / optimal_cost

In [None]:
# Mean Optimality Ratio of each method against the number of experiments.
x = [20,40,60,80,100]
# Line style of each row of `cost`, in row order
method_styles = [
    dict(color='#8CA5EA', linestyle='--', marker='o', label="IHT"),
    dict(color='#495373', marker="s", label="DPTR"),
    dict(color='#E3738B', marker="s", label="Bayesian", linestyle='--'),
]

plt.figure(figsize=(8,6))
plt.xlabel(r'The number of experiments $K$', fontsize=15)
plt.ylabel('Optimality Ratio (OR)', fontsize=15)

# Average over the replication axis and draw one line per method
for method_cost, style in zip(cost, method_styles):
    plt.plot(x, method_cost.mean(axis=1), **style)

plt.xticks(x, fontsize=15)
plt.yticks(fontsize=15)
plt.legend(fontsize=15)
plt.grid()
plt.savefig('performance_compare_with_K.png', dpi=300, bbox_inches='tight')
plt.show()

### 2.5 Change with sample size

In [None]:
# Optimality Ratio of IHT (row 0), DPTR (row 1) and the Bayesian rule (row 2)
# as the per-experiment sample size N varies, in the basic Normal-Normal setting.
SEED = 0 # seed for the global NumPy RNG so that reruns reproduce the same figure
N_SIM = 1000 # number of Monte Carlo replications per value of N
K = 100 # number of experiments
Z = norm.ppf(0.975) # critical value for 95% confidence interval
std = 3 # standard deviation of noise
anchor_mean = 1 # mean of the anchor treatment effect
N_list = [10,15,20,25,30] # sample sizes to compare

cost = np.zeros((3, len(N_list), N_SIM)) # Optimality Ratio

np.random.seed(SEED)

for index1, N in enumerate(N_list):
    n_treated = int(N/2) # treated units per experiment (rounded down for odd N)
    for time_seq in tqdm(range(N_SIM)):

        # Generate the true ATEs
        true_tao = np.random.normal(anchor_mean, 3, K)
        # Best achievable gain: launch exactly the experiments with a positive true effect
        optimal_cost = np.sum(true_tao[true_tao > 0])

        # Generate the data: n_treated of the N units of every experiment are treated
        X = np.zeros((K, N))
        for i in range(K):
            X[i, np.random.choice(N, n_treated, replace=False)] = 1
        # One vectorised draw of the i.i.d. noise instead of K*N scalar draws
        Y = true_tao[:, None] * X + np.random.normal(0, std, size=(K, N))

        # DM: difference in means and Welch t-test, experiment by experiment
        tao_hat = np.zeros(K)
        variance = np.zeros(K)
        p_value_list = np.zeros(K)
        for k in range(K):
            group_1 = Y[k, X[k, :] == 1]
            group_0 = Y[k, X[k, :] == 0]
            p_value_list[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_hat[k] = group_1.mean() - group_0.mean()
            # N times the squared standard error of the difference in means
            variance[k] = N * (group_1.var(ddof=1) / len(group_1) + group_0.var(ddof=1) / len(group_0))

        # Moment estimates shared by the Bayesian rule and DPTR
        tao_0 = np.mean(tao_hat)
        numerator = np.mean(variance)
        # Spread of the estimates minus the average sampling variance
        denumerator = np.mean((tao_hat - tao_0)**2) - numerator/N

        # Bayesian Decision Making
        bayesian_beta = np.zeros(K)
        if denumerator <= 0:
            # Non-positive prior-variance estimate: no shrinkage (theta = 1)
            posteri_mean = tao_hat.copy()
            posteri_var = 1 / (N / variance)
        else:
            bayesian_beta = np.maximum(variance / denumerator, 0)
            shrink = N / (N + bayesian_beta)
            posteri_mean = tao_hat * shrink + (1 - shrink) * tao_0
            posteri_var = 1 / (1 / denumerator + N / variance)
        # Posterior probability of a positive effect, for all experiments at once
        prob = norm.sf(0, loc=posteri_mean, scale=np.sqrt(posteri_var))
        decision3 = np.flatnonzero(prob > 1 - 0.025)

        # DPTR
        beta = numerator/denumerator + Z*np.sqrt(N*numerator)/tao_0
        beta = max(0, beta)
        theta = N/(N+beta)
        tao_shunken_hat = np.zeros(K)
        p_value_list_shrunken = np.zeros(K)
        Y_shunken = theta * Y # new array; Y itself is left untouched
        for k in range(K):
            # Shifting the treated group by (1-theta)*tao_0 makes the difference in means
            # equal to theta*tao_hat[k] + (1-theta)*tao_0
            group_1 = Y_shunken[k, X[k, :] == 1] + (1-theta)*tao_0
            group_0 = Y_shunken[k, X[k, :] == 0]
            p_value_list_shrunken[k] = ttest_ind(group_1, group_0, equal_var=False).pvalue
            tao_shunken_hat[k] = group_1.mean() - group_0.mean()

        # Launch an experiment when the test is significant and the estimate is positive
        decision1 = np.flatnonzero((p_value_list < 0.05) & (tao_hat > 0))
        decision2 = np.flatnonzero((p_value_list_shrunken < 0.05) & (tao_shunken_hat > 0))

        # Realised gain of each rule relative to the best achievable gain
        for method, decision in enumerate((decision1, decision2, decision3)):
            cost[method, index1, time_seq] = np.sum(true_tao[decision]) / optimal_cost

In [None]:
# Mean Optimality Ratio of each method against the per-experiment sample size.
x = [10,15,20,25,30]
# Line style of each row of `cost`, in row order
method_styles = [
    dict(color='#8CA5EA', linestyle='--', marker='o', label="IHT"),
    dict(color='#495373', marker="s", label="DPTR"),
    dict(color='#E3738B', marker="s", label="Bayesian", linestyle='--'),
]

plt.figure(figsize=(8,6))
plt.xlabel(r'Sample size $N$', fontsize=15)
plt.ylabel('Optimality Ratio (OR)', fontsize=15)

# Average over the replication axis and draw one line per method
for method_cost, style in zip(cost, method_styles):
    plt.plot(x, method_cost.mean(axis=1), **style)

plt.xticks(x, fontsize=15)
plt.yticks(fontsize=15)
plt.legend(fontsize=15)
plt.grid()
plt.savefig('performance_compare_with_N.png', dpi=300, bbox_inches='tight')
plt.show()