In [1]:
import os

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from tqdm import tqdm

import simulate_data.simulate_data as sd

In [2]:
# Write 1000 replicates from sd.make_zaidi_data_A (seeds 0-999, n=250 rows each),
# one CSV per seed, spread over sub-folders 0-9 by seed % 10.
n = 250  # loop-invariant, so set once instead of on every iteration
for i in range(1000):
    # data
    data = sd.make_zaidi_data_A(n, seed=i)
    X = data["X"]
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((X, np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Index with `j`, not `i`, so the column counter cannot be confused with the seed.
    X_names = ["X" + str(j) for j in range(X.shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [X] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    # DataFrame.to_csv does not create missing folders, so make the shard first.
    out_dir = "simulate_data/zaidi_data_A/" + str(i % 10)
    os.makedirs(out_dir, exist_ok=True)
    output_data.to_csv(out_dir + "/zaidi_data_A_seed=" + str(i) + ".csv", index=False)

In [3]:
# Write 1000 replicates from sd.make_zaidi_data_B (seeds 0-999, n=250 rows each),
# one CSV per seed, all in a single folder.
n = 250  # loop-invariant, so set once instead of on every iteration

# DataFrame.to_csv does not create missing folders, so make the target up front.
out_dir = "simulate_data/zaidi_data_B"
os.makedirs(out_dir, exist_ok=True)

for i in range(1000):
    # data
    data = sd.make_zaidi_data_B(n, seed=i)
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((data["X"], np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Derive the covariate names from the array width rather than hard-coding
    # X0..X4; for five covariates this yields exactly the same header.
    X_names = ["X" + str(j) for j in range(data["X"].shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [data["X"]] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    output_data.to_csv(out_dir + "/zaidi_data_B_seed=" + str(i) + ".csv", index=False)

In [4]:
# Disabled cell: every line below is commented out, so nothing here executes.
# It loops over function type x effect type x sample size x 1000 seeds, calls
# sd.make_hahn_data and writes one CSV per seed.
# NOTE(review): p_hat is computed below but never stored in `data`, so the CSV
# written by data.to_csv would not contain it - confirm before re-enabling.
#linnlin = ["linear","nonlinear"]
#hethom = ["homogeneous", "heterogeneous"]
#n_size = [250,500]
#for ll in range(len(linnlin)):
#    for hh in range(len(hethom)):
#        for n in range(len(n_size)):
#            for i in range(1000):
#                # data
#                data = sd.make_hahn_data(
#                    function_type=linnlin[ll], 
#                    effect_type=hethom[hh], 
#                    n_in_study=n_size[n],
#                    seed=i
#                )
#                Y=data["Y"]
#                W=data["W"]
#                pi=data["p"]
#                data["Y_i_star"] = sd.get_Y_i_star(Y,W,pi)
#                
#                X=data[["X0","X1","X2","X3","X4_2","X4_3","X5","X1_X3"]].copy()
#                X['intercept'] = 1
#                max_abs_scaler = preprocessing.MaxAbsScaler()
#                X_maxabs = max_abs_scaler.fit_transform(X)
#                clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
#                p_hat = clf.predict_proba(X_maxabs)[:,1]
#                
#                data.to_csv(
#                    "simulate_data/hahn_data_"+
#                    linnlin[ll]+"_"+ hethom[hh]+"_n="+ str(n_size[n])+
#                    "/hahn_data_seed="+str(i)+".csv", index=False)

In [5]:
# Write 1000 replicates from sd.make_zaidi_data_A called with variance=1
# (seeds 0-999, n=250 rows each), one CSV per seed, spread over sub-folders
# 0-9 by seed % 10.
n = 250  # loop-invariant, so set once instead of on every iteration
for i in tqdm(range(1000)):
    # data
    data = sd.make_zaidi_data_A(n, seed=i, variance=1)
    X = data["X"]
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((X, np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Index with `j`, not `i`, so the column counter cannot be confused with the seed.
    X_names = ["X" + str(j) for j in range(X.shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [X] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    # DataFrame.to_csv does not create missing folders, so make the shard first.
    out_dir = "simulate_data/zaidi_data_A_var=1/" + str(i % 10)
    os.makedirs(out_dir, exist_ok=True)
    output_data.to_csv(out_dir + "/zaidi_data_A_seed=" + str(i) + ".csv", index=False)
    

100%|██████████| 1000/1000 [00:23<00:00, 42.85it/s]


In [6]:
# Write 1000 replicates from sd.make_zaidi_data_B called with variance=1
# (seeds 0-999, n=250 rows each), one CSV per seed, all in a single folder.
n = 250  # loop-invariant, so set once instead of on every iteration

# DataFrame.to_csv does not create missing folders, so make the target up front.
out_dir = "simulate_data/zaidi_data_B_var=1"
os.makedirs(out_dir, exist_ok=True)

for i in tqdm(range(1000)):
    # data
    data = sd.make_zaidi_data_B(n, seed=i, variance=1)
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((data["X"], np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Derive the covariate names from the array width rather than hard-coding
    # X0..X4; for five covariates this yields exactly the same header.
    X_names = ["X" + str(j) for j in range(data["X"].shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [data["X"]] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    output_data.to_csv(out_dir + "/zaidi_data_B_seed=" + str(i) + ".csv", index=False)

100%|██████████| 1000/1000 [00:10<00:00, 96.56it/s]


In [7]:
# Write 1000 replicates from sd.make_zaidi_data_A called with variance=25
# (seeds 0-999, n=250 rows each), one CSV per seed, spread over sub-folders
# 0-9 by seed % 10.
n = 250  # loop-invariant, so set once instead of on every iteration
for i in tqdm(range(1000)):
    # data
    data = sd.make_zaidi_data_A(n, seed=i, variance=25)
    X = data["X"]
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((X, np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Index with `j`, not `i`, so the column counter cannot be confused with the seed.
    X_names = ["X" + str(j) for j in range(X.shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [X] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    # DataFrame.to_csv does not create missing folders, so make the shard first.
    out_dir = "simulate_data/zaidi_data_A_var=25/" + str(i % 10)
    os.makedirs(out_dir, exist_ok=True)
    output_data.to_csv(out_dir + "/zaidi_data_A_seed=" + str(i) + ".csv", index=False)
    

100%|██████████| 1000/1000 [00:21<00:00, 45.58it/s]


In [8]:
# Write 1000 replicates from sd.make_zaidi_data_B called with variance=25
# (seeds 0-999, n=250 rows each), one CSV per seed, all in a single folder.
n = 250  # loop-invariant, so set once instead of on every iteration

# DataFrame.to_csv does not create missing folders, so make the target up front.
out_dir = "simulate_data/zaidi_data_B_var=25"
os.makedirs(out_dir, exist_ok=True)

for i in tqdm(range(1000)):
    # data
    data = sd.make_zaidi_data_B(n, seed=i, variance=25)
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((data["X"], np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Derive the covariate names from the array width rather than hard-coding
    # X0..X4; for five covariates this yields exactly the same header.
    X_names = ["X" + str(j) for j in range(data["X"].shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [data["X"]] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    output_data.to_csv(out_dir + "/zaidi_data_B_seed=" + str(i) + ".csv", index=False)

100%|██████████| 1000/1000 [00:08<00:00, 117.51it/s]


In [9]:
# Write 1000 replicates from sd.make_zaidi_data_A called with variance=1000
# (seeds 0-999, n=250 rows each), one CSV per seed, spread over sub-folders
# 0-9 by seed % 10.
n = 250  # loop-invariant, so set once instead of on every iteration
for i in tqdm(range(1000)):
    # data
    data = sd.make_zaidi_data_A(n, seed=i, variance=1000)
    X = data["X"]
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((X, np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Index with `j`, not `i`, so the column counter cannot be confused with the seed.
    X_names = ["X" + str(j) for j in range(X.shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [X] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    # DataFrame.to_csv does not create missing folders, so make the shard first.
    out_dir = "simulate_data/zaidi_data_A_var=1000/" + str(i % 10)
    os.makedirs(out_dir, exist_ok=True)
    output_data.to_csv(out_dir + "/zaidi_data_A_seed=" + str(i) + ".csv", index=False)
    

100%|██████████| 1000/1000 [00:21<00:00, 45.75it/s]


In [10]:
# Write 1000 replicates from sd.make_zaidi_data_B called with variance=1000
# (seeds 0-999, n=250 rows each), one CSV per seed, all in a single folder.
n = 250  # loop-invariant, so set once instead of on every iteration

# DataFrame.to_csv does not create missing folders, so make the target up front.
out_dir = "simulate_data/zaidi_data_B_var=1000"
os.makedirs(out_dir, exist_ok=True)

for i in tqdm(range(1000)):
    # data
    data = sd.make_zaidi_data_B(n, seed=i, variance=1000)
    X = data["X"]
    Y = data["Y"]
    W = data["W"]
    pi = data["p"]
    Y_i_star = sd.get_Y_i_star(Y, W, pi)

    # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
    # covariates plus a constant column. Column 1 of predict_proba is the
    # probability of the second class (W=1 if W is coded 0/1).
    X_copy = np.hstack((X, np.ones((n, 1))))
    max_abs_scaler = preprocessing.MaxAbsScaler()
    X_maxabs = max_abs_scaler.fit_transform(X_copy)
    clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
    p_hat = clf.predict_proba(X_maxabs)[:, 1]

    # Index with `j`, not `i`, so the column counter cannot be confused with the seed.
    X_names = ["X" + str(j) for j in range(X.shape[1])]
    column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)']

    # Column blocks in the same order as column_names; every vector becomes (n, 1).
    blocks = [X] + [data[key].reshape(n, 1) for key in ("Y", "W", "p", "tau", "Y1", "Y0")]
    blocks += [Y_i_star.reshape(n, 1), p_hat.reshape(n, 1), data["h(x)"].reshape(n, 1)]
    output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

    output_data.to_csv(out_dir + "/zaidi_data_B_seed=" + str(i) + ".csv", index=False)
    

100%|██████████| 1000/1000 [00:08<00:00, 113.57it/s]


In [8]:
# For each of 20 seeds and each per_var setting, draw one data set from
# sd.make_CMM_data_B (n=250 rows) and write it to a per-setting folder.
percent_variation = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 10.0, 100.0]
n = 250  # 1000  (loop-invariant, so set once)
for i in tqdm(range(20)):
    for pv in percent_variation:
        # data
        data = sd.make_CMM_data_B(n, per_var=pv, seed=i)
        X = data["X"]
        Y = data["Y_obs"]
        W = data["W"]
        pi = data["p"]
        Y_i_star = data["Y_i_star"]
        # data['sig'] is repeated down a length-n column so it can sit in the table.
        sig = np.ones(n) * data['sig']

        # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
        # covariates plus a constant column. Column 1 of predict_proba is the
        # probability of the second class (W=1 if W is coded 0/1).
        X_copy = np.hstack((X, np.ones((n, 1))))
        max_abs_scaler = preprocessing.MaxAbsScaler()
        X_maxabs = max_abs_scaler.fit_transform(X_copy)
        clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
        p_hat = clf.predict_proba(X_maxabs)[:, 1]

        # Index with `j`, not `i`, so the column counter cannot be confused with the seed.
        X_names = ["X" + str(j) for j in range(X.shape[1])]
        column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)', 'sig']

        # Column blocks in the same order as column_names; every vector becomes (n, 1).
        # The 'Y' column is filled from data["Y_obs"] and 'tau' from data['g(x)'].
        blocks = [X] + [data[key].reshape(n, 1) for key in ("Y_obs", "W", "p", "g(x)", "Y1", "Y0")]
        blocks += [
            Y_i_star.reshape(n, 1),
            p_hat.reshape(n, 1),
            data["h(x)"].reshape(n, 1),
            sig.reshape(n, 1),
        ]
        output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

        # DataFrame.to_csv does not create missing folders, so make this setting's folder first.
        out_dir = "simulate_data/CMM_data_B_var_percent=" + str(pv)
        os.makedirs(out_dir, exist_ok=True)
        output_data.to_csv(out_dir + "/CMM_data_B_seed=" + str(i) + ".csv", index=False)
        

  5%|▌         | 1/20 [00:00<00:02,  7.21it/s]

Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 0.027603791041122608
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 0.061723953204649844
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 0.08729085174529808
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 0.19518827831634267
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 0.2760379104112261
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 0.6172395320464985
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 0.8729085174529808
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 1.234479064092997
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 2.760379104112261
Var(g): 0.7619692798419608
Var(h): 1307.9290317385037
sig= 8.729085174529807
Var(g): 0.7856965007798817
Var(h): 1213.6296392948934
sig= 0.028030278285808754
Var(g): 0.7856965007798817
Var(h): 1213.6296392948934
sig= 0.06267760767530466
Var(g): 0.7856965007798817
Var(h): 1213.6296392948934
sig=

 15%|█▌        | 3/20 [00:00<00:02,  7.62it/s]

Var(g): 0.7856965007798817
Var(h): 1213.6296392948934
sig= 0.8863952283151584
Var(g): 0.7856965007798817
Var(h): 1213.6296392948934
sig= 1.253552153506093
Var(g): 0.7856965007798817
Var(h): 1213.6296392948934
sig= 2.803027828580875
Var(g): 0.7856965007798817
Var(h): 1213.6296392948934
sig= 8.863952283151583
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 0.02727996738897105
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 0.06099986150571671
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 0.08626683144426506
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 0.19289849931289302
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 0.2727996738897105
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 0.6099986150571671
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 0.8626683144426506
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 1.2199972301143343
Var(g): 0.7441966207433238
Var(h): 915.3883343018998
sig= 2.72799673889

 25%|██▌       | 5/20 [00:00<00:01,  8.07it/s]

Var(g): 0.7364269044659454
Var(h): 925.7580671233713
sig= 0.1918888877014437
Var(g): 0.7364269044659454
Var(h): 925.7580671233713
sig= 0.27137186745606945
Var(g): 0.7364269044659454
Var(h): 925.7580671233713
sig= 0.6068059428128343
Var(g): 0.7364269044659454
Var(h): 925.7580671233713
sig= 0.858153194054503
Var(g): 0.7364269044659454
Var(h): 925.7580671233713
sig= 1.2136118856256686
Var(g): 0.7364269044659454
Var(h): 925.7580671233713
sig= 2.713718674560695
Var(g): 0.7364269044659454
Var(h): 925.7580671233713
sig= 8.58153194054503
Var(g): 0.7628364033680546
Var(h): 1342.1464253715665
sig= 0.027619493177248104
Var(g): 0.7628364033680546
Var(h): 1342.1464253715665
sig= 0.061759064248418406
Var(g): 0.7628364033680546
Var(h): 1342.1464253715665
sig= 0.08734050625958466
Var(g): 0.7628364033680546
Var(h): 1342.1464253715665
sig= 0.19529930918567717
Var(g): 0.7628364033680546
Var(h): 1342.1464253715665
sig= 0.27619493177248106
Var(g): 0.7628364033680546
Var(h): 1342.1464253715665
sig= 0.617590

 30%|███       | 6/20 [00:00<00:01,  8.17it/s]

Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 0.058652858736159004
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 0.08294766829662933
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 0.18547662488636737
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 0.26230355841748787
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 0.5865285873615901
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 0.8294766829662933
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 1.1730571747231802
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 2.6230355841748785
Var(g): 0.6880315675847647
Var(h): 1006.9831193904547
sig= 8.294766829662933
Var(g): 0.6612976205035555
Var(h): 1379.8165472800113
sig= 0.025715707660952197
Var(g): 0.6612976205035555
Var(h): 1379.8165472800113
sig= 0.057502070419401224
Var(g): 0.6612976205035555
Var(h): 1379.8165472800113
sig= 0.08132020785164998
Var(g): 0.6612976205035555
Var(h): 1379.8165472800113
s

 40%|████      | 8/20 [00:00<00:01,  8.42it/s]

Var(g): 0.6612976205035555
Var(h): 1379.8165472800113
sig= 8.132020785164997
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 0.02795764020272543
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 0.06251518398377506
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 0.08840982104410398
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 0.19769036973320794
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 0.2795764020272543
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 0.6251518398377506
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 0.8840982104410398
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 1.2503036796755012
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 2.795764020272543
Var(g): 0.7816296457050491
Var(h): 1350.8770718517346
sig= 8.840982104410399
Var(g): 0.8294445961503066
Var(h): 724.4871467072211
sig= 0.028800079794165618
Var(g): 0.8294445961503066
Var(h): 724.4871467072211
sig= 0.06

 50%|█████     | 10/20 [00:01<00:01,  8.46it/s]

Var(g): 0.8294445961503066
Var(h): 724.4871467072211
sig= 0.9107384894415667
Var(g): 0.8294445961503066
Var(h): 724.4871467072211
sig= 1.2879787235434494
Var(g): 0.8294445961503066
Var(h): 724.4871467072211
sig= 2.8800079794165616
Var(g): 0.8294445961503066
Var(h): 724.4871467072211
sig= 9.107384894415667
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 0.027113758864221224
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 0.06062820794593615
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 0.08574123393951916
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 0.19172322756347693
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 0.27113758864221227
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 0.6062820794593615
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 0.8574123393951915
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 1.212564158918723
Var(g): 0.735155919747135
Var(h): 1038.0015556824192
sig= 2.711375886422

 60%|██████    | 12/20 [00:01<00:00,  8.62it/s]

Var(g): 0.7071640642577652
Var(h): 1276.1466652767851
sig= 0.2659255655738585
Var(g): 0.7071640642577652
Var(h): 1276.1466652767851
sig= 0.5946276415782255
Var(g): 0.7071640642577652
Var(h): 1276.1466652767851
sig= 0.8409304752818543
Var(g): 0.7071640642577652
Var(h): 1276.1466652767851
sig= 1.189255283156451
Var(g): 0.7071640642577652
Var(h): 1276.1466652767851
sig= 2.659255655738585
Var(g): 0.7071640642577652
Var(h): 1276.1466652767851
sig= 8.409304752818542
Var(g): 0.717507149464259
Var(h): 931.022290955333
sig= 0.02678632392591897
Var(g): 0.717507149464259
Var(h): 931.022290955333
sig= 0.05989604116568385
Var(g): 0.717507149464259
Var(h): 931.022290955333
sig= 0.0847057937489673
Var(g): 0.717507149464259
Var(h): 931.022290955333
sig= 0.18940791291076767
Var(g): 0.717507149464259
Var(h): 931.022290955333
sig= 0.2678632392591897
Var(g): 0.717507149464259
Var(h): 931.022290955333
sig= 0.5989604116568386
Var(g): 0.717507149464259
Var(h): 931.022290955333
sig= 0.8470579374896732
Var(g):

 70%|███████   | 14/20 [00:01<00:00,  8.90it/s]

Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 0.08903161006730488
Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 0.19908073225674833
Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 0.2815426715646604
Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 0.6295485521854777
Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 0.8903161006730488
Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 1.2590971043709553
Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 2.8154267156466037
Var(g): 0.7926627591176623
Var(h): 1189.115381695822
sig= 8.903161006730487
Var(g): 0.7578659450011087
Var(h): 991.1076069896055
sig= 0.027529365139812228
Var(g): 0.7578659450011087
Var(h): 991.1076069896055
sig= 0.061557531830033145
Var(g): 0.7578659450011087
Var(h): 991.1076069896055
sig= 0.08705549638024636
Var(g): 0.7578659450011087
Var(h): 991.1076069896055
sig= 0.19466200772121775
Var(g): 0.7578659450011087
Var(h): 991.1076069896055
sig= 0.275293651

 75%|███████▌  | 15/20 [00:01<00:00,  8.88it/s]

Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 0.06314057612986795
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 0.08929425909891017
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 0.19966803334564223
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 0.28237324072977066
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 0.6314057612986794
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 0.8929425909891016
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 1.2628115225973588
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 2.8237324072977064
Var(g): 0.79734647080233
Var(h): 1219.904303758814
sig= 8.929425909891016
Var(g): 0.7663852501471006
Var(h): 1479.3296268778074
sig= 0.02768366395813785
Var(g): 0.7663852501471006
Var(h): 1479.3296268778074
sig= 0.06190255447665712
Var(g): 0.7663852501471006
Var(h): 1479.3296268778074
sig= 0.08754343208642786
Var(g): 0.7663852501471006
Var(h): 1479.3296268778074
sig= 0.19575306512888893
Var(g)

 85%|████████▌ | 17/20 [00:01<00:00,  9.02it/s]

Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 0.027197352337871385
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 0.060815128635493244
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 0.08600557971377892
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 0.19231432268428658
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 0.27197352337871383
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 0.6081512863549324
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 0.8600557971377892
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 1.2163025727098649
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 2.7197352337871386
Var(g): 0.7396959741903181
Var(h): 928.8617332187657
sig= 8.600557971377892
Var(g): 0.7654138788247472
Var(h): 890.5024526119446
sig= 0.02766611427043464
Var(g): 0.7654138788247472
Var(h): 890.5024526119446
sig= 0.06186331218196885
Var(g): 0.7654138788247472
Var(h): 890.5024526119446
sig= 0.08748793

 95%|█████████▌| 19/20 [00:02<00:00,  9.04it/s]

Var(g): 0.7654138788247472
Var(h): 890.5024526119446
sig= 8.748793510106106
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 0.02840339196623771
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 0.06351191522807893
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 0.08981941188783954
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 0.20084231068026193
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 0.2840339196623771
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 0.6351191522807893
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 0.8981941188783953
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 1.2702383045615786
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 2.840339196623771
Var(g): 0.806752675187737
Var(h): 1278.2041597909736
sig= 8.981941188783953
Var(g): 0.7382913856882439
Var(h): 1095.6963692098004
sig= 0.027171517912848445
Var(g): 0.7382913856882439
Var(h): 1095.6963692098004
sig= 0.06075736110

100%|██████████| 20/20 [00:02<00:00,  8.65it/s]

Var(g): 0.7382913856882439
Var(h): 1095.6963692098004
sig= 2.7171517912848446
Var(g): 0.7382913856882439
Var(h): 1095.6963692098004
sig= 8.592388408866558





In [2]:
# For each of 20 seeds and each per_var setting, draw one data set from
# sd.make_CMM_data_C (n=250 rows) and write it to a per-setting folder.
percent_variation = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 10.0, 100.0]
n = 250  # 1000  (loop-invariant, so set once)
for i in tqdm(range(20)):
    for pv in percent_variation:
        # data
        data = sd.make_CMM_data_C(n, per_var=pv, seed=i)
        X = data["X"]
        Y = data["Y_obs"]
        W = data["W"]
        pi = data["p"]
        Y_i_star = data["Y_i_star"]
        # data['sig'] is repeated down a length-n column so it can sit in the table.
        sig = np.ones(n) * data['sig']

        # Fitted propensity p_hat: logistic regression of W on the max-abs-scaled
        # covariates plus a constant column. Column 1 of predict_proba is the
        # probability of the second class (W=1 if W is coded 0/1).
        X_copy = np.hstack((X, np.ones((n, 1))))
        max_abs_scaler = preprocessing.MaxAbsScaler()
        X_maxabs = max_abs_scaler.fit_transform(X_copy)
        clf = LogisticRegression(random_state=0).fit(X_maxabs, W)
        p_hat = clf.predict_proba(X_maxabs)[:, 1]

        # Index with `j`, not `i`, so the column counter cannot be confused with the seed.
        X_names = ["X" + str(j) for j in range(X.shape[1])]
        column_names = X_names + ['Y', 'W', 'p', 'tau', 'Y1', 'Y0', 'Y_i_star', 'p_hat', 'h(x)', 'sig']

        # Column blocks in the same order as column_names; every vector becomes (n, 1).
        # The 'Y' column is filled from data["Y_obs"] and 'tau' from data['g(x)'].
        blocks = [X] + [data[key].reshape(n, 1) for key in ("Y_obs", "W", "p", "g(x)", "Y1", "Y0")]
        blocks += [
            Y_i_star.reshape(n, 1),
            p_hat.reshape(n, 1),
            data["h(x)"].reshape(n, 1),
            sig.reshape(n, 1),
        ]
        output_data = pd.DataFrame(np.hstack(blocks), columns=column_names)

        # DataFrame.to_csv does not create missing folders, so make this setting's folder first.
        out_dir = "simulate_data/CMM_data_C_var_percent=" + str(pv)
        os.makedirs(out_dir, exist_ok=True)
        output_data.to_csv(out_dir + "/CMM_data_C_seed=" + str(i) + ".csv", index=False)
        

  5%|▌         | 1/20 [00:00<00:02,  8.31it/s]

Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.019575450172839817
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.04377203727662984
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.06190300876931059
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.13841933561994407
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.19575450172839814
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.4377203727662983
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.6190300876931059
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 0.8754407455325967
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 1.9575450172839814
Var(g): 0.38319824946933434
Var(h): 0.0
sig= 6.190300876931059
Var(g): 0.4259125390748779
Var(h): 0.0
sig= 0.02063764858395641
Var(g): 0.4259125390748779
Var(h): 0.0
sig= 0.0461471851294788
Var(g): 0.4259125390748779
Var(h): 0.0
sig= 0.06526197507545094
Var(g): 0.4259125390748779
Var(h): 0.0
sig= 0.14593021261460526
Var(g): 0.4259125390748779
Var(h): 0.0
sig= 0.20637648583956408
Var(g): 0.4259125390748779
Var(h): 0

 15%|█▌        | 3/20 [00:00<00:01,  8.65it/s]

Var(g): 0.4259125390748779
Var(h): 0.0
sig= 2.0637648583956407
Var(g): 0.4259125390748779
Var(h): 0.0
sig= 6.526197507545094
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.01930506565582738
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.04316743911652659
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.06104797785150676
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.13650742836487068
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.19305065655827383
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.43167439116526585
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.6104797785150675
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 0.8633487823305317
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 1.9305065655827383
Var(g): 0.37268555997580594
Var(h): 0.0
sig= 6.104797785150676
Var(g): 0.38720339254585645
Var(h): 0.0
sig= 0.01967748440593605
Var(g): 0.38720339254585645
Var(h): 0.0
sig= 0.04400019275786508
Var(g): 0.38720339254585645
Var(h): 0.0
sig= 0.06222566934520323
Var(g): 0.38720339254585645
Var(h):

 25%|██▌       | 5/20 [00:00<00:01,  9.01it/s]

Var(g): 0.38720339254585645
Var(h): 0.0
sig= 1.967748440593605
Var(g): 0.38720339254585645
Var(h): 0.0
sig= 6.222566934520323
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.020016118754841022
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.04475740218153297
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.06329652518171108
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.1415353330458332
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.20016118754841022
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.44757402181532974
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.6329652518171108
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 0.8951480436306595
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 2.001611875484102
Var(g): 0.4006450100078985
Var(h): 0.0
sig= 6.329652518171108
Var(g): 0.40439837055916317
Var(h): 0.0
sig= 0.020109658638553844
Var(g): 0.40439837055916317
Var(h): 0.0
sig= 0.04496656372012227
Var(g): 0.40439837055916317
Var(h): 0.0
sig= 0.06359232426631088
Var(g): 0.40439837055916317
Var(h): 0.0
sig=

 35%|███▌      | 7/20 [00:00<00:01,  9.54it/s]

Var(g): 0.40439837055916317
Var(h): 0.0
sig= 2.0109658638553842
Var(g): 0.40439837055916317
Var(h): 0.0
sig= 6.359232426631088
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.02025984082148006
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.04530238129015459
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.06406724202833376
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.14325870830628581
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.20259840821480057
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.45302381290154586
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.6406724202833377
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 0.9060476258030917
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 2.0259840821480055
Var(g): 0.4104611501117097
Var(h): 0.0
sig= 6.406724202833377
Var(g): 0.3861284470706293
Var(h): 0.0
sig= 0.019650151324369727
Var(g): 0.3861284470706293
Var(h): 0.0
sig= 0.04393907412944823
Var(g): 0.3861284470706293
Var(h): 0.0
sig= 0.06213923455198248
Var(g): 0.3861284470706293
Var(h): 0.0
sig= 0

 45%|████▌     | 9/20 [00:00<00:01,  9.95it/s]

Var(g): 0.3750911411913183
Var(h): 0.0
sig= 0.043306531908669295
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 0.061244684764583314
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 0.136947278394154
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 0.19367269843509652
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 0.43306531908669293
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 0.6124468476458331
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 0.8661306381733859
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 1.936726984350965
Var(g): 0.3750911411913183
Var(h): 0.0
sig= 6.124468476458331
Var(g): 0.3962291834198807
Var(h): 0.0
sig= 0.01990550635929367
Var(g): 0.3962291834198807
Var(h): 0.0
sig= 0.044510065345935
Var(g): 0.3962291834198807
Var(h): 0.0
sig= 0.06294673807433399
Var(g): 0.3962291834198807
Var(h): 0.0
sig= 0.140753185296085
Var(g): 0.3962291834198807
Var(h): 0.0
sig= 0.1990550635929367
Var(g): 0.3962291834198807
Var(h): 0.0
sig= 0.44510065345934996
Var(g): 0.3962291834198807
Var(h): 0.0
sig= 0.62946

 60%|██████    | 12/20 [00:01<00:00, 10.21it/s]

Var(g): 0.3759694341287809
Var(h): 0.0
sig= 0.1938993125642226
Var(g): 0.3759694341287809
Var(h): 0.0
sig= 0.4335720436840808
Var(g): 0.3759694341287809
Var(h): 0.0
sig= 0.6131634644438471
Var(g): 0.3759694341287809
Var(h): 0.0
sig= 0.8671440873681616
Var(g): 0.3759694341287809
Var(h): 0.0
sig= 1.938993125642226
Var(g): 0.3759694341287809
Var(h): 0.0
sig= 6.131634644438471
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.019703274788806402
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.04405786180712893
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.06230722569680133
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.1393231921474695
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.19703274788806405
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.4405786180712893
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.6230722569680133
Var(g): 0.388219037403214
Var(h): 0.0
sig= 0.8811572361425786
Var(g): 0.388219037403214
Var(h): 0.0
sig= 1.9703274788806402
Var(g): 0.388219037403214
Var(h): 0.0
sig= 6.230722569680133


 70%|███████   | 14/20 [00:01<00:00,  9.40it/s]

Var(g): 0.3939852501305951
Var(h): 0.0
sig= 0.4438385123727971
Var(g): 0.3939852501305951
Var(h): 0.0
sig= 0.6276824437011084
Var(g): 0.3939852501305951
Var(h): 0.0
sig= 0.8876770247455942
Var(g): 0.3939852501305951
Var(h): 0.0
sig= 1.9849061693959116
Var(g): 0.3939852501305951
Var(h): 0.0
sig= 6.276824437011084
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.020869712332669522
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.04666609544671476
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.06599582508374136
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.14757115111843142
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.20869712332669524
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.46666095446714756
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.6599582508374134
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 0.9333219089342951
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 2.0869712332669526
Var(g): 0.4355448928483784
Var(h): 0.0
sig= 6.599582508374135
Var(g): 0.36139939135050136
Var(h): 0.0
sig= 0.019

 80%|████████  | 16/20 [00:01<00:00,  9.31it/s]

Var(g): 0.36139939135050136
Var(h): 0.0
sig= 0.13442458691595474
Var(g): 0.36139939135050136
Var(h): 0.0
sig= 0.1901050739329441
Var(g): 0.36139939135050136
Var(h): 0.0
sig= 0.42508786818168626
Var(g): 0.36139939135050136
Var(h): 0.0
sig= 0.6011650283828072
Var(g): 0.36139939135050136
Var(h): 0.0
sig= 0.8501757363633725
Var(g): 0.36139939135050136
Var(h): 0.0
sig= 1.901050739329441
Var(g): 0.36139939135050136
Var(h): 0.0
sig= 6.011650283828072
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.019932192706077857
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.044569737831415566
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.06303112771260112
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.14094188626384693
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.19932192706077856
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.4456973783141557
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.6303112771260112
Var(g): 0.39729230607222327
Var(h): 0.0
sig= 0.8913947566283114
Var(g): 0.39729230607222327
Var(h

 85%|████████▌ | 17/20 [00:01<00:00,  9.45it/s]

Var(g): 0.3982667011275953
Var(h): 0.0
sig= 0.06310837512783825
Var(g): 0.3982667011275953
Var(h): 0.0
sig= 0.1411146167354033
Var(g): 0.3982667011275953
Var(h): 0.0
sig= 0.19956620483628867
Var(g): 0.3982667011275953
Var(h): 0.0
sig= 0.44624360002558877
Var(g): 0.3982667011275953
Var(h): 0.0
sig= 0.6310837512783825
Var(g): 0.3982667011275953
Var(h): 0.0
sig= 0.8924872000511775
Var(g): 0.3982667011275953
Var(h): 0.0
sig= 1.9956620483628869
Var(g): 0.3982667011275953
Var(h): 0.0
sig= 6.3108375127838245
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.02010823219177801
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.04496337408816522
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.06358781344553825
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.14218667340479865
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.2010823219177801
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.4496337408816522
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.6358781344553825
Var(g): 0.4043410018784575
Var(h): 0.0
sig= 0.89

100%|██████████| 20/20 [00:02<00:00,  9.99it/s]

Var(g): 0.39746900709512367
Var(h): 0.0
sig= 0.19936624766873748
Var(g): 0.39746900709512367
Var(h): 0.0
sig= 0.4457964822063559
Var(g): 0.39746900709512367
Var(h): 0.0
sig= 0.6304514311944447
Var(g): 0.39746900709512367
Var(h): 0.0
sig= 0.8915929644127119
Var(g): 0.39746900709512367
Var(h): 0.0
sig= 1.9936624766873747
Var(g): 0.39746900709512367
Var(h): 0.0
sig= 6.304514311944447
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.020135631881735016
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.0450246416574715
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.06367445887298483
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.14238041847050872
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.20135631881735017
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.45024641657471504
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.6367445887298483
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 0.9004928331494301
Var(g): 0.40544367127674363
Var(h): 0.0
sig= 2.0135631881735017
Var(g): 0.40544367127674363
Var(h)


