In [2]:
import numpy as np
from scipy import stats
import pandas as pd
import random
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [3]:
def fake_response(df):
    """Add three simulated outcome columns to ``df`` and return the same frame.

    Every design has the form

        y = baseline(x) + 1/2 * (2 * treatment - 1) * effect(x) + error

    and differs only in how ``baseline`` and ``effect`` are built from the
    covariates. The designs presumably follow the simulation setups in
    Athey's paper (the original comment read "Anthey's") -- TODO confirm
    the exact reference.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``x1`` .. ``x6``, ``treatment`` and ``error``.

    Returns
    -------
    pandas.DataFrame
        The *same* object that was passed in, with ``design1_y``,
        ``design2_y`` and ``design3_y`` added (or overwritten) in place.
        Pass a copy if the original frame must stay untouched.
    """
    x1, x2 = df['x1'], df['x2']

    # Treatment factor shared by all designs: 1/2 * (2 * treatment - 1).
    half_sign = 1/2*(2*df['treatment'] - 1)
    noise = df['error']

    # --- Design 1: baseline and effect are both linear in the covariates.
    baseline_1 = 1/2*x1 + x2
    effect_1 = 1/2*x1
    df['design1_y'] = baseline_1 + half_sign * effect_1 + noise

    # Designs 2 and 3 share one baseline built from x1 .. x6.
    baseline_23 = 1/2 * (x1 + x2) + df['x3'] + df['x4'] + df['x5'] + df['x6']

    # --- Design 2: effect keeps x1 only where x1 > 0 and x2 only where x2 > 0.5.
    # NOTE(review): x2 uses a 0.5 cutoff while x1 uses 0 -- confirm the
    # asymmetry is intended.
    effect_2 = x1.where(x1 > 0, 0) + x2.where(x2 > 0.5, 0)
    df['design2_y'] = baseline_23 + half_sign * effect_2 + noise

    # --- Design 3: effect is linear in x1 and quadratic in x2.
    effect_3 = (1/2)*x1 + x2**2
    df['design3_y'] = baseline_23 + half_sign * effect_3 + noise

    return df

In [62]:
# Create the synthetic data set: ten covariates, a treatment indicator and a
# noise term. Outcome columns are not added here; fake_response() is applied
# to copies of this frame in later cells.
fake_data = pd.DataFrame()

# Single source of truth for the sample size, so every column and the
# treatment split stay the same length if the size is ever changed.
N_SAMPLES = 8000

# Fixed seed: the data is fully determined by the seed plus the order of the
# draws below (x1, x2 .. x10, error), so keep that order stable.
np.random.seed(123)

# x1 is uniform on [-4, 4].
fake_data['x1'] = np.random.uniform(-4, 4, N_SAMPLES)

# x2 .. x10 are normal draws with the (mean, standard deviation) listed.
NORMAL_COVARIATES = [
    ('x2', 0, 0.5),
    ('x3', 1, 0.25),
    ('x4', 0.2, 0.75),
    ('x5', 0, 1.2),
    ('x6', 1.3, 2),
    ('x7', 0.95, 0.95),
    ('x8', 0, 1.5),
    ('x9', 2, 1),
    ('x10', 1, 2),
]
for column, mean, std in NORMAL_COVARIATES:
    fake_data[column] = np.random.normal(mean, std, N_SAMPLES)

# Deterministic assignment: first half of the rows are 0, the rest are 1.
n_control = N_SAMPLES // 2
fake_data['treatment'] = np.concatenate(
    (np.zeros(n_control), np.ones(N_SAMPLES - n_control)), axis=0
)
fake_data['error'] = np.random.normal(0, 0.01, N_SAMPLES)

fake_data.to_csv('large_unif.csv')

In [59]:
# Preview the first rows of the generated frame.
# NOTE(review): the saved output below this cell shows x1 values such as
# 4.994691, which cannot come from uniform(-4, 4) -- the output appears to
# predate the current generation cell; re-run to refresh it.
fake_data.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,treatment,error
0,0.828739,3.076542,1.043453,-1.7893,-0.434404,3.925991,0.327626,2.239195,2.49481,3.436106,0.0,-0.002854
1,4.994691,-1.982354,0.765615,-0.179238,-0.270762,2.710324,1.723318,-0.351066,1.654482,2.873706,0.0,0.012634
2,3.565957,3.621342,1.10627,1.577282,-2.051984,-0.826568,0.404221,-1.613913,1.179329,3.381251,0.0,-0.01424
3,-0.012589,1.239896,1.009101,-0.578784,-0.082949,2.04707,2.719219,1.635631,3.268785,1.992967,0.0,0.006432
4,1.842799,2.978143,0.979962,-0.640457,-1.830551,-0.71667,-0.027173,1.902436,1.045111,-1.310024,0.0,-0.013631


In [41]:
# Export fake_data as it currently stands (x2 left as its random draw).
# NOTE(review): fake_response() is never applied to fake_data itself in the
# visible cells, so this file would have no design*_y columns, unlike the
# fake_data_x2_25 / fake_data_x2_75 exports -- confirm that is intended.
fake_data.to_csv('fake_data_x2_rand.csv')

In [45]:
# x2 = 0.25: hold x2 constant at 0.25 for every row, then simulate responses.
# The constant array is sized from the frame itself. A hard-coded length of
# 1000 raises a length-mismatch ValueError on assignment whenever fake_data
# has a different number of rows (it is generated with 8000).
array_25 = np.full(len(fake_data), 0.25)
fake_data_25 = fake_data.copy()  # copy so fake_data keeps its random x2
fake_data_25['x2'] = array_25
# fake_response adds design1_y / design2_y / design3_y to the copy in place.
fake_data_25 = fake_response(fake_data_25)
fake_data_25.to_csv('fake_data_x2_25.csv')

In [46]:
# Preview the first rows of the x2 = 0.25 variant, including the three
# design*_y response columns added by fake_response().
fake_data_25.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,treatment,error,design1_y,design2_y,design3_y
0,-2.171261,0.25,0.556444,-0.137949,-0.565531,1.937527,1.165561,1.676773,1.67708,3.263077,0.0,-0.01241,-0.305225,0.817451,1.329016
1,1.994691,0.25,0.699656,0.657193,1.300887,4.322208,-1.328626,-0.00859,2.50116,0.35698,0.0,-0.003129,0.745543,7.101814,7.569236
2,0.565957,0.25,1.274064,1.080308,-0.455068,-0.973254,0.524535,2.277591,2.135932,1.784295,0.0,-0.008489,0.383,1.042562,1.152801
3,-3.012589,0.25,1.215259,0.853861,-0.434729,2.58492,1.137515,-1.1003,2.397014,-0.083996,0.0,0.02378,-0.479368,2.861796,3.583693
4,-1.157201,0.25,0.619908,1.628543,-0.817285,-0.955763,1.545997,-2.611219,1.733013,-2.026772,0.0,0.006575,-0.032725,0.028377,0.286427


In [47]:
# x2 = 0.75: hold x2 constant at 0.75 for every row, then simulate responses.
# The constant array is sized from the frame itself. A hard-coded length of
# 1000 raises a length-mismatch ValueError on assignment whenever fake_data
# has a different number of rows (it is generated with 8000).
array_75 = np.full(len(fake_data), 0.75)
fake_data_75 = fake_data.copy()  # copy so fake_data keeps its random x2
fake_data_75['x2'] = array_75
# fake_response adds design1_y / design2_y / design3_y to the copy in place.
fake_data_75 = fake_response(fake_data_75)
fake_data_75.to_csv('fake_data_x2_75.csv')

In [50]:
# Preview the first rows of the x2 = 0.75 variant, including the three
# design*_y response columns added by fake_response().
# For the control rows shown (treatment = 0), design3_y matches the x2 = 0.25
# preview. This is expected: in design 3 the x2 contribution for control rows
# is 0.5*x2 - 0.5*x2**2, which equals 0.09375 at both 0.25 and 0.75.
fake_data_75.head()

Unnamed: 0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,treatment,error,design1_y,design2_y,design3_y
0,-2.171261,0.75,0.556444,-0.137949,-0.565531,1.937527,1.165561,1.676773,1.67708,3.263077,0.0,-0.01241,0.194775,0.692451,1.329016
1,1.994691,0.75,0.699656,0.657193,1.300887,4.322208,-1.328626,-0.00859,2.50116,0.35698,0.0,-0.003129,1.245543,6.976814,7.569236
2,0.565957,0.75,1.274064,1.080308,-0.455068,-0.973254,0.524535,2.277591,2.135932,1.784295,0.0,-0.008489,0.883,0.917562,1.152801
3,-3.012589,0.75,1.215259,0.853861,-0.434729,2.58492,1.137515,-1.1003,2.397014,-0.083996,0.0,0.02378,0.020632,2.736796,3.583693
4,-1.157201,0.75,0.619908,1.628543,-0.817285,-0.955763,1.545997,-2.611219,1.733013,-2.026772,0.0,0.006575,0.467275,-0.096623,0.286427
