In [1]:
import random
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap

from src.dataGenerator import DataGenerator01

In [2]:
# Generator settings, kept as module-level names and also bundled into the
# `config` dict that is handed to DataGenerator01.
# NOTE(review): the names suggest cohort sizes (Nold / Nyoung) and per-cohort
# treatment rates — confirm against src.dataGenerator.
Nold, oldTreatRate = 500, 0.1
Nyoung, youngTreatRate = 500, 1 - 0.1

config = dict(
    Nold=Nold,
    oldTreatRate=oldTreatRate,
    Nyoung=Nyoung,
    youngTreatRate=youngTreatRate,
)
dataGenerator = DataGenerator01(config)

In [9]:
# Sweep settings used by the dataset-generation loop:
#   noiseScale - passed as `scale` to dataGenerator.addNoise
#   numDataset - number of interaction-coefficient values (one dataset each)
noiseScale, numDataset = 1, 50

In [8]:
# Generate one synthetic dataset per interaction coefficient and optionally
# write it to disk.
#
# For each coefficient value this cell:
#   1. draws age, age tag, sex and treatment for the young and old cohorts,
#   2. computes three outcomes (getOutcome1..3), each with and without CF=True,
#      plus a noisy copy of each,
#   3. concatenates both cohorts into a row-shuffled `totalData`,
#   4. saves the three frames and the coefficient vector when doSaveCSV is set.
#
# Reads `dataGenerator`, `noiseScale` and `numDataset` from earlier cells.

# Set to True to write every generated dataset under massiveData/.
# Defined here so the cell runs on a fresh kernel; previously this name only
# existed as leftover kernel state and the cell raised NameError otherwise.
doSaveCSV = False
outputDir = Path("massiveData")
if doSaveCSV:
    # to_csv / savetxt do not create missing directories themselves.
    outputDir.mkdir(parents=True, exist_ok=True)

columnNames = ['T', 'Age', 'AgeTag', 'Sex']

# numDataset values, log-spaced between 2e-2 and 2e2 (inclusive).
interactCoeffs = [2 * (10 ** x) for x in np.linspace(-2, 2, numDataset)]

for i, interactCoeff in enumerate(interactCoeffs):
    # Progress trace; matches the per-iteration values in the recorded output.
    print(interactCoeff)

    # Per-iteration file suffix so each dataset gets its own files instead of
    # overwriting the previous one.
    # NOTE(review): the original suffix format was never defined in the
    # notebook; the loop index is used here — adjust if another scheme is expected.
    txtSubname = str(i)

    # Only "D" varies across the sweep; A, B and C stay fixed at 1.
    coefficients = {
        "A": 1,
        "B": 1,
        "C": 1,
        "D": interactCoeff,
    }

    # Age: ageType=-1 requests the young cohort, ageType=1 the old cohort.
    youngAge = dataGenerator.getAge(ageType=-1)
    oldAge = dataGenerator.getAge(ageType=1)

    youngAgeTag = dataGenerator.getAgeTag(ageType=-1)
    oldAgeTag = dataGenerator.getAgeTag(ageType=1)

    # Sex (per the original note: binary, 0 is female and 1 is male —
    # TODO confirm in DataGenerator01).
    youngSex = dataGenerator.getSex(age=youngAge)
    oldSex = dataGenerator.getSex(age=oldAge)

    # Treatment
    youngT = dataGenerator.getTreatment(sex=youngSex, ageType=-1)
    oldT = dataGenerator.getTreatment(sex=oldSex, ageType=1)

    # One row per individual, one column per generated attribute.
    oldData = pd.DataFrame(
        np.vstack((oldT, oldAge, oldAgeTag, oldSex)).T, columns=columnNames
    )
    youngData = pd.DataFrame(
        np.vstack((youngT, youngAge, youngAgeTag, youngSex)).T, columns=columnNames
    )

    # Outcomes 1..3. For outcome k the columns are:
    #   yo{k} / yo{k}CF - getOutcome{k} without / with CF=True
    #                     (presumably the counterfactual outcome — confirm)
    #   y{k}  / y{k}CF  - the same two columns after addNoise
    # The statement order below (and therefore column order and the order of
    # generator calls) is kept exactly as in the original unrolled code.
    outcomeGetters = (
        dataGenerator.getOutcome1,
        dataGenerator.getOutcome2,
        dataGenerator.getOutcome3,
    )
    for k, getOutcome in enumerate(outcomeGetters, start=1):
        yo, yoCF = 'yo{}'.format(k), 'yo{}CF'.format(k)
        y, yCF = 'y{}'.format(k), 'y{}CF'.format(k)

        oldData[yo] = getOutcome(oldData, coefficients)
        oldData[yoCF] = getOutcome(oldData, coefficients, CF=True)
        oldData[y] = dataGenerator.addNoise(oldData[yo], scale=noiseScale)
        oldData[yCF] = dataGenerator.addNoise(oldData[yoCF], scale=noiseScale)

        youngData[yo] = getOutcome(youngData, coefficients)
        youngData[y] = dataGenerator.addNoise(youngData[yo], scale=noiseScale)
        youngData[yoCF] = getOutcome(youngData, coefficients, CF=True)
        youngData[yCF] = dataGenerator.addNoise(youngData[yoCF], scale=noiseScale)

    # Stack both cohorts and shuffle the rows (sample(frac=1) returns every
    # row in random order).
    totalData = pd.concat([oldData, youngData], axis=0).sample(frac=1)

    if doSaveCSV:
        oldData.to_csv(outputDir / "oldData_intCoef{}.csv".format(txtSubname))
        youngData.to_csv(outputDir / "youngData_intCoef{}.csv".format(txtSubname))
        totalData.to_csv(outputDir / "totalData_intCoef{}.csv".format(txtSubname))
        # Coefficients are written in A, B, C, D order (the bare names
        # A/B/C/D were never defined; the values live in `coefficients`).
        np.savetxt(
            outputDir / 'coef{}.txt'.format(txtSubname),
            np.array([coefficients[key] for key in ("A", "B", "C", "D")]),
        )
        print("Data saved")

0.02
0.024135852812786577
0.029126969550024872
0.035150212497095844
0.04241901775840381
0.051190958453990715
0.061776871929549636
0.0745518744062988
0.08996865337938888
0.10857350878647719
0.1310257113719102
0.15812086421815394
0.19081909526999877
0.2302790798652894
0.2778990988746275
0.3353665873622015
0.40471792954503133
0.4884106189097302
0.5894103405103619
0.7112960612446257
0.8583868520257553
1.035894935846242
1.2501103850547939
1.508624012670923
1.8205963559830436
2.1970822839751167
2.651422731180216
3.199717439212115
3.8613954577664993
4.659903621030744
5.623537395948456
6.786443543790653
8.189830124760846
9.883426722647666
11.927246633189286
14.393713460023028
17.37022747502704
20.962262683093705
25.297104337105917
30.528359343504636
36.84139938653429
44.45992965052387
53.653915905594495
64.74915085635287
78.13879874109226
94.2973272691478
113.79732058036586
137.32976900085995
165.72855457093684
200.0
