In [1]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.colors import LinearSegmentedColormap

from src.dataGenerator import DataGenerator01

In [2]:
# Settings handed to DataGenerator01. The young treatment rate is written as the
# complement (1 - 0.1) of the old treatment rate.
config = dict(
    Nold=500,
    oldTreatRate=0.1,
    Nyoung=500,
    youngTreatRate=1 - 0.1,
)
dataGenerator = DataGenerator01(config)

# Expose the individual settings as notebook-level names (read back from the dict
# after the generator has been constructed, as before).
Nold, oldTreatRate, Nyoung, youngTreatRate = (
    config[settingName]
    for settingName in ("Nold", "oldTreatRate", "Nyoung", "youngTreatRate")
)

In [3]:
# parameters
noiseScale = 0    # `scale` passed to dataGenerator.addNoise for every outcome column
numDataset = 50   # number of interaction-coefficient values, one dataset per value
doSaveCSV = True  # write every dataset and its coefficients to disk
randomSeed = 0    # fixed seed so a fresh "Restart & Run All" is repeatable

# Seed the global generators. pandas' DataFrame.sample uses NumPy's global state
# when no random_state is given, so the row shuffle becomes repeatable.
# NOTE(review): the draws made inside DataGenerator01 are covered only if it also
# uses these global generators - confirm.
np.random.seed(randomSeed)
random.seed(randomSeed)

In [4]:
# Generate `numDataset` datasets. The interaction coefficient "D" is swept over
# 10**x for x evenly spaced in [-2, 2]; coefficients "A", "B" and "C" stay at 1.
# Each dataset holds an old and a young cohort, three outcomes with their
# counterfactual (CF) versions, and noisy copies of both.
outputDir = "massiveData"
cohortColumns = ['T', 'Age', 'AgeTag', 'Sex']
outcomeGetters = (
    dataGenerator.getOutcome1,
    dataGenerator.getOutcome2,
    dataGenerator.getOutcome3,
)

if doSaveCSV:
    # to_csv / savetxt do not create missing directories, so make sure the
    # target exists before the first write.
    os.makedirs(outputDir, exist_ok=True)

for i, interactCoeff in enumerate([(10 ** x) for x in np.linspace(-2, 2, numDataset)]):
    coefficients = {
        "A": 1,
        "B": 1,
        "C": 1,
        "D": interactCoeff,
    }

    # Age (original note: binary, 0 is young and 1 is old).
    # NOTE(review): the cohorts are requested with ageType=-1 (young) and
    # ageType=1 (old); confirm which encoding DataGenerator01 actually stores.
    youngAge = dataGenerator.getAge(ageType=-1)
    oldAge = dataGenerator.getAge(ageType=1)

    youngAgeTag = dataGenerator.getAgeTag(ageType=-1)
    oldAgeTag = dataGenerator.getAgeTag(ageType=1)

    # Sex (binary, 0 is female and 1 is male)
    youngSex = dataGenerator.getSex(age=youngAge)
    oldSex = dataGenerator.getSex(age=oldAge)

    # Treatment
    youngT = dataGenerator.getTreatment(sex=youngSex, ageType=-1)
    oldT = dataGenerator.getTreatment(sex=oldSex, ageType=1)

    # Stack the covariates through one NumPy array (one row per individual) so
    # every column keeps the common dtype the saved CSVs have always had.
    oldData = pd.DataFrame(
        np.vstack((oldT, oldAge, oldAgeTag, oldSex)).T, columns=cohortColumns
    )
    youngData = pd.DataFrame(
        np.vstack((youngT, youngAge, youngAgeTag, youngSex)).T, columns=cohortColumns
    )

    # For outcome k add: yo<k> (noise-free), yo<k>CF (noise-free counterfactual),
    # y<k> and y<k>CF (the same two with noise added). The old cohort is processed
    # first for every outcome; its column order is the one the concatenated frame
    # and the CSV inherit.
    for k, getOutcome in enumerate(outcomeGetters, start=1):
        cleanName = 'yo{}'.format(k)
        cleanCFName = 'yo{}CF'.format(k)
        noisyName = 'y{}'.format(k)
        noisyCFName = 'y{}CF'.format(k)
        for cohort in (oldData, youngData):
            cohort[cleanName] = getOutcome(cohort, coefficients)
            cohort[cleanCFName] = getOutcome(cohort, coefficients, CF=True)
            cohort[noisyName] = dataGenerator.addNoise(cohort[cleanName], scale=noiseScale)
            cohort[noisyCFName] = dataGenerator.addNoise(cohort[cleanCFName], scale=noiseScale)

    # Merge both cohorts and shuffle the rows (the original per-cohort index is kept).
    totalData = pd.concat([oldData, youngData], axis=0).sample(frac=1)

    if doSaveCSV:
        txtSubname = "{:06d}".format(i)
        print(txtSubname)
        totalData.to_csv("{}/totalData_intCoef{}.csv".format(outputDir, txtSubname))
        # Coefficients are stored next to the data, one value per line: A, B, C, D.
        np.savetxt('{}/coef_intCoef{}.txt'.format(outputDir, txtSubname),
                   np.array([coefficients[name] for name in ("A", "B", "C", "D")]))
        print("Data saved")

000000
Data saved
000001
Data saved
000002
Data saved
000003
Data saved
000004
Data saved
000005
Data saved
000006
Data saved
000007
Data saved
000008
Data saved
000009
Data saved
000010
Data saved
000011
Data saved
000012
Data saved
000013
Data saved
000014
Data saved
000015
Data saved
000016
Data saved
000017
Data saved
000018
Data saved
000019
Data saved
000020
Data saved
000021
Data saved
000022
Data saved
000023
Data saved
000024
Data saved
000025
Data saved
000026
Data saved
000027
Data saved
000028
Data saved
000029
Data saved
000030
Data saved
000031
Data saved
000032
Data saved
000033
Data saved
000034
Data saved
000035
Data saved
000036
Data saved
000037
Data saved
000038
Data saved
000039
Data saved
000040
Data saved
000041
Data saved
000042
Data saved
000043
Data saved
000044
Data saved
000045
Data saved
000046
Data saved
000047
Data saved
000048
Data saved
000049
Data saved
