# Simulated datasets

In [1]:
from torchctrnn.data.simulations import GlucoseData
import pandas as pd

In [5]:
# Quick look at the simulator output: a small batch drawn with a fixed seed
# so the preview below is reproducible.
n_preview, preview_seed = 10, 1234
sim = GlucoseData()
df = sim.simulate(n_preview, seed=preview_seed)
df.head()

Unnamed: 0,t,glucose_t,glucose_t_obs,obs,insulin_t,dextrose_t,id
0,0.0,125.848949,125.848949,True,0.0,0.0,0
1,0.1,128.216933,128.216933,False,0.0,0.0,0
2,0.2,130.586911,130.586911,False,0.0,0.0,0
3,0.3,131.379341,131.379341,False,0.0,0.0,0
4,0.4,133.056805,133.056805,False,0.0,0.0,0


In [3]:
# Restrict the preview to the rows whose `obs` flag is set.
observed = df.obs == True
df.loc[observed].head()

Unnamed: 0,t,glucose_t,glucose_t_obs,obs,insulin_t,dextrose_t,id
0,0.0,125.848949,125.848949,True,0.0,0.0,0
48,4.8,145.004618,145.004618,True,0.05,0.0,0
78,7.8,144.195128,144.195128,True,0.05,0.0,0
102,10.2,143.600091,143.600091,True,0.05,0.0,0
130,13.0,142.457832,142.457832,True,0.05,3.732156,0


## Data size


In [4]:
# Data-size experiment: nominal sizes of 1000, 5000 and 10000 trajectories,
# one replicate (v0, v1, ...) per entry in `seeds`.
# NOTE: each call simulates int(size*1.2) trajectories, i.e. 20% more than the
# nominal size (presumably so a held-out split can be taken downstream --
# TODO confirm).
data_sizes = [1000,5000,10000]
seeds = [1,12,123]
for size in data_sizes:
    # Iterate over the seeds themselves so the number of replicates always
    # matches the seed list (no separate hard-coded count to keep in sync).
    for j,seed in enumerate(seeds):
        sim = GlucoseData()
        df = sim.simulate(int(size*1.2),seed=seed)

        # Rows flagged as observed; every derived column below is only
        # filled on these rows and stays NaN elsewhere.
        is_obs = df.obs == True
        obs_by_id = df.loc[is_obs,:].groupby('id')

        # Next observed glucose value / time within the same trajectory
        # (NaN for the last observation of each id).
        df.loc[is_obs,'glucose_t_obs_next'] = obs_by_id['glucose_t_obs'].shift(-1)
        df.loc[is_obs,'t1'] = obs_by_id['t'].shift(-1)
        # 1-based index of the observation within its trajectory.
        df.loc[is_obs,'n'] = obs_by_id.cumcount().add(1)
        # Mask columns initialised to 0 on observed rows (their meaning is
        # defined by the downstream consumer, not visible here).
        df.loc[is_obs,'msk'] = 0
        df.loc[is_obs,'msk0'] = 0
        # Global row number across the whole frame.
        df['rn'] = range(0,df.shape[0])

        df.to_csv('../../data/simulation_{}_v{}.csv'.format(size,j),index=False)
        print('saved data...')
df.head()

saved data...
saved data...
saved data...
saved data...
saved data...
saved data...
saved data...
saved data...
saved data...


Unnamed: 0,t,glucose_t,glucose_t_obs,obs,insulin_t,dextrose_t,id,glucose_t_obs_next,t1,n,msk,msk0,rn
0,0.0,128.435437,128.435437,True,0.0,3.736966,0,146.53569,3.9,1.0,0.0,0.0,0
1,0.1,129.660232,129.660232,False,0.0,3.736966,0,,,,,,1
2,0.2,129.685736,129.685736,False,0.0,3.736966,0,,,,,,2
3,0.3,131.520292,131.520292,False,0.0,3.736966,0,,,,,,3
4,0.4,131.374251,131.374251,False,0.0,3.736966,0,,,,,,4


In [2]:
# Validation set: same pipeline as the training sets, with its own seed.
sim = GlucoseData()
df = sim.simulate(int(1000*1.2),seed=666)

# Rows flagged as observed; the derived columns are only filled there.
is_obs = df.obs == True
obs_by_id = df.loc[is_obs,:].groupby('id')

# Next observed glucose value and its time within each trajectory.
df.loc[is_obs,'glucose_t_obs_next'] = obs_by_id['glucose_t_obs'].shift(-1)
df.loc[is_obs,'t1'] = obs_by_id['t'].shift(-1)
# 1-based index of the observation within its trajectory.
df.loc[is_obs,'n'] = obs_by_id.cumcount().add(1)
# Mask columns start at 0 on observed rows.
df.loc[is_obs,'msk'] = 0
df.loc[is_obs,'msk0'] = 0
# Global row number across the whole frame.
df['rn'] = range(0,df.shape[0])

df.to_csv('../../data/simulation_val.csv',index=False)

## Measurement error


In [10]:
# Measurement-error experiment: nominal size of 5000 trajectories, one
# replicate (v0, v1, ...) per entry in `seeds`.
# NOTE: each call simulates int(5000*1.2) = 6000 trajectories (presumably so a
# held-out split can be taken downstream -- TODO confirm).
seeds = [1,12,123]
# Iterate over the seeds themselves so the number of replicates always
# matches the seed list (no separate hard-coded count to keep in sync).
for i,seed in enumerate(seeds):
    sim = GlucoseData(measurement_error = 0.05)
    df = sim.simulate(int(5000*1.2),seed=seed)

    # Rows flagged as observed; every derived column below is only filled
    # on these rows and stays NaN elsewhere.
    is_obs = df.obs == True
    obs_by_id = df.loc[is_obs,:].groupby('id')

    # Next observed glucose value / time within the same trajectory
    # (NaN for the last observation of each id).
    df.loc[is_obs,'glucose_t_obs_next'] = obs_by_id['glucose_t_obs'].shift(-1)
    df.loc[is_obs,'t1'] = obs_by_id['t'].shift(-1)
    # 1-based index of the observation within its trajectory.
    df.loc[is_obs,'n'] = obs_by_id.cumcount().add(1)
    # Mask columns initialised to 0 on observed rows (their meaning is
    # defined by the downstream consumer, not visible here).
    df.loc[is_obs,'msk'] = 0
    df.loc[is_obs,'msk0'] = 0
    # Global row number across the whole frame.
    df['rn'] = range(0,df.shape[0])

    df.to_csv('../../data/simulation_error_v{}.csv'.format(i),index=False)
    print('saved data...')
df.head()

saved data...
saved data...
saved data...


Unnamed: 0,t,glucose_t,glucose_t_obs,obs,insulin_t,dextrose_t,id,glucose_t_obs_next,t1,n,msk,msk0,rn
0,0.0,128.435437,136.827793,True,0.0,3.736966,0,139.481208,3.9,1.0,0.0,0.0,0
1,0.1,129.660232,128.755337,False,0.0,3.736966,0,,,,,,1
2,0.2,129.685736,136.189992,False,0.0,3.736966,0,,,,,,2
3,0.3,131.520292,123.871668,False,0.0,3.736966,0,,,,,,3
4,0.4,131.374251,145.824785,False,0.0,3.736966,0,,,,,,4


In [3]:
# Validation set for the measurement-error experiment (simulator configured
# with measurement_error = 0.05), using its own seed.
sim = GlucoseData(measurement_error = 0.05)
df = sim.simulate(int(1000*1.2),seed=666)

# Rows flagged as observed; the derived columns are only filled there.
is_obs = df.obs == True
obs_by_id = df.loc[is_obs,:].groupby('id')

# Next observed glucose value and its time within each trajectory.
df.loc[is_obs,'glucose_t_obs_next'] = obs_by_id['glucose_t_obs'].shift(-1)
df.loc[is_obs,'t1'] = obs_by_id['t'].shift(-1)
# 1-based index of the observation within its trajectory.
df.loc[is_obs,'n'] = obs_by_id.cumcount().add(1)
# Mask columns start at 0 on observed rows.
df.loc[is_obs,'msk'] = 0
df.loc[is_obs,'msk0'] = 0
# Global row number across the whole frame.
df['rn'] = range(0,df.shape[0])

df.to_csv('../../data/simulation_val_error.csv',index=False)

## Non-stationary


In [6]:
# Non-stationary experiment: nominal size of 5000 trajectories, one replicate
# (v0, v1, ...) per entry in `seeds`.
# NOTE: each call simulates int(5000*1.2) = 6000 trajectories (presumably so a
# held-out split can be taken downstream -- TODO confirm).
seeds = [1,12,123]
# Iterate over the seeds themselves so the number of replicates always
# matches the seed list (no separate hard-coded count to keep in sync).
for i,seed in enumerate(seeds):
    sim = GlucoseData(nonstationary = 1)
    df = sim.simulate(int(5000*1.2),seed=seed)

    # Rows flagged as observed; every derived column below is only filled
    # on these rows and stays NaN elsewhere.
    is_obs = df.obs == True
    obs_by_id = df.loc[is_obs,:].groupby('id')

    # Next observed glucose value / time within the same trajectory
    # (NaN for the last observation of each id).
    df.loc[is_obs,'glucose_t_obs_next'] = obs_by_id['glucose_t_obs'].shift(-1)
    df.loc[is_obs,'t1'] = obs_by_id['t'].shift(-1)
    # 1-based index of the observation within its trajectory.
    df.loc[is_obs,'n'] = obs_by_id.cumcount().add(1)
    # Mask columns initialised to 0 on observed rows (their meaning is
    # defined by the downstream consumer, not visible here).
    df.loc[is_obs,'msk'] = 0
    df.loc[is_obs,'msk0'] = 0
    # Global row number across the whole frame.
    df['rn'] = range(0,df.shape[0])

    df.to_csv('../../data/simulation_nonstationary_v{}.csv'.format(i),index=False)
    print('saved data...')
df.head()

saved data...
saved data...
saved data...


Unnamed: 0,t,glucose_t,glucose_t_obs,obs,insulin_t,dextrose_t,id,glucose_t_obs_next,t1,n,msk,msk0,rn
0,0.0,128.435437,128.435437,True,0.0,3.736966,0,146.503102,3.9,1.0,0.0,0.0,0
1,0.1,128.975385,128.975385,False,0.0,3.736966,0,,,,,,1
2,0.2,129.446539,129.446539,False,0.0,3.736966,0,,,,,,2
3,0.3,130.564418,130.564418,False,0.0,3.736966,0,,,,,,3
4,0.4,131.58672,131.58672,False,0.0,3.736966,0,,,,,,4
