In [26]:
import numpy as np
import pandas as pd

In [27]:
def simulWeib(X, T=None, shape=None):
    """Simulate right-censored survival data for two treatment arms.

    For every row of ``X`` a latent event time and an exponential censoring
    time are drawn for both arms (1 and 0). The row's entry in ``T`` selects
    which arm's follow-up time and event indicator are reported.

    Parameters
    ----------
    X : array-like of shape (N, D)
        Numeric feature matrix. Only the per-row sum of the features enters
        the simulation.
    T : array-like of length N, optional
        Treatment indicators. Rows with ``T == 1`` take the arm-1 draw, rows
        with ``T == 0`` take the arm-0 draw; rows holding any other value keep
        ``Survival Time`` and ``Status`` equal to 0. When ``None`` (default),
        assignments are drawn uniformly from {0, 1}.
    shape : float or array-like, optional
        Weibull shape parameter. ``np.random.weibull(a)`` interprets ``a`` as
        the *shape* of a unit-scale Weibull distribution. When ``shape`` is
        ``None`` (default) the covariate-dependent values are passed as that
        shape parameter, exactly as this function has always done. When
        ``shape`` is given, the covariate-dependent values are used as the
        Weibull *scale* instead, i.e. ``scale * np.random.weibull(shape)``.

    Returns
    -------
    pandas.DataFrame
        One row per sample with columns ``X0 .. X{D-1}``, ``Treatment``,
        ``Status`` (1.0 if the latent event time did not exceed the censoring
        time, else 0.0) and ``Survival Time``.

    Notes
    -----
    Draws come from the global ``np.random`` state; seed it beforehand for
    reproducible output.
    """
    # Coerce to ndarray so that DataFrame/list inputs behave like arrays below
    # (pd.DataFrame(<DataFrame>, columns=...) would otherwise reindex to NaN).
    X = np.asarray(X)
    N = X.shape[0]  # Number of samples
    D = X.shape[1]  # Number of features

    feature_sum = np.sum(X, axis=1)

    # Covariate-dependent Weibull parameter of each arm.
    arm1_param = 20 + 5*feature_sum
    arm0_param = np.exp(3 + 0.1*feature_sum)

    # Weibull latent event times
    if shape is None:
        # Legacy behaviour: the covariate-dependent value is the Weibull shape.
        # NOTE(review): the original code called these values "scale"; with
        # large values the draws concentrate around 1. Pass `shape` to use
        # them as scales instead.
        ltime1 = np.random.weibull(arm1_param, size=N)
        ltime0 = np.random.weibull(arm0_param, size=N)
    else:
        ltime1 = arm1_param * np.random.weibull(shape, size=N)
        ltime0 = arm0_param * np.random.weibull(shape, size=N)

    # Censoring times: exponential with rate 0.007 (mean 1/rate)
    rate = 0.007
    c1 = np.random.exponential(scale=1/rate, size=N)
    c0 = np.random.exponential(scale=1/rate, size=N)

    # Follow-up times (plus a U[0, 1) offset) and event indicators
    time1 = np.minimum(ltime1, c1) + np.random.uniform(size=N)
    time0 = np.minimum(ltime0, c0) + np.random.uniform(size=N)

    status1 = (ltime1 <= c1).astype(int)
    status0 = (ltime0 <= c0).astype(int)

    # Generate treatment assignments when none were supplied. This is drawn
    # after all outcome draws so that seeded runs with an explicit T consume
    # the random stream exactly as before.
    if T is None:
        T = np.random.choice([0, 1], size=N)
    else:
        T = np.asarray(T)

    treated = (T == 1)
    control = (T == 0)

    # Survival time of the assigned arm
    time = np.zeros(N)
    time[treated] = time1[treated]
    time[control] = time0[control]

    # Status of the assigned arm
    status = np.zeros(N)
    status[treated] = status1[treated]
    status[control] = status0[control]

    # Create dataframes
    X_df = pd.DataFrame(X, columns=['X{}'.format(i) for i in range(D)])
    T_df = pd.DataFrame(T, columns=['Treatment'])
    status_df = pd.DataFrame(status, columns=['Status'])
    time_df = pd.DataFrame(time, columns=['Survival Time'])

    data = pd.concat([X_df, T_df, status_df, time_df], axis=1)
    return data

In [28]:
#N = 100
#num_features = 25
#X = np.random.choice([0,1], size=[N, num_features])
#T = np.random.choice([0,1], size=N)
#data = simulWeib(X, T)
#data.shape

### Apply to UNOS

In [29]:
from unos import UNOS_data

In [30]:
# Build the UNOS generator from the sample CSV and draw a sample with feature
# scaling switched off.
# `UNOS_data` is the name bound by `from unos import UNOS_data`; the bare
# module name `unos` is never imported in the visible cells, so
# `unos.UNOS_data(...)` raises NameError on a fresh kernel.
unos_generator = UNOS_data('unos/unos_sample.csv')
unos_df = unos_generator.draw_sample(enable_feature_scaling=False)

In [39]:
# Show the (rows, columns) dimensions of the drawn UNOS sample.
unos_df.shape

(60399, 50)

In [34]:
# Split the UNOS frame into the treatment vector T and the feature matrix X
# (every column except 'Treatment', 'Response' and 'TE').
# `.values` replaces `as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0; `.values` works on both old and new versions.
T = unos_df['Treatment'].values
X = unos_df.drop(['Treatment', 'Response', 'TE'], axis=1).values

In [35]:
# Simulate survival outcomes for the UNOS features, using the 'Treatment'
# column as the treatment assignment.
# NOTE(review): no np.random seed is set in the visible cells, so the
# simulated values differ between runs.
synth_unos = simulWeib(X, T)

In [40]:
# Preview the first rows of the simulated data set.
synth_unos.head(n=20)


Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,...,X40,X41,X42,X43,X44,X45,X46,Treatment,Status,Survival Time
0,57.0,1.0,3.1997,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,0.016002,0.125,0.0,1.0,1.048877
1,45.0,0.0,4.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,0.093023,0.085366,0.0,1.0,1.451665
2,36.0,1.0,3.0498,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,1.0,1.0,0.037031,0.0,0.0,1.0,1.546866
3,56.0,0.0,3.6831,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,1.0,2.0,0.01875,0.42857,0.0,1.0,1.750283
4,56.0,1.0,1.8499,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,2.0,1.0,1.0,0.12353,0.25068,0.0,1.0,1.154453
5,59.0,1.0,1.3499,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,2.0,2.0,0.040986,0.092815,0.0,1.0,1.078638
6,52.0,1.0,3.1499,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,2.0,1.0,0.016949,0.10336,0.0,1.0,1.594905
7,64.0,1.0,0.96655,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,2.0,1.0,0.059964,0.060066,0.0,1.0,1.445498
8,55.0,1.0,4.3496,1.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,2.0,2.0,1.0,0.04,0.50743,0.0,1.0,1.152734
9,27.0,1.0,2.4829,0.0,1.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,1.0,1.0,1.0,0.068667,0.10169,0.0,1.0,1.202784
