In [7]:
import numpy as np
import pandas as pd

In [46]:
def simulWeib(X, T):
    N = X.shape[0] # Number of samples
    D = X.shape[1] # Number of features
    
    # Weibull latent event times
    scale1 = 20 + 5*(np.sum(X, axis=1))
    ltime1 = np.random.weibull(scale1, size=N)
    
    scale0 = np.exp(3 + 0.1*(np.sum(X, axis=1)))
    ltime0 = np.random.weibull(scale0, size=N)
    
    # Censoring times
    rate = 0.007
    c1 = np.random.exponential(scale=1/rate, size=N)
    c0 = np.random.exponential(scale=1/rate, size=N)
    
    # Follow-up times and event indicators
    time1 = np.minimum(ltime1, c1) + np.random.uniform(size=N)
    time0 = np.minimum(ltime0, c0) + np.random.uniform(size=N)
    
    status1 = (ltime1 <= c1).astype(int)
    status0 = (ltime0 <= c0).astype(int)
    
    # Survival time
    time = np.zeros(N)
    time[T == 1] = time1[T == 1]
    time[T == 0] = time0[T == 0]
    
    # Status
    status = np.zeros(N)
    status[T == 1] = status1[T == 1]
    status[T == 0] = status0[T == 0]
    
    # Create dataframes
    X_df = pd.DataFrame(X, columns=['X{}'.format(i) for i in range(D)])
    T_df = pd.DataFrame(T, columns=['Treatment'])
    status_df = pd.DataFrame(status, columns=['Status'])
    time_df = pd.DataFrame(time, columns=['Survival Time'])
    
    data = pd.concat([X_df, T_df, status_df, time_df], axis=1)
    return data

In [51]:
N = 100
num_features = 10
X = np.random.choice([0,1], size=[N, num_features])
T = np.random.choice([0,1], size=N)
data = simulWeib(X, T)
data

Unnamed: 0,X0,X1,X2,X3,X4,X5,X6,X7,X8,X9,Treatment,Status,Survival Time
0,1,1,0,0,1,1,1,0,0,1,1,1.0,1.294874
1,1,0,1,0,0,1,0,0,1,0,1,1.0,1.545900
2,0,0,0,0,1,1,1,0,1,0,0,1.0,1.163750
3,1,0,1,0,1,1,0,0,0,1,0,1.0,1.881994
4,1,1,1,1,1,1,1,1,0,0,1,1.0,1.708370
5,1,0,1,1,1,0,0,0,0,1,1,1.0,1.849345
6,1,0,0,1,0,0,0,1,1,1,1,1.0,1.858597
7,1,0,0,1,1,0,0,1,0,0,1,1.0,1.137049
8,1,0,0,1,0,1,0,0,0,0,0,1.0,1.671482
9,0,1,1,1,1,0,1,1,1,0,0,1.0,1.270862
