In [16]:
from utils import simulation
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from concurrent.futures import ProcessPoolExecutor
from tqdm import tqdm

### Simulations Parameter ##############
n = 1500
seed = 42
n_sim = 1000
B_1 = 0

jk_ab_calc = False
boot_var_calc = False
ijk_std_calc = False
train_models = False
B_RF = 1000

########## Varying Parameters ##########


tau = 37

params_data_creation =   { 'shape_weibull': 1.5, 
                                'scale_weibull_base':   5597.308204063027      , 
                                'rate_censoring':  0.038465201478012315   ,
                                'b_bloodp': -0.405, 'b_diab': -0.4, 'b_age': -0.05, 'b_bmi': -0.01, 'b_kreat': -0.2, 
                                'n': n, 'seed': seed, 'tau': tau}




params_rf = {  'n_estimators':B_RF,                        
                'max_depth':4,
                'min_samples_split':5,
                'max_features': 'log2',
                'random_state':  seed,
                'bootstrap':     True,  }
X_erwartung = pd.DataFrame({'bmi': [25], 'blood_pressure': [0], 'kreatinkinase': [np.exp(5+1/2)], 'diabetes': [0], 'age': [50]})


######## Start Simulation   ########
with ProcessPoolExecutor() as executor:
    
    ### Array to store the results
    portion_events_after_cut_train = np.zeros(n_sim)
    portion_censored_after_cut_train = np.zeros(n_sim)
    portion_no_events_after_cut_train = np.zeros(n_sim)
    portion_events_after_cut_test = np.zeros(n_sim)
    portion_censored_after_cut_test = np.zeros(n_sim)
    portion_no_events_after_cut_test = np.zeros(n_sim)
    wb_mse_ipcw = np.zeros(n_sim)
    wb_cindex_ipcw = np.zeros(n_sim)
    wb_y_pred_X_point = np.zeros(n_sim)
    rf_mse_ipcw = np.zeros(n_sim)
    rf_y_pred_X_point = np.zeros(n_sim)
    ijk_var_pred_X_point = np.zeros(n_sim)
    ijk_var_biased_X_point = np.zeros(n_sim)
    bootstrap_var_pred_X_point = np.zeros(n_sim)
    jk_ab_var_pred_X_point = np.zeros(n_sim)
    

    futures = [
        executor.submit(
            simulation,
            seed=seed+i,
            tau=tau, 
            data_generation_weibull_parameters=params_data_creation,
            params_rf=params_rf, 
            X_pred_point=X_erwartung,
            B_first_level = B_1,
            boot_std_calc = boot_var_calc,
            ijk_std_calc = ijk_std_calc,
            train_models = train_models,
            jk_ab_calc = jk_ab_calc
        )
        for i in range(n_sim)
    ]

    for i, future in enumerate(tqdm(futures, desc="Simulations", unit="simulation")):
        _portion_events_after_cut_train, _portion_censored_after_cut_train, _portion_no_events_after_cut_train, \
        _portion_events_after_cut_test, _portion_censored_after_cut_test, _portion_no_events_after_cut_test, \
        _wb_mse_ipcw, _wb_cindex_ipcw, _wb_y_pred_X_point, \
        _rf_mse_ipcw, _rf_y_pred_X_point, _ijk_biased_var_pred_X_point,_ijk_correction, _bootstrap_var_pred_X_point, _jk_ab_var_pred_X_point  = future.result()



        #Event-Stats Results
        portion_events_after_cut_train[i] = _portion_events_after_cut_train
        portion_censored_after_cut_train[i] = _portion_censored_after_cut_train
        portion_no_events_after_cut_train[i] = _portion_no_events_after_cut_train
        portion_events_after_cut_test[i] = _portion_events_after_cut_test
        portion_censored_after_cut_test[i] = _portion_censored_after_cut_test
        portion_no_events_after_cut_test[i] = _portion_no_events_after_cut_test
        
        #Evaluation Results
        wb_mse_ipcw[i] = _wb_mse_ipcw
        wb_cindex_ipcw[i] = _wb_cindex_ipcw
        rf_mse_ipcw[i] = _rf_mse_ipcw

        #Prediction Results
        wb_y_pred_X_point[i] = _wb_y_pred_X_point[0]
        rf_y_pred_X_point[i] = _rf_y_pred_X_point[0]

        # Standard Deviation Estimates
        jk_ab_var_pred_X_point[i] = _jk_ab_var_pred_X_point
        bootstrap_var_pred_X_point[i] = _bootstrap_var_pred_X_point


def print_stats_data():

    print('Event-Stats Results:')
    print('Train:')
    print(f'Portion of events after cut:     {round(np.mean(portion_events_after_cut_train)*100,2)}%,   n={round(n*0.7*np.mean(portion_events_after_cut_train),0)}')
    print(f'Portion of no events after cut:  {round(np.mean(portion_no_events_after_cut_train)*100,2)}%,    n={round(n*0.7*np.mean(portion_no_events_after_cut_train),0)}')
    print(f'Portion of censored after cut:   {round(np.mean(portion_censored_after_cut_train)*100,2)}%,   n={round(n*0.7*np.mean(portion_censored_after_cut_train),0)}\n')
    print('Test:')
    print(f'Portion of events after cut:     {round(np.mean(portion_events_after_cut_test)*100,2)}%,   n={round(n*0.7*np.mean(portion_events_after_cut_test),0)}')
    print(f'Portion of no events after cut:  {round(np.mean(portion_no_events_after_cut_test)*100,2)}%,    n={round(n*0.7*np.mean(portion_no_events_after_cut_test),0)}')
    print(f'Portion of censored after cut:   {round(np.mean(portion_censored_after_cut_test)*100,2)}%,   n={round(n*0.7*np.mean(portion_censored_after_cut_test),0)}')
    print('\n')
print_stats_data()

Simulations: 100%|██████████| 1000/1000 [00:04<00:00, 204.28simulation/s]


Event-Stats Results:
Train:
Portion of events after cut:     12.0%,   n=126.0
Portion of no events after cut:  18.26%,    n=192.0
Portion of censored after cut:   69.75%,   n=732.0

Test:
Portion of events after cut:     11.98%,   n=126.0
Portion of no events after cut:  18.26%,    n=192.0
Portion of censored after cut:   69.77%,   n=733.0




In [7]:
def find_parameters(target_event_portion, target_censored_portion, initial_scale, initial_rate):
    scale = initial_scale
    rate = initial_rate
    tolerance = 0.001  # Toleranzschwelle für die Anteile
    max_iterations = 500
    iteration = 0

    while iteration < max_iterations:
        # Aktualisieren der Parameter
        params_data_creation['scale_weibull_base'] = scale
        params_data_creation['rate_censoring'] = rate

        # Führen Sie eine einzelne Simulation durch
        result = simulation(
            seed=seed,
            tau=tau,
            data_generation_weibull_parameters=params_data_creation,
            params_rf=params_rf,
            X_pred_point=X_erwartung,
            B_first_level=B_1,
            boot_std_calc=boot_var_calc,
            ijk_std_calc=ijk_std_calc,
            train_models=train_models,
            jk_ab_calc=jk_ab_calc
        )

        # Extrahieren der Anteile
        event_portion = result[0]  # Portion of events after cut (Annahme)
        censored_portion = result[1]  # Portion of censored after cut (Annahme)

        # Überprüfung der Anteile
        if abs(event_portion - target_event_portion) < tolerance and abs(censored_portion - target_censored_portion) < tolerance:
            print(f"Gewünschte Anteile erreicht bei  \nscale_weibull_base=  {scale} \nund rate_censoring=  {rate}")
            break
        
        ss = 0.01
        
        # Anpassen der Parameter basierend auf der Differenz
        if event_portion < target_event_portion:
            scale -= scale * ss  # Verringern Sie scale_weibull_base um 5%
        else:
            scale += scale * ss  # Erhöhen Sie scale_weibull_base um 5%

        if censored_portion < target_censored_portion:
            rate += rate * ss  # Erhöhen Sie rate_censoring um 5%
        else:
            rate -= rate * ss  # Verringern Sie rate_censoring um 5%

        iteration += 1

    if iteration == max_iterations:
        print("Maximale Anzahl von Iterationen erreicht. Gewünschte Anteile nicht gefunden.")
        
    return scale, rate
        
########################################################################


s,r = find_parameters(0.115, 0.698, 5500, 0.04)


########################################################
### Simulations Parameter ##############
n = 1500
seed = 42
n_sim = 1000
B_1 = 200

jk_ab_calc = False
boot_var_calc = False
ijk_std_calc = False
train_models = False
B_RF = 2000

########## Varying Parameters ##########


tau = 37

params_data_creation = { 'shape_weibull': 1, 
                        
                        'scale_weibull_base': s         , 
                        'rate_censoring':r  ,
                        
                        'b_bloodp': -0.405, 'b_diab': -0.4, 'b_age': -0.05, 'b_bmi': -0.01, 'b_kreat': -0.2, 
                        'n': n, 'seed': seed, 'tau': tau}



params_rf = {  'n_estimators':B_RF,                        
                'max_depth':3,
                'min_samples_split':5,
                'max_features': 'log2',
                'random_state':  seed,
                'bootstrap':     True,  }
X_erwartung = pd.DataFrame({'bmi': [25], 'blood_pressure': [0], 'kreatinkinase': [np.exp(5+1/2)], 'diabetes': [0], 'age': [50]})

######## Start Simulation   ########
with ProcessPoolExecutor() as executor:
    
    ### Array to store the results
    portion_events_after_cut_train = np.zeros(n_sim)
    portion_censored_after_cut_train = np.zeros(n_sim)
    portion_no_events_after_cut_train = np.zeros(n_sim)
    portion_events_after_cut_test = np.zeros(n_sim)
    portion_censored_after_cut_test = np.zeros(n_sim)
    portion_no_events_after_cut_test = np.zeros(n_sim)
    wb_mse_ipcw = np.zeros(n_sim)
    wb_cindex_ipcw = np.zeros(n_sim)
    wb_y_pred_X_point = np.zeros(n_sim)
    rf_mse_ipcw = np.zeros(n_sim)
    rf_y_pred_X_point = np.zeros(n_sim)
    ijk_var_pred_X_point = np.zeros(n_sim)
    bootstrap_var_pred_X_point = np.zeros(n_sim)
    jk_ab_var_pred_X_point = np.zeros(n_sim)

    futures = [
        executor.submit(
            simulation,
            seed=seed+i,
            tau=tau, 
            data_generation_weibull_parameters=params_data_creation,
            params_rf=params_rf, 
            X_pred_point=X_erwartung,
            B_first_level = B_1,
            boot_std_calc = boot_var_calc,
            ijk_std_calc = ijk_std_calc,
            train_models = train_models,
            jk_ab_calc = jk_ab_calc
        )
        for i in range(n_sim)
    ]

    for i, future in enumerate(tqdm(futures, desc="Simulations", unit="simulation")):
        _portion_events_after_cut_train, _portion_censored_after_cut_train, _portion_no_events_after_cut_train, \
         _portion_events_after_cut_test, _portion_censored_after_cut_test, _portion_no_events_after_cut_test, \
        _wb_mse_ipcw, _wb_cindex_ipcw, _wb_y_pred_X_point, \
        _rf_mse_ipcw, _rf_y_pred_X_point, _ijk_var_pred_X_point, _bootstrap_var_pred_X_point, _jk_ab_var_pred_X_point  = future.result()

        #Event-Stats Results
        portion_events_after_cut_train[i] = _portion_events_after_cut_train
        portion_censored_after_cut_train[i] = _portion_censored_after_cut_train
        portion_no_events_after_cut_train[i] = _portion_no_events_after_cut_train
        portion_events_after_cut_test[i] = _portion_events_after_cut_test
        portion_censored_after_cut_test[i] = _portion_censored_after_cut_test
        portion_no_events_after_cut_test[i] = _portion_no_events_after_cut_test
        
        #Evaluation Results
        wb_mse_ipcw[i] = _wb_mse_ipcw
        wb_cindex_ipcw[i] = _wb_cindex_ipcw
        rf_mse_ipcw[i] = _rf_mse_ipcw

        #Prediction Results
        wb_y_pred_X_point[i] = _wb_y_pred_X_point[0]
        rf_y_pred_X_point[i] = _rf_y_pred_X_point[0]

        # Standard Deviation Estimates
        ijk_var_pred_X_point[i]  = _ijk_var_pred_X_point
        jk_ab_var_pred_X_point[i] = _jk_ab_var_pred_X_point
        bootstrap_var_pred_X_point[i] = _bootstrap_var_pred_X_point
def print_stats_data():

    print('Event-Stats Results:')
    print('Train:')
    print(f'Portion of events after cut:     {round(np.mean(portion_events_after_cut_train)*100,2)}%,   n={round(n*0.7*np.mean(portion_events_after_cut_train),0)}')
    print(f'Portion of no events after cut:  {round(np.mean(portion_no_events_after_cut_train)*100,2)}%,    n={round(n*0.7*np.mean(portion_no_events_after_cut_train),0)}')
    print(f'Portion of censored after cut:   {round(np.mean(portion_censored_after_cut_train)*100,2)}%,   n={round(n*0.7*np.mean(portion_censored_after_cut_train),0)}\n')
    print('Test:')
    print(f'Portion of events after cut:     {round(np.mean(portion_events_after_cut_test)*100,2)}%,   n={round(n*0.7*np.mean(portion_events_after_cut_test),0)}')
    print(f'Portion of no events after cut:  {round(np.mean(portion_no_events_after_cut_test)*100,2)}%,    n={round(n*0.7*np.mean(portion_no_events_after_cut_test),0)}')
    print(f'Portion of censored after cut:   {round(np.mean(portion_censored_after_cut_test)*100,2)}%,   n={round(n*0.7*np.mean(portion_censored_after_cut_test),0)}')
    print('\n')
print_stats_data()


Gewünschte Anteile erreicht bei  
scale_weibull_base=  9115.851814783131 
und rate_censoring=  0.04021055606963396


Simulations: 100%|██████████| 1000/1000 [00:05<00:00, 167.16simulation/s]


Event-Stats Results:
Train:
Portion of events after cut:     12.12%,   n=127.0
Portion of no events after cut:  17.58%,    n=185.0
Portion of censored after cut:   70.3%,   n=738.0

Test:
Portion of events after cut:     12.09%,   n=127.0
Portion of no events after cut:  17.58%,    n=185.0
Portion of censored after cut:   70.33%,   n=738.0


