In [50]:
import pandas as pd
from lifelines import CoxPHFitter
from src.helper_methods import *
from src.pipe_store import *
from src.constants import *
from lifelines import CoxPHFitter

%matplotlib inline 
%config InlineBackend.figure_format = 'retina'
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [51]:
RELEVANT_DESA_BAD = {    
    '71TD', '144QL', '70DRA', '80TLR', '70DA', '67F', '76ED', '76ET', '158T', '45EV'
}

# Data Loading

### Total Cohort

In [63]:
path = '~/Repos/STRIDE/STRIDE-Analytics/data/20210614-mismatch_ep_db-extended.pickle'
num_col = ['DonorAge_NOTR', 'DialysisYears', 'RecipientAge_NOTR', 'CIPHour_DBD', 'CIPHour_DCD']

df = (
    data_loading(path)
    .pipe(keeping_features,
        'TransplantID',
        'Failure',
        'Survival[Y]',
        'EpvsHLA_Donor',
        '#DESA',
        'DESA', 
        'DonorAge_NOTR',
        'DialysisYears',
        'RecipientAge_NOTR',
        'CIPHour_DBD', 
        'CIPHour_DCD' ,
        'Donor_Type',
        'IL2rMoAb_T0'
    )
    .pipe(integer_encoder, 'IL2rMoAb_T0')
    .pipe(polynomial_power2, 'DonorAge_NOTR', 'RecipientAge_NOTR')
    .pipe(set_time_event_label, E='Failure', T='Survival[Y]')
    .pipe(eng_immunological_features, antibody_epitope=True)
    .pipe(feature_scaler, num_col + ['DonorAge_NOTR_power2', 'RecipientAge_NOTR_power2'], scaler='standard')
    .pipe(censoring_deaths)
    # .pipe(setting_prediction_horizon, 15)
    .rename(columns={'IL2rMoAb_T0':'IL2rMoAb'})
)

df = df.merge(hla_mm, on='TransplantID')

df = (
    df
    .pipe(feature_scaler, ['HLA_Mismatch'], scaler='maxmin')
    .drop('TransplantID', axis=1)
)

df = create_treatment_grups(df, [RELEVANT_DESA_BAD])

Step: data_loading | Shape: (4690, 24) | Computation Time: 0.014925s
Step: integer_encoder | Shape: (4690, 13) | Computation Time: 0.001575s
Step: set_time_event_label | Shape: (4690, 15) | Computation Time: 0.001561s
Step: eng_immunological_features | Shape: (4690, 19) | Computation Time: 0.03651s
Step: censoring_deaths | Shape: (4690, 19) | Computation Time: 0.004296s


In [64]:
df

Unnamed: 0,#DESA,DESA,DonorAge_NOTR,DialysisYears,RecipientAge_NOTR,CIPHour_DBD,CIPHour_DCD,Donor_Type,IL2rMoAb,DonorAge_NOTR_power2,...,T,No_DESA,Relevant_DESA_Bad,Class_I,Class_II,Class_I_II,HLA_Mismatch,Specific_DESA,Other_DESA,Groups
0,0,{},-0.019920,-0.183046,0.596332,-0.190025,-0.433789,Deceased,0,-0.200724,...,12.191781,1,0,0,0,0,0.000000,0,0,1
1,0,{},0.645513,-0.263943,-0.936160,0.857348,-0.433789,Deceased,0,0.579189,...,17.010959,1,0,0,0,0,0.333333,0,0,1
2,0,{},-0.019920,0.868621,-1.284454,2.027942,-0.433789,Deceased,0,-0.200724,...,6.956164,1,0,0,0,0,0.000000,0,0,1
3,0,{},-1.417332,-1.113366,-0.378890,1.535061,-0.433789,Deceased,1,-1.320455,...,4.509589,1,0,0,0,0,0.500000,0,0,1
4,0,{},0.712057,1.070865,-0.518208,-0.929348,1.549608,Deceased,0,0.665934,...,0.010959,1,0,0,0,0,0.166667,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4685,0,{},0.445883,-0.506635,-0.169914,1.452914,-0.433789,Deceased,1,0.328503,...,12.126027,1,0,0,0,0,0.166667,0,0,1
4686,0,{},0.645513,-0.425738,-0.587867,0.771094,-0.433789,Deceased,1,0.579189,...,10.879452,1,0,0,0,0,0.666667,0,0,1
4687,0,{},-1.550418,0.302339,0.039062,0.482210,-0.433789,Deceased,1,-1.390488,...,18.010959,1,0,0,0,0,0.500000,0,0,1
4688,0,{},-0.153007,-0.102148,-1.563089,-0.929348,3.273799,Deceased,1,-0.337606,...,11.816438,1,0,0,0,0,0.666667,0,0,1


In [65]:
# hla_mm = pd.read_pickle('~/Repos/STRIDE/desa_paper/data/hla_mm.pickle')
# df = df.merge(hla_mm, on='TransplantID')
# df_new = (
#     df
#     .pipe(feature_scaler, ['HLA_Mismatch'], scaler='maxmin')
#     .drop('TransplantID', axis=1)
# )

In [55]:
confounders = [
    'RecipientAge_NOTR', 
    'RecipientAge_NOTR_power2',
    'DonorAge_NOTR',
    'DonorAge_NOTR_power2',
    'IL2rMoAb', 
    'CIPHour_DBD', 'CIPHour_DCD', 
    'DialysisYears',
    'HLA_Mismatch',
    # 'No_DESA', 
    'Specific_DESA', 
    # 'Other_DESA'
]

In [56]:
formula = ' + '.join(confounders)

df_l = df_new
cph = CoxPHFitter()
cph.fit(df_l, duration_col='T', event_col='E', formula=formula)
len = df_l['Specific_DESA'].sum()
print(f' Bad DESA size in data set {len}')
cph.print_summary()

 Bad DESA size in data set 76


  return summary_df[columns].to_latex(float_format="%." + str(self.decimals) + "f")


0,1
model,lifelines.CoxPHFitter
duration col,'T'
event col,'E'
baseline estimation,breslow
number of observations,4690
number of events observed,2397
partial log-likelihood,-18682.40
time fit was run,2023-02-11 10:24:04 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
CIPHour_DBD,0.19,1.2,0.02,0.14,0.23,1.15,1.26,0.0,7.65,<0.005,45.55
CIPHour_DCD,0.2,1.22,0.02,0.16,0.24,1.17,1.27,0.0,8.88,<0.005,60.43
DialysisYears,0.09,1.1,0.02,0.06,0.13,1.06,1.14,0.0,4.81,<0.005,19.34
DonorAge_NOTR,-0.12,0.89,0.1,-0.31,0.07,0.73,1.07,0.0,-1.23,0.22,2.19
DonorAge_NOTR_power2,0.34,1.4,0.09,0.15,0.52,1.16,1.69,0.0,3.56,<0.005,11.37
HLA_Mismatch,0.33,1.38,0.09,0.16,0.49,1.17,1.64,0.0,3.79,<0.005,12.71
IL2rMoAb,0.2,1.23,0.06,0.1,0.31,1.1,1.37,0.0,3.68,<0.005,12.06
RecipientAge_NOTR,-0.48,0.62,0.1,-0.69,-0.28,0.5,0.76,0.0,-4.6,<0.005,17.86
RecipientAge_NOTR_power2,0.75,2.12,0.1,0.55,0.95,1.74,2.59,0.0,7.39,<0.005,42.67
Specific_DESA,0.82,2.26,0.13,0.56,1.07,1.75,2.92,0.0,6.27,<0.005,31.34

0,1
Concordance,0.64
Partial AIC,37384.80
log-likelihood ratio test,638.00 on 10 df
-log2(p) of ll-ratio test,431.52


|Total Cohrot (4690)| #Tx  | Bad DESA (HR) | 95% CI|
| ------------- | --------------| -----------|-----------|
| All           |         76|     2.31  | 1.79-2.98

# Deceased Donors

In [67]:
path = '~/Repos/STRIDE/STRIDE-Analytics/data/20210614-mismatch_ep_db-extended.pickle'
donor_type ='Deceased'
status = 'All'
num_col = ['DonorAge_NOTR', 'DialysisYears', 'RecipientAge_NOTR', 'CIPHour_DBD', 'CIPHour_DCD']

df = (
    data_loading(path)
    .pipe(start_pipeline, status, donor_type)
    .pipe(keeping_features,
        'TransplantID', 
        'Failure',
        'Survival[Y]',
        'EpvsHLA_Donor',
        '#DESA',
        'DESA', 
        'DonorAge_NOTR',
        'DialysisYears',
        'RecipientAge_NOTR',
        'CIPHour_DBD', 
        'CIPHour_DCD' ,
        'Donor_Type',
        'IL2rMoAb_T0'
    )
    .pipe(integer_encoder, 'IL2rMoAb_T0')
    .pipe(polynomial_power2, 'DonorAge_NOTR', 'RecipientAge_NOTR')
    .pipe(set_time_event_label, E='Failure', T='Survival[Y]')
    .pipe(eng_immunological_features, antibody_epitope=True)
    .pipe(feature_scaler, num_col + ['DonorAge_NOTR_power2', 'RecipientAge_NOTR_power2'], scaler='standard')
    .pipe(censoring_deaths)
    # .pipe(setting_prediction_horizon, 15)
    .rename(columns={'IL2rMoAb_T0':'IL2rMoAb'})
)

df = df.merge(hla_mm, on='TransplantID')

df = (
    df
    .pipe(feature_scaler, ['HLA_Mismatch'], scaler='maxmin')
    .drop('TransplantID', axis=1)
)

df = create_treatment_grups(df, [RELEVANT_DESA_BAD])

Step: data_loading | Shape: (4690, 24) | Computation Time: 0.02023s
- selceted cohort --> Donor Type: Deceased, Epitope Antibody Presence: All
Step: start_pipeline | Shape: (3235, 23) | Computation Time: 0.009218s
Step: integer_encoder | Shape: (3235, 13) | Computation Time: 0.001085s
Step: set_time_event_label | Shape: (3235, 15) | Computation Time: 0.002528s
Step: eng_immunological_features | Shape: (3235, 19) | Computation Time: 0.036479s
Step: censoring_deaths | Shape: (3235, 19) | Computation Time: 0.003124s


# Structural Causal Models

# Checking the Cox Proportional Hazard Assumptions

https://lifelines.readthedocs.io/en/latest/jupyter_notebooks/Proportional%20hazard%20assumption.html?highlight=cox%20assumptions

The proportional hazard assumption is that all individuals have the same hazard function, but a unique scaling factor infront. So the shape of the hazard function is the same for all individuals, and only a scalar multiple changes per individual.

# Deceased Donors

In [69]:
confounders = [
    'RecipientAge_NOTR', 
    'RecipientAge_NOTR_power2',
    'DonorAge_NOTR',
    'DonorAge_NOTR_power2',
    'IL2rMoAb', 
    'CIPHour_DBD', 'CIPHour_DCD', 
    'DialysisYears',
    'HLA_Mismatch',
    # 'No_DESA', 
    'Specific_DESA', 
    # 'Other_DESA'
]

In [72]:
df_total = df.copy(deep=True)
df_late = df_total[df_total['T'] >= 1]
df_early = df_total[df_total['T'] < 1]
df_class_I = df_total[df_total['Class_I'] == 1]
df_class_II = df_total[df_total['Class_II'] == 1]
df_class_I_II = df_total[df_total['Class_I_II'] == 1]

formula = ' + '.join(confounders)
# # # for df_l, title in [(df_total, 'Total Data Set'), (df_late, 'Late Failure'), (df_early, 'Early Failure')]:
df_l = df_early
cph = CoxPHFitter()
cph.fit(df_l, duration_col='T', event_col='E', formula=formula)
len = df_l['Specific_DESA'].sum()
print(f' Bad DESA size in data set {len}')
cph.print_summary()

 Bad DESA size in data set 19


  return summary_df[columns].to_latex(float_format="%." + str(self.decimals) + "f")


0,1
model,lifelines.CoxPHFitter
duration col,'T'
event col,'E'
baseline estimation,breslow
number of observations,402
number of events observed,387
partial log-likelihood,-1931.11
time fit was run,2023-02-11 10:37:21 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
CIPHour_DBD,-0.11,0.9,0.08,-0.28,0.05,0.76,1.06,0.0,-1.32,0.19,2.42
CIPHour_DCD,0.09,1.1,0.08,-0.06,0.24,0.95,1.27,0.0,1.23,0.22,2.18
DialysisYears,-0.04,0.96,0.06,-0.16,0.07,0.85,1.08,0.0,-0.73,0.47,1.1
DonorAge_NOTR,-0.02,0.98,0.26,-0.52,0.48,0.6,1.62,0.0,-0.07,0.95,0.08
DonorAge_NOTR_power2,0.03,1.03,0.24,-0.43,0.5,0.65,1.64,0.0,0.14,0.89,0.17
HLA_Mismatch,0.03,1.03,0.25,-0.46,0.52,0.63,1.68,0.0,0.12,0.91,0.14
IL2rMoAb,0.34,1.4,0.15,0.05,0.63,1.05,1.87,0.0,2.29,0.02,5.51
RecipientAge_NOTR,0.03,1.03,0.3,-0.57,0.62,0.57,1.86,0.0,0.09,0.93,0.11
RecipientAge_NOTR_power2,-0.13,0.88,0.3,-0.71,0.45,0.49,1.57,0.0,-0.44,0.66,0.6
Specific_DESA,0.47,1.59,0.24,-0.0,0.93,1.0,2.54,0.0,1.95,0.05,4.29

0,1
Concordance,0.59
Partial AIC,3882.23
log-likelihood ratio test,21.08 on 10 df
-log2(p) of ll-ratio test,5.61


|Deceased donors (3235)| #Tx with Bad DESA | Bad DESA (HR) | 95% CI|
| ------------- | --------------| -----------|-----------|
| All           |         55|      2.40  | 1.81-3.20
| Late failures (< 1 y)| 36 |  2.11  | 1.46-3.03
| Early failures (>1 y)| 19  |   1.59|1-2.54

# Living Donor

In [84]:
confounders = [
    'RecipientAge_NOTR', 
    'RecipientAge_NOTR_power2',
    'DonorAge_NOTR',
    'DonorAge_NOTR_power2',
    'IL2rMoAb', 
    # 'CIPHour_DBD', 'CIPHour_DCD', 
    # 'No_DESA', 
    'Specific_DESA', 
    'DialysisYears',
    # 'Other_DESA'
]

In [85]:
path = '~/Repos/STRIDE/STRIDE-Analytics/data/20210614-mismatch_ep_db-extended.pickle'

donor_type ='Living'
status = 'All'
num_col = ['DonorAge_NOTR', 'DialysisYears', 'RecipientAge_NOTR']

df = (
    data_loading(path)
    .pipe(start_pipeline, status, donor_type)
    .pipe(keeping_features,
        'TransplantID',
        'Failure',
        'Survival[Y]',
        'EpvsHLA_Donor',
        '#DESA',
        'DESA', 
        'DonorAge_NOTR',
        'DialysisYears',
        'RecipientAge_NOTR',
        'Donor_Type',
        'IL2rMoAb_T0'
    )
    .pipe(integer_encoder, 'IL2rMoAb_T0')
    .pipe(polynomial_power2, 'DonorAge_NOTR', 'RecipientAge_NOTR')
    .pipe(set_time_event_label, E='Failure', T='Survival[Y]')
    .pipe(eng_immunological_features, antibody_epitope=True)
    .pipe(feature_scaler, num_col + ['DonorAge_NOTR_power2', 'RecipientAge_NOTR_power2'], scaler='standard')
    .pipe(censoring_deaths)
    .pipe(setting_prediction_horizon, 15)
    .rename(columns={'IL2rMoAb_T0':'IL2rMoAb'})
)
df = create_treatment_grups(df, [RELEVANT_DESA_BAD])


Step: data_loading | Shape: (4690, 24) | Computation Time: 0.012961s
- selceted cohort --> Donor Type: Living, Epitope Antibody Presence: All
Step: start_pipeline | Shape: (1455, 23) | Computation Time: 0.006128s
Step: integer_encoder | Shape: (1455, 10) | Computation Time: 0.000668s
Step: set_time_event_label | Shape: (1455, 12) | Computation Time: 0.001571s
Step: eng_immunological_features | Shape: (1455, 16) | Computation Time: 0.013484s
Step: censoring_deaths | Shape: (1455, 16) | Computation Time: 0.001641s
Step: setting_prediction_horizon | Shape: (1455, 16) | Computation Time: 0.002349s


In [86]:
df_total = df.copy(deep=True)
df_late = df_total[df_total['T'] >= 1]
df_early = df_total[df_total['T'] < 1]
df_class_I = df_total[df_total['Class_I'] == 1]
df_class_II = df_total[df_total['Class_II'] == 1]
df_class_I_II = df_total[df_total['Class_I_II'] == 1]

formula = ' + '.join(confounders)
# for df_l, title in [(df_total, 'Total Data Set'), (df_late, 'Late Failure'), (df_early, 'Early Failure')]:
df_l = df_total
cph = CoxPHFitter()
cph.fit(df_l, duration_col='T', event_col='E', formula=formula, fit_options={'step_size':0.6})
len = df_l['Specific_DESA'].sum()
print(f' Bad DESA size in data set {len}')
cph.print_summary()

 Bad DESA size in data set 21


  return summary_df[columns].to_latex(float_format="%." + str(self.decimals) + "f")


0,1
model,lifelines.CoxPHFitter
duration col,'T'
event col,'E'
baseline estimation,breslow
number of observations,1455
number of events observed,513
partial log-likelihood,-3497.97
time fit was run,2023-02-11 10:44:45 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
DialysisYears,0.11,1.11,0.04,0.03,0.19,1.03,1.2,0.0,2.57,0.01,6.63
DonorAge_NOTR,-0.31,0.73,0.29,-0.87,0.26,0.42,1.29,0.0,-1.07,0.28,1.82
DonorAge_NOTR_power2,0.45,1.57,0.28,-0.1,1.0,0.9,2.72,0.0,1.6,0.11,3.19
IL2rMoAb,-0.27,0.77,0.11,-0.49,-0.05,0.61,0.95,0.0,-2.38,0.02,5.84
RecipientAge_NOTR,-0.85,0.43,0.2,-1.25,-0.45,0.29,0.64,0.0,-4.15,<0.005,14.88
RecipientAge_NOTR_power2,1.1,3.02,0.2,0.72,1.49,2.05,4.45,0.0,5.58,<0.005,25.28
Specific_DESA,0.8,2.22,0.29,0.23,1.37,1.25,3.95,0.0,2.73,0.01,7.3

0,1
Concordance,0.61
Partial AIC,7009.94
log-likelihood ratio test,108.51 on 7 df
-log2(p) of ll-ratio test,65.53


In [22]:
df_early.E.value_counts()

1    66
0     2
Name: E, dtype: int64

|Living donors (1455)| #Tx with Bad DESA | Bad DESA (HR) | 95% CI|
| ------------- | --------------| -----------|-----------|
| All           |        21 |       2.22      | 1.25-3.95	
| Early failures (< 1 y)| 3  |      17.97	  | 4.35-74.34
| Late failures (>1 y)| 18      |  1.96	 | 1.01-3.8

# Potential Outcomes

## Finding Inverse Probability Wieghts (IPW)

In [46]:
df_total = df.copy(deep=True)
df_late = df_total[df_total['T'] > 1]
df_early = df_total[df_total['T'] < 1]
confounders = [
    'RecipientAge_NOTR', 
    'RecipientAge_NOTR_power2',
    'DonorAge_NOTR',
    'DonorAge_NOTR_power2',
    'IL2rMoAb', 
    'CIPHour_DBD', 'CIPHour_DCD', 
]
treatments = ['No_DESA', 'Other_DESA', 'Specific_DESA']
df_weight = find_ipw(df, confounders, treatments, scaler=None, verbose=False)

cph = CoxPHFitter()
cols = [
    # 'No_DESA',
    'Specific_DESA',
    'Other_DESA',
    'E', 'T', 
    'w'
]
cph.fit(
    df_weight[cols], 
    weights_col='w',
    duration_col='T', event_col='E', robust=True)
print(cph.print_summary())

PatsyError: Error evaluating factor: NameError: name 'CIPHour_DCD' is not defined
    No_DESA ~ RecipientAge_NOTR + RecipientAge_NOTR_power2 + DonorAge_NOTR + DonorAge_NOTR_power2 + IL2rMoAb + CIPHour_DBD + CIPHour_DCD
                                                                                                                             ^^^^^^^^^^^

In [None]:
cph.check_assumptions(df_weight[cols], p_value_threshold=0.05, show_plots=True)

Proportional hazard assumption looks okay.


  It's important to know that the naive variance estimates of the coefficients are biased. Instead use Monte Carlo to
  estimate the variances. See paper "Variance estimation when using inverse probability of treatment weighting (IPTW) with survival analysis"
  or "Adjusted Kaplan-Meier estimator and log-rank test with inverse probability of treatment weighting for survival data."
                  
  for variable in self.params_.index & (columns or self.params_.index):


[]

In [None]:
from lifelines.statistics import proportional_hazard_test
results = proportional_hazard_test(cph, df_weight[cols], time_transform='rank')
results.print_summary(decimals=3, model="untransformed variables")

  return self.summary.to_latex()


0,1
time_transform,rank
null_distribution,chi squared
degrees_of_freedom,1
model,<lifelines.CoxPHFitter: fitted with 9688.73 to...
test_name,proportional_hazard_test

Unnamed: 0,test_statistic,p,-log2(p)
Other_DESA,0.03,0.87,0.2
Specific_DESA,0.12,0.73,0.45


### Living Donor

In [None]:
path = '~/Repos/STRIDE/STRIDE-Analytics/data/20210614-mismatch_ep_db-extended.pickle'
donor_type ='Living'
status = 'All'
num_col = ['DonorAge_NOTR', 'DialysisYears', 'RecipientAge_NOTR', 'CIPHour_DBD', 'CIPHour_DCD']

df = (
    data_loading(path)
    .pipe(start_pipeline, status, donor_type)
    .pipe(keeping_features,
        'Failure',
        'Survival[Y]',
        'EpvsHLA_Donor',
        '#DESA',
        'DESA', 
        'DonorAge_NOTR',
        'DialysisYears',
        'RecipientAge_NOTR',
        'CIPHour_DBD', 
        'CIPHour_DCD' ,
        # 'TypeCadaveric_NOTR', 
        'Donor_Type',
        'IL2rMoAb_T0'
    )
    .pipe(integer_encoder, 'IL2rMoAb_T0')
    .pipe(features_from_antibody_epitopes)
    .pipe(set_time_event_label, E='Failure', T='Survival[Y]')
    # .pipe(feature_scaler, num_col, scaler='standard')
    .pipe(censoring_deaths)
    .pipe(setting_prediction_horizon, 15)
    .rename(columns={'IL2rMoAb_T0':'IL2rMoAb'})
)
df['DESA_3more'] = df['#DESA'].apply(lambda x: 1 if x > 2 else 0)

Step: data_loading | Shape: (4690, 24) | Computation Time: 0.01847s
- selceted cohort --> Donor Type: Living, Epitope Antibody Presence: All
Step: start_pipeline | Shape: (1455, 23) | Computation Time: 0.013016s
Step: integer_encoder | Shape: (1455, 12) | Computation Time: 0.000888s
Step: set_time_event_label | Shape: (1455, 16) | Computation Time: 0.004486s
Step: censoring_deaths | Shape: (1455, 16) | Computation Time: 0.002584s
Step: setting_prediction_horizon | Shape: (1455, 16) | Computation Time: 0.005771s


In [None]:
df.sample()

Unnamed: 0,#DESA,DESA,DonorAge_NOTR,DialysisYears,RecipientAge_NOTR,CIPHour_DBD,CIPHour_DCD,Donor_Type,IL2rMoAb,No_DESA,Relevant_DESA_Bad,Relevant_DESA_Good,Class_I,Class_II,E,T,DESA_3more
838,0,{},20,2.7,44,0.0,0.0,Living,0,1,0,0,0,0,0,15.0,0


In [None]:
df_total = df.copy(deep=True)
df_late = df_total[df_total['T'] > 1]
df_early = df_total[df_total['T'] < 1]

# for df, title in [(df_total, 'Total Data Set'), (df_late, 'Late Failure'), (df_early, 'Early Failure')]:
df = df_total
confounders = [
    'RecipientAge_NOTR', 'RecipientAge_NOTR*RecipientAge_NOTR', 
    'DonorAge_NOTR', 'DonorAge_NOTR*DonorAge_NOTR', 'IL2rMoAb', 
    'CIPHour_DBD', 'CIPHour_DCD', 
]

df_treat_group = create_treatment_grups(df, [RELEVANT_DESA_BAD])
# treatments = ['No_DESA', 'Other_DESA', 'Specific_DESA']
treatments = ['No_DESA', 'Other_DESA', 'Specific_DESA', 'DESA_3more']
# df_weight = find_ipw(df_treat_group, confounders, treatments, verbose=False)
df_treat_group
# cph = CoxPHFitter()
# cols = [
#     'DonorAge_NOTR', 'RecipientAge_NOTR',
#     'DialysisYears', 'IL2rMoAb',
#     'CIPHour_DBD', 'CIPHour_DCD', 
#     'No_DESA',
#     'Specific_DESA',
#     'DESA_3more',
#     'E', 'T', 'w',
# ]
# cph.fit(df_weight[cols], weights_col='w', duration_col='T', event_col='E', robust=True)
# # print(title)
# cph.print_summary()


Unnamed: 0,#DESA,DESA,DonorAge_NOTR,DialysisYears,RecipientAge_NOTR,CIPHour_DBD,CIPHour_DCD,TypeCadaveric_NOTR,Donor_Type,IL2rMoAb,...,Relevant_DESA_Bad,Relevant_DESA_Good,Class_I,Class_II,E,T,DESA_3more,Specific_DESA,Other_DESA,Groups
0,0,{},44,2.3,54,9.000000,0.000000,0,Deceased,0,...,0,0,0,0,0,12.191781,0,0,0,1
1,0,{},54,2.1,32,21.750000,0.000000,0,Deceased,0,...,0,0,0,0,0,15.000000,0,0,0,1
2,0,{},44,4.9,27,36.000000,0.000000,0,Deceased,0,...,0,0,0,0,1,6.956164,0,0,0,1
3,0,{},23,0.0,40,30.000000,0.000000,0,Deceased,1,...,0,0,0,0,0,4.509589,0,0,0,1
4,0,{},55,5.4,38,0.000000,17.216667,1,Deceased,0,...,0,0,0,0,1,0.010959,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4685,0,{},51,1.5,43,29.000000,0.000000,0,Deceased,1,...,0,0,0,0,0,12.126027,0,0,0,1
4686,0,{},54,1.7,37,20.700000,0.000000,0,Deceased,1,...,0,0,0,0,1,10.879452,0,0,0,1
4687,0,{},21,3.5,46,17.183333,0.000000,0,Deceased,1,...,0,0,0,0,0,15.000000,0,0,0,1
4688,0,{},42,2.5,23,0.000000,32.183333,1,Deceased,1,...,0,0,0,0,1,11.816438,0,0,0,1


# Total Data

In [None]:
path = '~/Repos/STRIDE/STRIDE-Analytics/data/20210614-mismatch_ep_db-extended.pickle'
donor_type = None
status = 'All'
num_col = ['DonorAge_NOTR', 'DialysisYears', 'RecipientAge_NOTR', 'CIPHour_DBD', 'CIPHour_DCD']

df = (
    data_loading(path)
    # .pipe(start_pipeline, status, donor_type)
    .pipe(keeping_features,
        'Failure',
        'Survival[Y]',
        'EpvsHLA_Donor',
        '#DESA',
        'DESA', 
        'DonorAge_NOTR',
        'DialysisYears',
        'RecipientAge_NOTR',
        'CIPHour_DBD', 
        'CIPHour_DCD' ,
        'Donor_Type',
        'IL2rMoAb_T0'
    )
    .pipe(integer_encoder, 'IL2rMoAb_T0')
    .pipe(polynomial_power2, 'DonorAge_NOTR', 'RecipientAge_NOTR')
    .pipe(set_time_event_label, E='Failure', T='Survival[Y]')
    .pipe(eng_immunological_features, antibody_epitope=True)
    .pipe(feature_scaler, num_col + ['DonorAge_NOTR_power2', 'RecipientAge_NOTR_power2'], scaler='standard')
    .pipe(censoring_deaths)
    .pipe(setting_prediction_horizon, 10)
    .rename(columns={'IL2rMoAb_T0':'IL2rMoAb'})
)
df = create_treatment_grups(df, [RELEVANT_DESA_BAD])

Step: data_loading | Shape: (4690, 24) | Computation Time: 0.013059s
Step: integer_encoder | Shape: (4690, 12) | Computation Time: 0.001571s
Step: set_time_event_label | Shape: (4690, 14) | Computation Time: 0.001867s
Step: eng_immunological_features | Shape: (4690, 19) | Computation Time: 0.038885s
Step: censoring_deaths | Shape: (4690, 19) | Computation Time: 0.001222s
Step: setting_prediction_horizon | Shape: (4690, 19) | Computation Time: 0.004254s


In [None]:
df

Unnamed: 0,#DESA,DESA,DonorAge_NOTR,DialysisYears,RecipientAge_NOTR,CIPHour_DBD,CIPHour_DCD,Donor_Type,IL2rMoAb,DonorAge_NOTR_power2,...,T,No_DESA,Relevant_DESA_Bad,Relevant_DESA_Good,Class_I,Class_II,Class_I_II,Specific_DESA,Other_DESA,Groups
0,0,{},-0.019920,-0.183046,0.596332,-0.190025,-0.433789,Deceased,0,-0.200724,...,10.000000,1,0,0,0,0,0,0,0,1
1,0,{},0.645513,-0.263943,-0.936160,0.857348,-0.433789,Deceased,0,0.579189,...,10.000000,1,0,0,0,0,0,0,0,1
2,0,{},-0.019920,0.868621,-1.284454,2.027942,-0.433789,Deceased,0,-0.200724,...,6.956164,1,0,0,0,0,0,0,0,1
3,0,{},-1.417332,-1.113366,-0.378890,1.535061,-0.433789,Deceased,1,-1.320455,...,4.509589,1,0,0,0,0,0,0,0,1
4,0,{},0.712057,1.070865,-0.518208,-0.929348,1.549608,Deceased,0,0.665934,...,0.010959,1,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4685,0,{},0.445883,-0.506635,-0.169914,1.452914,-0.433789,Deceased,1,0.328503,...,10.000000,1,0,0,0,0,0,0,0,1
4686,0,{},0.645513,-0.425738,-0.587867,0.771094,-0.433789,Deceased,1,0.579189,...,10.000000,1,0,0,0,0,0,0,0,1
4687,0,{},-1.550418,0.302339,0.039062,0.482210,-0.433789,Deceased,1,-1.390488,...,10.000000,1,0,0,0,0,0,0,0,1
4688,0,{},-0.153007,-0.102148,-1.563089,-0.929348,3.273799,Deceased,1,-0.337606,...,10.000000,1,0,0,0,0,0,0,0,1


In [None]:
confounders = [
    'RecipientAge_NOTR', 
    'RecipientAge_NOTR_power2',
    'DonorAge_NOTR',
    'DonorAge_NOTR_power2',
    'IL2rMoAb', 
    'CIPHour_DBD', 'CIPHour_DCD', 
    'Donor_Type', 
    'DialysisYears',

]
treatments = ['No_DESA', 'Other_DESA', 'Specific_DESA']
df_weight = find_ipw(df, confounders, treatments, scaler=None, verbose=False)

cph = CoxPHFitter()
cols = [
    # 'No_DESA',
    'Specific_DESA',
    'Other_DESA',
    'E', 'T', 
    'w'
]
cph.fit(
    df_weight[cols], 
    weights_col='w',
    duration_col='T', event_col='E', robust=True)
print(cph.print_summary())

  return summary_df[columns].to_latex(float_format="%." + str(self.decimals) + "f")


0,1
model,lifelines.CoxPHFitter
duration col,'T'
event col,'E'
weights col,'w'
robust variance,True
baseline estimation,breslow
number of observations,14035.4
number of events observed,4191.42
partial log-likelihood,-38510.38
time fit was run,2022-06-28 12:17:43 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
Specific_DESA,1.03,2.8,0.12,0.8,1.26,2.22,3.53,0.0,8.72,<0.005,58.28
Other_DESA,0.15,1.16,0.14,-0.13,0.43,0.88,1.53,0.0,1.06,0.29,1.8

0,1
Concordance,0.54
Partial AIC,77024.76
log-likelihood ratio test,932.44 on 2 df
-log2(p) of ll-ratio test,672.62


None


In [None]:
df.Relevant_DESA_Bad.value_counts()

0    4494
1     196
Name: Relevant_DESA_Bad, dtype: int64