# Preprocessing and Feature Selection for RSF model for Living Donors

## Importing packages

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

from sklearn import set_config
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn.impute import SimpleImputer
from sksurv.datasets import load_gbsg2
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sksurv.ensemble import RandomSurvivalForest
from sklearn.inspection import permutation_importance
from sksurv.nonparametric import kaplan_meier_estimator
from sksurv.metrics import concordance_index_ipcw
from sksurv.linear_model import CoxPHSurvivalAnalysis

set_config(display="text") 

In [2]:
pd.set_option('display.max_columns', 1200)
pd.set_option('display.max_rows', 300)

## Loading data into DataFrames and joining them

In [3]:
kidpan_df = pd.read_csv('csv_data/Kidney_Pancreas_full.csv')
living_df = pd.read_csv('csv_data/Living_Donor.csv')

  kidpan_df = pd.read_csv('csv_data/Kidney_Pancreas_full.csv')
  living_df = pd.read_csv('csv_data/Living_Donor.csv')


In [4]:
kidpan_living = pd.merge(kidpan_df, living_df, on="DONOR_ID", how="inner")
del kidpan_df
del living_df

## Dropping duplicate columns

In [5]:
duplicated_columns = []
for col in kidpan_living.columns:
    if col.endswith('_x') and col.rstrip('_x') + '_y' in kidpan_living.columns:
        duplicated_columns.append(col)
duplicated_columns

['_id_x',
 'GENDER_x',
 'ABO_x',
 'CITIZENSHIP_x',
 'REGION_x',
 'AGE_DON_x',
 'ETHCAT_DON_x',
 'CMV_IGG_x',
 'CMV_IGM_x',
 'HBV_CORE_x',
 'HBV_SUR_ANTIGEN_x',
 'PX_STAT_x',
 'AGE_BIN_x',
 'EDUCATION_x',
 'LIV_DON_TY_x',
 'STATUS_LDR_x',
 'VAL_DT_LDR_x',
 'KI_CREAT_PREOP_x',
 'KI_PROC_TY_x',
 'CITIZEN_COUNTRY_x']

In [6]:
different_duplicates = []
for col in duplicated_columns:
    if (kidpan_living[col].equals(kidpan_living[col.rstrip('_x') + '_y'])):
        print(col.rstrip('_x'), "identical")
        kidpan_living.rename(columns={col.rstrip('_x') + '_y': col.rstrip('_x')}, inplace=True)
        kidpan_living.drop(col, axis=1, inplace=True)
    else:
        print(col.rstrip('_x'), "different")
        different_duplicates.append(col.rstrip('_x'))

_id different
GENDER different
ABO different
CITIZENSHIP different
REGION different
AGE_DON identical
ETHCAT_DON different
CMV_IGG different
CMV_IGM different
HBV_CORE different
HBV_SUR_ANTIGEN different
PX_STAT different
AGE_BIN different
EDUCATION different
LIV_DON_TY different
STATUS_LDR identical
VAL_DT_LDR identical
KI_CREAT_PREOP identical
KI_PROC_TY identical
CITIZEN_COUNTRY different


In [None]:
kidpan_living.sort_index(axis=1).head(10)

Unnamed: 0,A1,A2,A2A2B_ELIGIBILITY,ABO_DON,ABO_MAT,ABO_x,ABO_y,ACADEMIC_LEVEL_TCR,ACADEMIC_LEVEL_TRR,ACADEMIC_PRG_TCR,ACADEMIC_PRG_TRR,ACTIVATE_DATE,ACUTE_REJ_EPI_KI,ACUTE_REJ_EPI_PA,ADMISSION_DATE,ADMIT_DATE_DON,AGE,AGE_BIN_x,AGE_BIN_y,AGE_DIAB,AGE_DON,AGE_GROUP,AMIS,AMYLASE,ANAST_LK_PA,ANTIBODY_TESTED,ANTIHYPE_DON,ARGININE_DON,ART_RECON,B1,B2,BILIARY_COMP,BILIARY_COMP_GRADE,BIOPSY_LI,BIOP_ISLET_PA,BLEED_PA,BLOOD_INF_CONF_DON,BLOOD_INF_DON,BLOOD_SUGAR_DIET_PA,BLOOD_SUGAR_MEDICATION_PA,BLOOD_SUGAR_MED_RESUMED_DATE_PA,BMIS,BMI_CALC,BMI_DON_CALC,BMI_TCR,BP_POSTOP_DIAST,BP_POSTOP_SYST,BP_PREOP_DIAST,BP_PREOP_SYST,BUN_DON,BW4,BW6,C1,C2,CANCER_FREE,CANCER_FREE_INT_DON,CANCER_SITE_DON,CARDARREST_NEURO,CDC_RISK_HIV_DON,CITIZENSHIP_DON,CITIZENSHIP_x,CITIZENSHIP_y,CITIZEN_COUNTRY_DON,CITIZEN_COUNTRY_x,CITIZEN_COUNTRY_y,CLIN_INFECT_DON,CMV_DON,CMV_IGG_DON,CMV_IGG_x,CMV_IGG_y,CMV_IGM_DON,CMV_IGM_x,CMV_IGM_y,CMV_NUCLEIC,CMV_NUCLEIC_DON,CMV_OLD_LIV_DON,CMV_STATUS,CMV_TEST_DON,CMV_TOTAL,COD,COD2_KI,COD2_PA,COD3_KI,COD3_PA,COD_CAD_DON,COD_KI,COD_PA,COD_WL,COLD_ISCH_KI,COMPL_ABSC,COMPL_ANASLK,COMPL_PANCREA,COMPOSITE_DEATH_DATE,CONTIN_ALCOHOL_OLD_DON,CONTIN_CIG_DON,CONTIN_COCAINE_DON,CONTIN_IV_DRUG_OLD_DON,CONTIN_OTH_DRUG_DON,CONTROLLED_DON,CONVERT_OPEN_KI,CONVERT_OPEN_LU,CORE_COOL_DON,CREAT1Y,CREAT6M,CREAT_CLEAR,CREAT_CLEAR_DATE,CREAT_DON,CREAT_TRR,CTR_CODE,CURRENT_PRA,C_PEPTIDE,C_PEPTIDEDATE,C_PEPTIDE_PA_TCR,C_PEPTIDE_PA_TRR,DA1,DA2,DATA_TRANSPLANT,DATA_WAITLIST,DAYSWAIT_ALLOC,DAYSWAIT_CHRON,DAYSWAIT_CHRON_KI,DAYSWAIT_CHRON_PA,DB1,DB2,DBW4,DBW6,DC1,DC2,DDAVP_DON,DDP1,DDP2,DDPA1,DDPA2,DDQ1,DDQ2,DDQA1,DDQA2,DDR1,DDR2,DDR51,DDR51_2,DDR52,DDR52_2,DDR53,DDR53_2,DEATH_CIRCUM_DON,DEATH_DATE,DEATH_DT,DEATH_MECH_DON,DGN2_TCR,DGN_TCR,DIAB,DIABDUR_DON,DIABETES,DIABETES_DON,DIAG_KI,DIAG_PA,DIALYSIS_DATE,DIAL_DATE,DIAL_TRR,DIET_DON,DISCHARGE_DATE,DISTANCE,DIURETICS_DON,DOBUT_DON_OLD,DONATION,DONATION_DON,DONOR_ID,DON_DATE,DON_ORG,DON_ORG2,DON_RETYP,DON_TY,DOPAMINE_DON_OLD,DQ1,DQ2,DR1,DR2,DR51,DR51_2,DR52,DR52_2,DR53,DR53_2,DRMIS,DRUGTRT_COPD,DUCT_MGMT,DUR_ABSTINENCE,DWFG_KI,EBV_DNA_DON,EBV_IGG,EBV_IGG_CAD_DON,EBV_IGG_DON,EBV_IGM,EBV_IGM_CAD_DON,EBV_IGM_DON,EBV_SEROSTATUS,EBV_TEST_DON,EBV_TOTAL,ECD_DONOR,EDUCATION_DON,EDUCATION_x,EDUCATION_y,END_BMI_CALC,END_CPRA,END_CPRA_DETAIL,END_DATE,END_EPTS,END_OPO_CTR_CODE,END_STAT,END_STAT_KI,END_STAT_PA,ENTERIC_DRAIN,ENTERIC_DRAIN_DT,ETHCAT,ETHCAT_DON_x,ETHCAT_DON_y,ETHNICITY,EXH_PERIT_ACCESS,EXH_VASC_ACCESS,EXTRACRANIAL_CANCER_DON,FAILDATE_KI,FAILDATE_PA,FFP_UNITS,FIN_RESIST_TX,FIRST_WK_DIAL,FREE_DON,FUNC_STAT,FUNC_STAT_TCR,FUNC_STAT_TRF,FUNC_STAT_TRR,GENDER_DON,GENDER_x,GENDER_y,GFR,GFR_DATE,GRF_FAIL_CAUSE_TY_KI,GRF_FAIL_CAUSE_TY_PA,GRF_PLACEM,GRF_STAT_KI,GRF_STAT_PA,GRF_VASC_THROMB_PA,GSTATUS_KI,GSTATUS_PA,GTIME_KI,GTIME_PA,HAPLO_TY_MATCH_DON,HBA1C_PA_TCR,HBA1C_PA_TRR,HBSAB_DON,HBV_CORE_DON,HBV_CORE_x,HBV_CORE_y,HBV_DNA,HBV_DNA_DON,HBV_NAT,HBV_NAT_DON,HBV_SURF_TOTAL,HBV_SUR_ANTIGEN_DON,HBV_SUR_ANTIGEN_x,HBV_SUR_ANTIGEN_y,HBV_TEST_DON,HCV_ANTIBODY,HCV_ANTIBODY_DON,HCV_NAT,HCV_NAT_DON,HCV_RIBA,HCV_RIBA_DON,HCV_RNA,HCV_RNA_DON,HCV_SEROSTATUS,HCV_TEST_DON,HEALTH_INS,HEPARIN_DON,HEP_C_ANTI_DON,HGT_CM_CALC,HGT_CM_DON_CALC,HGT_CM_TCR,HIST_ALCOHOL_OLD_DON,HIST_CANCER,HIST_CANCER_DON,HIST_CIG,HIST_CIG_DON,HIST_COCAINE_DON,HIST_DIABETES_DON,HIST_HYPER,HIST_HYPERTENS_DON,HIST_INSULIN_DEP_DON,HIST_IV_DRUG_OLD_DON,HIST_OTH_DRUG_DON,HIV_NAT,HIV_NAT_DON,HIV_SEROSTATUS,HLAMIS,HMO_PPO_DON,HOME_STATE,HOME_STATE_DON,HTLV1_OLD_DON,HTLV2_OLD_DON,HYPERTENSION,HYPERTENS_DUR_DON,HYPER_DIET,HYPER_DIUR,HYPER_MEDS,INACT_REASON_CD,INFECT_PA,INIT_AGE,INIT_AGE_BIN,INIT_BMI_CALC,INIT_CPRA,INIT_CURRENT_PRA,INIT_DATE,INIT_DISCHARGE_DT,INIT_EPTS,INIT_HGT_CM,INIT_OPO_CTR_CODE,INIT_PEAK_PRA,INIT_STAT,INIT_WGT_KG,INOTROP_AGENTS,INOTROP_SUPPORT_DON,INO_PROCURE_AGENT_1,INO_PROCURE_AGENT_2,INO_PROCURE_AGENT_3,INSULIN_DEP_DON,INSULIN_DON,INSULIN_DOSAGE_OLD_PA,INSULIN_DOSAGE_PA,INSULIN_DURATION_PA,INSULIN_DUR_DON,INSULIN_PA,INSULIN_RESUMED_DATE_PA,INTRACRANIAL_CANCER_DON,INTRAOP_COMP,KDPI,KDRI_MED,KDRI_RAO,KIDNEY_RECOV,KI_CREAT_POSTOP,KI_CREAT_PREOP,KI_PROC_TY,Kidney_Followup,Kidney_Malig_Followup,Kidney_Pancreas_Followup,Kidney_Pancreas_HLA,Kidney_Pancreas_Immuno_Discharge,Kidney_Pancreas_Immuno_Followup,Kidney_Pancreas_Malig_Followup,Kidney_Pancreas_PRA,Kidney_Pancreas_WL_History,LIPASE,LISTING_CTR_CODE,LIVER_RECOV,LIV_DON_TY_x,LIV_DON_TY_y,LI_PROC_TY,LOS,LT_KI_BIOPSY,LT_KI_GLOMERUL,LT_ONE_WEEK_DON,LUNG_RECOV,LU_COMP,LU_COMP_REASON,LU_PROC_TY,L_FIN_FLOW_RATE_TX,L_FIN_RESIST_TX,Living_Donor_Follow,MACRO_FAT,MALIG,MALIG_TCR_KI,MALIG_TCR_PA,MALIG_TRR,MALIG_TY,MALIG_TY_TRR,MARITAL_STAT,MAX_KDPI_IMPORT_NON_ZERO_ABDR,MAX_KDPI_IMPORT_ZERO_ABDR,MAX_KDPI_LOCAL_NON_ZERO_ABDR,MAX_KDPI_LOCAL_ZERO_ABDR,MEDICAID_DON,MEDICARE_DON,MED_COND_TRR,METHOD_BLOOD_SUGAR_CONTROL_PA,MICRO_FAT,MULTIORG,NON_AUTO_BLOOD,NON_HRT_DON,NPKID,NPPAN,NUM_PREV_TX,ON_DIALYSIS,OPER_TECH,OPO_CTR_CODE,ORGAN,ORG_RECOVERY_DT,ORG_REC_ON,OTHER_HYPERTENS_MED_DON,OTHER_INF_CONF_DON,OTHER_INF_DON,OTH_COMP_KI,OTH_COMP_KI_INTER,OTH_COMP_LI,OTH_COMP_LI_INTER,OTH_GOVT_DON,OTH_INTER_PROC_KI,OTH_INTER_PROC_KI_DT,OTH_INTER_PROC_LI,OTH_INTER_PROC_LI_DT,PACK_YRS,PANCREATIT_PA,PAYBACK,PA_PRESERV_TM,PEAK_PRA,PERIP_VASC,PERM_STATE,PERM_STATE_TRR,PHYSICAL_CAPACITY,PK_DA1,PK_DA2,PK_DB1,PK_DB2,PK_DDR1,PK_DDR2,PLATELETS_UNITS,POSTOP_ALBUM,POSTOP_ALK_PHOS,POSTOP_BILI,POSTOP_CREAT_LI,POSTOP_INR,POSTOP_SGOT_AST,POSTOP_SGPT_ALT,POSTOP_TEST_DT,POSTOP_URINE_PROTEIN,POSTOP_URINE_RATIO,PRBC_UNITS,PREDON_HGT,PREDON_WGT,PREOP_ALBUM,PREOP_ALK_PHOS,PREOP_BILI,PREOP_CREAT_LI,PREOP_FEF_AFTER,PREOP_FEF_BEFORE,PREOP_FEV1_AFTER,PREOP_FEV1_BEFORE,PREOP_FVC_AFTER,PREOP_FVC_BEFORE,PREOP_INR,PREOP_LUNG_CAP,PREOP_PAO2,PREOP_SGOT_AST,PREOP_SGPT_ALT,PREOP_TLC_AFTER,PREOP_TLC_BEFORE,PREOP_URINE_PROTEIN,PREOP_URINE_RATIO,PRETREAT_MED_DON_OLD,PREV_KI_DATE,PREV_KI_TX,PREV_MALIG_TY,PREV_PA_TX,PREV_PREG,PREV_TX,PREV_TX_ANY,PREV_TX_ANY_N,PRE_AVG_INSULIN_USED_OLD_TRR,PRE_AVG_INSULIN_USED_TRR,PRE_TX_TXFUS,PRIV_INS_DON,PRI_PAYMENT_CTRY_DON,PRI_PAYMENT_CTRY_TCR_KI,PRI_PAYMENT_CTRY_TCR_PA,PRI_PAYMENT_CTRY_TRR_KI,PRI_PAYMENT_CTRY_TRR_PA,PRI_PAYMENT_DON,PRI_PAYMENT_TCR_KI,PRI_PAYMENT_TCR_PA,PRI_PAYMENT_TRR_KI,PRI_PAYMENT_TRR_PA,PROTEIN_URINE,PRVTXDIF_KI,PRVTXDIF_PA,PSTATUS,PTIME,PT_CODE,PT_DIURETICS_DON,PT_STEROIDS_DON,PT_T3_DON,PT_T4_DON,PULM_INF_CONF_DON,PULM_INF_DON,PUMP_KI,PX_NON_COMPL_PA,PX_STAT_DATE,PX_STAT_x,PX_STAT_y,Pancreas_Followup,Pancreas_Malig_Followup,RA1,RA2,RB1,RB2,RDA1,RDA2,RDB1,RDB2,RDDR1,RDDR2,RDR1,RDR2,READMISSION_KI,READMISSION_KI_DT,READMISSION_KI_REASON,READMISSION_LI,READMISSION_LI_DT,READMISSION_LI_REASON,READMISSION_LU,READMISSION_LU_DT,READMISSION_LU_REASON,RECOVERY_DATE,RECOV_COUNTRY,RECOV_FACILITY_CODE,RECOV_OUT_US,REC_ON_ICE,REC_ON_PUMP,REFERRAL_DATE,REGION_x,REGION_y,REJCNF_KI,REJCNF_PA,REJTRT_KI,REJTRT_PA,REJ_ACUTE_PA,REJ_BIOPSY,REJ_CHRONIC_PA,REJ_HYPER_PA,REM_CD,REOPERATION_KI,REOPERATION_LI,REOP_BILIARY,REOP_BILIARY_DT,REOP_BLEED_KI,REOP_BLEED_KI_DT,REOP_BLEED_LI,REOP_BLEED_LI_DT,REOP_BOWEL_KI,REOP_BOWEL_KI_DT,REOP_BOWEL_LI,REOP_BOWEL_LI_DT,REOP_HERNIA_KI,REOP_HERNIA_KI_DT,REOP_HERNIA_LI,REOP_HERNIA_LI_DT,REOP_LI_FAIL,REOP_LI_FAIL_DT,REOP_OTH_KI,REOP_OTH_KI_DT,REOP_OTH_LI,REOP_OTH_LI_DT,REOP_VASC_KI,REOP_VASC_KI_DT,REOP_VASC_LI,REOP_VASC_LI_DT,RESUM_MAINT_DIAL,RESUM_MAINT_DIAL_DT,RESUSCIT_DUR,RETXDATE_KI,RETXDATE_PA,RT_KI_BIOPSY,RT_KI_GLOMERUL,R_FIN_FLOW_RATE_TX,R_FIN_RESIST_TX,SELF_DON,SERUM_CREAT,SGOT_DON,SGPT_DON,SHARE_TY,SKIN_CANCER_DON,STATUS_DDR,STATUS_LDR,STATUS_TCR,STATUS_TRR,SURG_INCIS,TATTOOS,TBILI_DON,TOBACCO_USE,TOT_SERUM_ALBUM,TRR_ID_CODE,TRTREJ1Y_KI,TRTREJ1Y_PA,TRTREJ6M_KI,TRTREJ6M_PA,TXHRT,TXINT,TXKID,TXLIV,TXLNG,TXPAN,TXVCA,TX_DATE,TX_PROCEDUR_TY_KI,TX_PROCEDUR_TY_PA,TX_TYPE,URINE_INF_CONF_DON,URINE_INF_DON,USE_WHICH_PRA,VAL_DT_DDR,VAL_DT_LDR,VAL_DT_TCR,VAL_DT_TRR,VASC_COMP_KI,VASC_COMP_KI_INTER,VASC_COMP_LI,VASC_COMP_LI_INTER,VASC_MGMT,VASODIL_DON,VDRL_DON,VEN_EXT_GRF,VIRUSES_TESTED,WARM_ISCH_TM_DON,WGT_KG,WGT_KG_CALC,WGT_KG_DON_CALC,WGT_KG_TCR,WLHL,WLHR,WLIN,WLKI,WLKP,WLLI,WLLU,WLPA,WLPI,WLVC,WL_ID_CODE,WL_ORG,WORK_INCOME,WORK_INCOME_TCR,WORK_INCOME_TRR,WT_QUAL_DATE,YR_ENTRY_US,YR_ENTRY_US_TCR,_id_x,_id_y
0,3.0,25.0,,O,2.0,A,O,,,,,{'$date': '1994-02-28T00:00:00Z'},,,,,43.0,35-49,35-49,,44.0,A,1.0,,,,,,,7.0,27.0,,,,,,,,,,,0.0,20.1,,23.4375,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,P,U,Y,,,,,,,,,,,3.0,,,,,,,,,,,,,,1.4,1.7,,,,6.7,1023,0.0,,,,,1.0,25.0,Y,Y,,247.0,247.0,,7.0,27.0,95.0,95.0,1.0,7.0,,99.0,99.0,,,2.0,6.0,,,1.0,13.0,0.0,,0.0,,95.0,,,,,,,3011.0,,,,,3011.0,,,,N,,{'$date': '1994-11-07T00:00:00Z'},0.0,,,N,,126127.0,{'$date': '1994-11-02T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,1.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,N,,,,,,,,,,,,,,,23.4,,,{'$date': '1994-11-02T00:00:00Z'},,9331,4010.0,4010.0,,,,1,1.0,1,0,U,U,,,,,,N,,,1.0,,1.0,F,F,F,,,,,,Y,,,1.0,,5219.0,,3.0,,,,ND,ND,ND,,,,,,N,N,N,Y,N,N,,,ND,ND,,,N,Y,,,,167.0,,160.0,,,,,,,,,,,,,,,ND,1.0,U,MI,MI,,,,,,,,,,42.0,35-49,23.4,,0.0,{'$date': '1994-02-28T00:00:00Z'},,,160.0,9331,0.0,4010.0,60.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 4, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1231888', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 3, 'RA2': 25, 'RB...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'CYCLOSPORIN_MAINT_PREV': 1, 'CYCLOSPORIN_AN...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",,,1023,0,4.0,4.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,N,,Unknown,KI,11/02/1994,,,,,,,,,,,,,,,,N,,0.0,,MI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,7.0,N,N,,,,N,,,,,,,,,,1.0,,,,,0.0,5219.0,111004,,,,,,,,,{'$date': '2009-02-15T00:00:00Z'},R,,,,3.0,25.0,7.0,27.0,,,,,,,1.0,13.0,,,,,,,,,,{'$date': '1994-11-02T00:00:00Z'},,1023,N,,,,10,10.0,N,,N,,,,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2009-02-15T00:00:00Z'},,,,,,,1.9,,,3.0,,,V,V,V,,,,,,A55017,N,,N,,,,L,,,,,{'$date': '1994-11-02T00:00:00Z'},101.0,,,,,C,,22MAY1995:00:00:00,13APR1994:00:00:00.000,17JUL1995:00:00:00.000,,,,,,,,,Y,,,56.0,,60.0,,,,,,,,,,,342925.0,KI,,,,,,,{'$oid': '63ce89206951740a82013606'},126127
1,2.0,11.0,,O,1.0,O,O,,,,,{'$date': '1994-02-28T00:00:00Z'},,,,,45.0,35-49,65+,,66.0,A,0.0,,,,,,,62.0,0.0,,,,,,,,,,,1.0,23.3,,22.9591,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,N,U,Y,,,,,,,,,,,0.0,,,,{'$date': '2008-03-14T00:00:00Z'},,,,,,,,,,2.4,2.1,,,,,24800,25.0,,,,,11.0,2.0,Y,Y,,37.0,37.0,,62.0,51.0,95.0,95.0,3.0,1.0,,99.0,99.0,,,1.0,2.0,,,13.0,3.0,99.0,,95.0,,96.0,,,,,,,3011.0,,,,,3011.0,,,{'$date': '1991-04-01T00:00:00Z'},Y,,,0.0,,,N,,233643.0,{'$date': '1994-04-06T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,4.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,N,,,,,,,,,,,,,,,23.0,,,{'$date': '1994-04-06T00:00:00Z'},,14353,4010.0,4010.0,,,,1,1.0,1,0,U,U,,,,,,N,,,998.0,,1.0,M,F,M,,,,,,Y,,,0.0,,1459.0,,3.0,,,,N,N,N,,,,,,N,N,N,Y,N,N,,,ND,ND,,,N,Y,,,,162.0,,163.0,,,,,,,,,,,,,,,ND,2.0,U,DE,DE,,,,,,,,,,45.0,35-49,23.0,,0.0,{'$date': '1994-02-28T00:00:00Z'},,,163.0,14353,0.0,4010.0,61.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'PX_STAT': 'L', 'PT_CODE': 142007, 'PX_STAT_...","[{'TRR_FOL_ID_CODE': 'A1291268', 'PX_STAT': 'L...",,"{'RT_DON_RETYP': 'N', 'RA1': 11, 'RA2': 2, 'RB...","{'ALG_IND': 0, 'ALG_MAINT': 0, 'ALG_ANTIREJ': ...","[{'SANDIMMUNE_MAINT_PREV': 0, 'SANDIMMUNE_MAIN...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",,,24800,0,1.0,1.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,N,,Unknown,KI,04/06/1994,,,,,,,,,,,,,,,,N,,25.0,,DE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,7.0,N,N,,,,Y,,,,,,,,,,13.0,,,,,1.0,5091.0,142007,,,,,,,,,{'$date': '1998-04-04T00:00:00Z'},L,,,,11.0,2.0,62.0,97.0,,,,,,,13.0,4.0,,,,,,,,,,{'$date': '1994-04-06T00:00:00Z'},,24800,N,,,,2,2.0,Y,,Y,,,Y,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.1,,,3.0,,,V,V,V,,,,,,A483757,Y,,Y,,,,L,,,,,{'$date': '1994-04-06T00:00:00Z'},101.0,,,,,C,,28FEB1995:00:00:00,28JUN1994:00:00:00.000,20JUN1994:00:00:00.000,,,,,,,,,Y,,,61.2,,61.0,,,,,,,,Y,,,149893.0,KI,,,,,,,{'$oid': '63ce89206951740a82013611'},233643
2,2.0,26.0,,O,1.0,O,O,,,,,,,,,,43.0,35-49,18-34,,19.0,A,1.0,,,,,,,7.0,27.0,,,,,,,,,,,2.0,25.8,,27.7551,,,,,,0.0,0.0,0.0,0.0,,,,,,2.0,1.0,2.0,,,,,,,U,,,U,,,,P,U,Y,,,,,,,,,,,3.0,,,,,,,,,,,,,,1.6,1.6,,,,,14446,,,,,,2.0,30.0,Y,Y,,689.0,689.0,,16.0,70.0,96.0,95.0,97.0,97.0,,99.0,99.0,,,4.0,7.0,,,4.0,8.0,96.0,,95.0,,95.0,,,,,,,3034.0,,,,,3040.0,,,{'$date': '1993-10-01T00:00:00Z'},Y,,{'$date': '1996-01-24T00:00:00Z'},0.0,,,,,288425.0,{'$date': '1996-01-18T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,,,,N,,,,,,,,,,,,,,,27.8,,,{'$date': '1996-01-18T00:00:00Z'},,20243,4010.0,4010.0,,,,4,4.0,4,1,U,U,,{'$date': '2010-04-26T00:00:00Z'},,,,N,,,998.0,,1.0,F,M,F,,,10.0,,,N,,,1.0,,5212.0,,1.0,,,,N,N,N,,,,,,N,N,N,Y,N,N,,,ND,ND,,,N,Y,,,,175.0,,175.0,,,,,,,,,,,,,,,,5.0,U,CA,CA,,,,,,,,,,41.0,35-49,27.8,,,{'$date': '1994-02-28T00:00:00Z'},,,175.0,20243,,4010.0,85.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1226948', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 26, 'RB...","{'STEROIDS_IND': 1, 'STEROIDS_MAINT': 1, 'STER...","[{'PROGRAF_MAINT_PREV': 1, 'PROGRAF_MAINT_CUR'...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...","[{'CHG_TY': 'D', 'UNOS_CAND_STAT_CD': 4010, 'C...",,14446,0,6.0,6.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,,,Unknown,KI,01/18/1996,,,,,,,,,,,,,,,,N,,,,CA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,,N,N,,,,Y,,,,,,,,,,13.0,,,,,0.0,7657.0,343794,,,,,,,,,{'$date': '2017-01-04T00:00:00Z'},R,,,,2.0,26.0,7.0,27.0,,,,,,,2.0,6.0,,,,,,,,,,{'$date': '1996-01-18T00:00:00Z'},,14446,N,,,,5,5.0,N,,N,,,,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2017-01-04T00:00:00Z'},,,,,,,1.7,,,3.0,,,V,V,V,,,,,,A328848,,,,,,,L,,,,,{'$date': '1996-01-18T00:00:00Z'},101.0,,,,,,,03JUN1996:00:00:00,12MAY1994:00:00:00.000,16JUL1996:00:00:00.000,,,,,,,,,Y,,,79.0,,85.0,,,,,,,,,,,317487.0,KI,,,,,,,{'$oid': '63ce89206951740a82013619'},288425
3,2.0,30.0,,B,1.0,B,B,,,,,,,,,,22.0,18-34,35-49,,47.0,A,0.0,,,,,,,13.0,46.0,,,,,,,,,,,0.0,15.4,,16.2982,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,P,U,Y,,,,,,,,,,,0.0,,,,,,,,,,,,,,1.1,1.4,,,,,7905,90.0,,,,,2.0,97.0,Y,Y,,548.0,548.0,,46.0,97.0,95.0,95.0,1.0,3.0,,99.0,99.0,,,6.0,3.0,,,8.0,9.0,99.0,,96.0,,95.0,,,,,,,3041.0,,,,,3041.0,,,{'$date': '1993-11-01T00:00:00Z'},Y,,{'$date': '1995-09-10T00:00:00Z'},0.0,,,,,3444.0,{'$date': '1995-08-31T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,9.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,N,,,,,,,,,,,,,,,16.3,,,{'$date': '1995-08-31T00:00:00Z'},,25172,4010.0,4010.0,,,,5,5.0,5,0,U,U,,{'$date': '2003-05-14T00:00:00Z'},,,,N,,,998.0,,1.0,M,F,M,,,10.0,,,N,,,1.0,,2813.0,,3.0,,,,ND,ND,ND,,,,,,N,N,N,Y,N,N,,,ND,ND,,,,Y,,,,167.0,,168.0,,,,,,,,,,,,,,,ND,1.0,U,WI,WI,,,,,,,,,,21.0,18-34,16.3,,90.0,{'$date': '1994-03-01T00:00:00Z'},,,168.0,25172,90.0,4010.0,46.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'WORK...","[{'TRR_FOL_ID_CODE': 'A1151031', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 30, 'RB...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'TRR_FOL_ID_CODE': 'A1151031', 'PX_STAT_DATE...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...","[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4010, 'C...",,7905,0,1.0,1.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,,,Unknown,KI,08/31/1995,,,,,,,,,,,,,,,,N,,90.0,,WI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,8.0,N,N,,,,Y,,,,,,,,,,14.0,,,,,0.0,7689.0,189347,,,,,,,,,{'$date': '2016-09-18T00:00:00Z'},R,,,,2.0,30.0,13.0,46.0,,,,,,,9.0,12.0,,,,,,,,,,{'$date': '1995-08-31T00:00:00Z'},,7905,N,,,,7,7.0,N,,Y,,,N,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,05/14/2003,,{'$date': '2016-09-18T00:00:00Z'},,,,,,,1.1,,,3.0,,,V,V,V,,,,,,A441197,Y,,Y,,,,L,,,,,{'$date': '1995-08-31T00:00:00Z'},101.0,,,,,,,13NOV1995:14:45:51,14APR1994:00:00:00.000,09NOV1995:00:00:00.000,,,,,,,,,Y,,,43.0,,46.0,,,,,,,,,,,757694.0,KI,,,,,,,{'$oid': '63ce89206951740a8201362d'},3444
4,2.0,3.0,,O,1.0,O,O,,,,,,,,,,53.0,50-64,50-64,,51.0,A,0.0,,,,,,,27.0,35.0,,,,,,,,,,,2.0,29.8,,29.8028,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,P,U,Y,,,,,,,,999.0,,,0.0,,,,{'$date': '2004-06-01T00:00:00Z'},,,,,,,,,,2.6,2.4,,,,,7905,3.0,,,,,2.0,3.0,Y,Y,,1738.0,1738.0,,8.0,50.0,96.0,95.0,99.0,99.0,,99.0,99.0,,,2.0,97.0,,,7.0,17.0,96.0,,95.0,,95.0,,,,,,,3039.0,,,,,3012.0,,,{'$date': '1998-06-01T00:00:00Z'},Y,,{'$date': '1998-12-08T00:00:00Z'},0.0,,,N,,114036.0,{'$date': '1998-12-03T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,4.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,N,,,,,,,,,,,,,,,29.8,,,{'$date': '1998-12-03T00:00:00Z'},,25172,4999.0,4999.0,,,,1,1.0,1,0,U,U,,{'$date': '2004-04-02T00:00:00Z'},,,,N,,,1.0,998.0,2.0,F,M,F,0.0,,10.0,,,N,,,1.0,,1947.0,,1.0,,,,ND,ND,ND,,,,,,N,N,N,Y,N,N,,,ND,ND,,,N,Y,,,,185.0,,185.0,,,,,,,,,,,,,,,,3.0,U,WI,WI,,,,,,,,,,49.0,35-49,29.8,,3.0,{'$date': '1994-03-01T00:00:00Z'},,,185.0,25172,3.0,4010.0,102.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1032909', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 3, 'RB1...","{'NEORAL_IND': 1, 'NEORAL_DAYS': 5, 'NEORAL_MA...","[{'NEORAL_MAINT_PREV': 1, 'NEORAL_MAINT_CUR': ...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...","[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4999, 'C...",,7905,0,7.0,7.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,N,,Unknown,KI,12/03/1998,,,,,,,,,,,,,,,,N,,3.0,,WI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,,N,N,,,,Y,,,,,,,,,,1.0,,,,,1.0,2007.0,193993,,,,,,,,,{'$date': '2004-06-01T00:00:00Z'},D,,,,2.0,3.0,27.0,35.0,,,,,,,4.0,7.0,,,,,,,,,,{'$date': '1998-12-03T00:00:00Z'},,7905,N,,,,7,7.0,N,,N,,,,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,04/02/2004,,,,,,,,,1.6,,,3.0,,,V,V,V,,,,,,A432439,N,,,,,,L,,,,,{'$date': '1998-12-03T00:00:00Z'},101.0,,,,,P,,03MAR1999:00:00:00,14APR1994:00:00:00.000,21JAN1999:00:00:00.000,,,,,,,,,Y,,,102.0,,102.0,,,,,,,,,,,546473.0,KI,,,,,,,{'$oid': '63ce89206951740a82013630'},114036
5,25.0,11.0,,A,1.0,A,A,,,,,{'$date': '1994-03-02T00:00:00Z'},,,,,29.0,18-34,18-34,,24.0,A,0.0,,,,,,,35.0,18.0,,,,,,,,,,,1.0,30.9,,30.8329,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,N,U,Y,,,,,,,,,,,1.0,,,,{'$date': '2012-04-25T00:00:00Z'},,,,,,,,,,1.1,1.2,,,,,24800,2.0,,,,,25.0,11.0,Y,Y,,224.0,224.0,,35.0,39.0,96.0,95.0,4.0,97.0,,99.0,99.0,,,1.0,7.0,,,1.0,11.0,96.0,,95.0,,96.0,,,,,,,3034.0,,,,,3034.0,,,{'$date': '1993-06-01T00:00:00Z'},Y,,{'$date': '1994-11-03T00:00:00Z'},0.0,,,N,,145365.0,{'$date': '1994-10-12T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,1.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,N,,,,,,,,,,,,,,,30.8,,,{'$date': '1994-10-12T00:00:00Z'},,14353,4010.0,4010.0,,,,1,1.0,1,0,U,U,,{'$date': '2003-04-02T00:00:00Z'},,,,N,,,998.0,,1.0,F,F,F,,,10.0,,,N,,,1.0,,3094.0,,3.0,,,,N,N,N,,,,,,N,N,N,Y,N,N,,,N,N,,,N,Y,,,,157.0,,157.0,,,,,,,,,,,,,,,ND,2.0,U,MD,MD,,,,,,,,,,28.0,18-34,30.8,,0.0,{'$date': '1994-03-02T00:00:00Z'},,,157.0,14353,0.0,4010.0,76.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 4, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1302337', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 25, 'RA2': 11, 'R...","{'CYCLOSPORIN_IND': 1, 'CYCLOSPORIN_MAINT': 1,...","[{'SANDIMMUNE_MAINT_PREV': 1, 'SANDIMMUNE_MAIN...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",,,24800,0,4.0,4.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,N,,Unknown,KI,10/12/1994,,,,,,,,,,,,,,,,N,,6.0,,MD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,8.0,N,N,,,,Y,,,,,,,,,,13.0,,,,,1.0,6405.0,521828,,,,,,,,,{'$date': '2003-11-21T00:00:00Z'},L,,,,25.0,11.0,35.0,18.0,,,,,,,1.0,10.0,,,,,,,,,,{'$date': '1994-10-12T00:00:00Z'},,24800,N,,,,2,2.0,Y,,Y,,,Y,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,04/02/2003,,,,,,,,,1.7,,,3.0,,,V,V,V,,,,,,A440915,Y,,Y,,,,L,,,,,{'$date': '1994-10-12T00:00:00Z'},101.0,,,,,C,,05APR1995:00:00:00,10MAY1994:00:00:00.000,11JAN1995:00:00:00.000,,,,,,,,,Y,,,76.1,,76.0,,,,,,,,,,,453221.0,KI,,,,,,,{'$oid': '63ce89206951740a82013663'},145365
6,11.0,24.0,,O,1.0,O,O,,,,,{'$date': '1994-03-02T00:00:00Z'},,,,,19.0,18-34,35-49,,45.0,A,0.0,,,,,,,38.0,62.0,,,,,,,,,,,0.0,22.2,,22.2222,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,N,U,Y,,,,,,,,,,,1.0,,,,,,,,,,,,,,1.2,1.7,,,,,4464,3.0,,,,,11.0,24.0,Y,Y,,63.0,63.0,,38.0,62.0,95.0,95.0,7.0,8.0,,99.0,99.0,,,6.0,7.0,,,11.0,15.0,99.0,,95.0,,0.0,,,,,,,,,,,,3001.0,,,{'$date': '1993-12-01T00:00:00Z'},Y,,{'$date': '1994-05-16T00:00:00Z'},0.0,,,N,,314451.0,{'$date': '1994-05-04T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,12.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,N,,,,,,,,,,,,,,,22.2,,,{'$date': '1994-05-04T00:00:00Z'},,14911,4010.0,4010.0,,,,5,7.0,7,0,U,U,,{'$date': '1995-12-17T00:00:00Z'},,,,N,,,2.0,,2.0,F,F,F,,,,,,N,,,1.0,,592.0,,3.0,,,,N,N,N,,,,,,N,N,N,Y,N,N,,,U,U,,,N,Y,,,,147.0,,150.0,,,,,,,,,,,,,,,ND,1.0,U,NC,NC,,,,,,,,,,18.0,18-34,22.2,,3.0,{'$date': '1994-03-02T00:00:00Z'},,,150.0,14911,3.0,4010.0,50.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 4, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1740283', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 11, 'RA2': 24, 'R...","{'CYCLOSPORIN_IND': 1, 'CYCLOSPORIN_MAINT': 1,...","[{'STEROIDS_MAINT_PREV': 1, 'STEROIDS_MAINT_CU...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",,,4464,0,1.0,1.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,N,,Unknown,KI,05/04/1994,,,,,,,,,,,,,,,,N,,3.0,,NC,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,8.0,N,N,,,,U,,,,,,,,,,13.0,,,,,0.0,1937.0,300779,,,,,,,,,{'$date': '1999-08-23T00:00:00Z'},A,,,,11.0,24.0,38.0,62.0,,,,,,,12.0,15.0,,,,,,,,,,{'$date': '1994-05-04T00:00:00Z'},,4464,N,,,,11,11.0,N,,N,,,,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,12/17/1995,,,,,,,,,1.5,,,3.0,,,V,V,V,,,,,,A447503,N,,,,,,L,,,,,{'$date': '1994-05-04T00:00:00Z'},101.0,,,,,C,,20JAN1995:00:00:00,10MAY1994:00:00:00.000,02DEC1994:00:00:00.000,,,,,,,,,Y,,,48.0,,50.0,,,,,,,,,,,683043.0,KI,,,,,,,{'$oid': '63ce89206951740a82013665'},314451
7,11.0,24.0,,A,1.0,A,A,,,,,,,,,,20.0,18-34,35-49,,41.0,A,1.0,,,,,,,51.0,62.0,,,,,,,,,,,1.0,19.9,,19.0451,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,P,U,Y,,,,,,,,,,,1.0,,,,,,,,,,,,,,1.5,1.5,,,,,19964,0.0,,,,,3.0,11.0,Y,Y,,448.0,448.0,,51.0,45.0,95.0,95.0,4.0,7.0,,99.0,99.0,,,7.0,2.0,,,4.0,7.0,96.0,,96.0,,95.0,,,,,,,3034.0,,,,,3041.0,,,{'$date': '1993-07-01T00:00:00Z'},Y,,{'$date': '1995-05-30T00:00:00Z'},0.0,,,,,315062.0,{'$date': '1995-05-24T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,N,,,,,,,,,,,,,,,19.0,,,{'$date': '1995-05-24T00:00:00Z'},,1426,4010.0,4010.0,,,,1,1.0,1,0,U,U,,,,,,N,,,1.0,1.0,1.0,F,M,F,,,,,,Y,,,1.0,,6681.0,,3.0,,,,N,N,N,,,,,,N,N,N,Y,N,N,,,N,N,,,N,Y,,,,172.0,,173.0,,,,,,,,,,,,,,,N,3.0,U,GA,GA,,,,,,,,,,18.0,18-34,19.0,,0.0,{'$date': '1994-03-02T00:00:00Z'},,,173.0,1426,0.0,4010.0,57.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1013243', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 11, 'RA2': 24, 'R...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'NEORAL_MAINT_PREV': 1, 'NEORAL_MAINT_CUR': ...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...","[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4999, 'C...",,19964,0,1.0,1.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,,,Unknown,KI,05/24/1995,,,,,,,,,,,,,,,,N,,1.0,,GA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,,N,N,,,,Y,,,,,,,,,,13.0,,,,,0.0,6681.0,386552,,,,,,,,,{'$date': '2013-09-07T00:00:00Z'},R,,,,11.0,24.0,51.0,62.0,,,,,,,4.0,97.0,,,,,,,,,,{'$date': '1995-05-24T00:00:00Z'},,19964,N,,,,3,3.0,N,,N,,,,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2013-09-07T00:00:00Z'},,,,,,,1.2,,,3.0,,,V,V,V,,,,,,A234998,N,,N,,,,L,,,,,{'$date': '1995-05-24T00:00:00Z'},101.0,,,,,,,20SEP1995:09:30:36,20APR1994:00:00:00.000,17JUL1995:00:00:00.000,,,,,,,,,Y,,,59.0,,57.0,,,,,,,,,,,170182.0,KI,,,,,,,{'$oid': '63ce89206951740a8201366a'},315062
8,34.0,30.0,,O,2.0,B,O,,,,,{'$date': '1994-03-02T00:00:00Z'},,,,,28.0,18-34,35-49,,37.0,A,0.0,,,,,,,13.0,42.0,,,,,,,,,,,0.0,21.2,,21.1938,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,N,U,Y,,,,,,,,,,,1.0,,,,,,,,,,,,,,1.4,1.5,,,,,22103,47.0,,,,,34.0,30.0,Y,Y,,181.0,181.0,,13.0,42.0,95.0,95.0,7.0,97.0,,99.0,99.0,,,1.0,4.0,,,2.0,8.0,96.0,,96.0,,96.0,,,,,,,3006.0,1.0,,,,3006.0,,,{'$date': '1993-12-01T00:00:00Z'},Y,,{'$date': '1994-09-13T00:00:00Z'},0.0,,,N,,173328.0,{'$date': '1994-08-30T00:00:00.000Z'},RKI,,Y,L,,0.0,0.0,2.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,N,,,N,,,,,,,,,,,,,998.0,,21.2,,,{'$date': '1994-08-30T00:00:00Z'},,11377,4010.0,4010.0,,,,4,1.0,1,1,U,U,,,,,,N,,,2.0,,1.0,M,F,M,,,,,,Y,,,1.0,,2395.0,,5.0,,,,N,N,N,,,,,,N,N,N,Y,N,N,,,N,N,,,N,Y,,,,165.0,,165.0,,,,,,,,,,,,,,,ND,0.0,U,TX,TX,,,,,,,,,,28.0,18-34,21.2,,48.0,{'$date': '1994-03-02T00:00:00Z'},,,165.0,11377,43.0,4010.0,57.7,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 4, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A109712', 'PX_STAT': 'A'...",,"{'RT_DON_RETYP': 'Y', 'RA1': 34, 'RA2': 30, 'R...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'NEORAL_MAINT_PREV': 0, 'NEORAL_MAINT_CUR': ...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",,,22103,0,4.0,4.0,,,,,N,0,,,,,,,,U,U,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,N,,Unknown,KI,08/30/1994,,,,,,,,,,,,,,,,N,,47.0,N,TX,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,7.0,N,N,,,,N,,,,,,,,14.0,,14.0,,,,,0.0,2395.0,64784,,,,,,,,,{'$date': '2001-03-21T00:00:00Z'},R,,,,34.0,30.0,13.0,42.0,34.0,30.0,13.0,42.0,2.0,8.0,2.0,8.0,,,,,,,,,,{'$date': '1994-08-30T00:00:00Z'},,22103,N,,,,4,4.0,N,,N,,,,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2001-03-21T00:00:00Z'},,,,,,,1.6,,,3.0,,,V,V,V,,,,,,A274607,N,,N,,,,R,,,,,{'$date': '1994-08-30T00:00:00Z'},102.0,,,,,C,,11OCT1995:08:20:09,01NOV1995:00:00:00.000,09NOV1995:00:00:00.000,,,,,,,,,Y,,,57.7,,57.7,,,,,,,,,,,736166.0,KI,,,,,,,{'$oid': '63ce89206951740a82013674'},173328
9,2.0,31.0,,O,1.0,O,O,,,,,,,,,,49.0,35-49,18-34,,25.0,A,1.0,,,,,,,7.0,18.0,,,,,,,,,,,1.0,24.8,,23.7228,,,,,,0.0,0.0,0.0,0.0,,,,,,1.0,1.0,1.0,,,,,,,U,,,U,,,,N,U,Y,,,,,,,,,,,1.0,,,,{'$date': '2005-09-06T00:00:00Z'},,,,,,,,,,1.6,1.3,,,,,11253,0.0,,,,,1.0,31.0,Y,Y,,497.0,497.0,,8.0,7.0,96.0,95.0,97.0,7.0,,99.0,99.0,,,1.0,3.0,,,3.0,4.0,96.0,,95.0,,95.0,,,,,,,3039.0,,,,,3039.0,,,{'$date': '1990-10-01T00:00:00Z'},Y,,{'$date': '1995-07-18T00:00:00Z'},0.0,,,,,261474.0,{'$date': '1995-07-12T00:00:00.000Z'},LKI,,N,L,,0.0,0.0,4.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,,,,N,,,,,,,,,,,,,,,23.7,,,{'$date': '1995-07-12T00:00:00Z'},,20429,4999.0,4999.0,,,,4,4.0,4,1,U,U,,,,,,N,,,2.0,,1.0,M,M,M,,,,,,Y,,,0.0,,2193.0,,3.0,,,,ND,ND,ND,,,,,,N,N,N,Y,N,N,,,ND,ND,,,N,Y,,,,172.0,,173.0,,,,,,,,,,,,,,,ND,3.0,U,FL,FL,,,,,,,,,,47.0,35-49,23.7,,3.0,{'$date': '1994-03-02T00:00:00Z'},,,173.0,20429,4.0,4010.0,71.0,,,,,,,,,,,,,,,,,,,1,,,,"[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1108267', 'PX_STAT': 'A...",,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 31, 'RB...","{'CYCLOSPORIN_IND': 1, 'CYCLOSPORIN_MAINT': 1,...","[{'SANDIMMUNE_MAINT_PREV': 1, 'SANDIMMUNE_MAIN...",,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...","[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4999, 'C...",,11253,0,2.0,2.0,,,,,N,0,,,,,,,,U,,,U,,,,,,,,,,3.0,,,,,,0.0,0.0,0.0,,,Unknown,KI,07/12/1995,,,,,,,,,,,,,,,,N,,4.0,,FL,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,N,,,,N,N,,,,Y,,,,,,,,,,13.0,,,,,1.0,3709.0,49256,,,,,,,,,{'$date': '2001-07-13T00:00:00Z'},L,,,,2.0,31.0,18.0,7.0,,,,,,,12.0,4.0,,,,,,,,,,{'$date': '1995-07-12T00:00:00Z'},,11253,N,,,,3,3.0,N,,N,,,,,,15.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.4,,,3.0,,,V,V,V,,,,,,A438394,,,N,,,,L,,,,,{'$date': '1995-07-12T00:00:00Z'},101.0,,,,,,,19SEP1995:13:08:03,13APR1994:00:00:00.000,13DEC1995:00:00:00.000,,,,,,,,,Y,,,73.4,,71.0,,,,,,,,,,,61775.0,KI,,,,,,,{'$oid': '63ce89206951740a82013677'},261474


#### Manually handling varying duplicates

In [8]:
cols_with_identical_DON_y= ['CITIZENSHIP', 'CMV_IGG', 'CMV_IGM', 'EDUCATION', 'GENDER', 'ABO', 'HBV_CORE',
                           'HBV_SUR_ANTIGEN', ]
for col in cols_with_identical_DON_y:
    is_equal = kidpan_living[col + '_DON'].equals(kidpan_living[col+'_y'])
    print(col, is_equal )
    if (is_equal):
        kidpan_living.drop(col+'_y', axis=1, inplace=True)
        kidpan_living.rename(columns={col+'_x': col+'_REC'}, inplace=True)
        different_duplicates.remove(col)


CITIZENSHIP True
CMV_IGG True
CMV_IGM True
EDUCATION True
GENDER True
ABO True
HBV_CORE True
HBV_SUR_ANTIGEN True


In [9]:
kidpan_living['ETHCAT_DON_x'] = kidpan_living['ETHCAT_DON_x'].astype(int)
is_equal = kidpan_living['ETHCAT_DON_x'].equals(kidpan_living['ETHCAT_DON_y'])
print('ETHCAT_DON', is_equal)
if(is_equal):
    kidpan_living.drop('ETHCAT_DON_y', axis=1, inplace=True)
    kidpan_living.rename(columns={'ETHCAT_DON_x': 'ETHCAT_DON'}, inplace=True)
    different_duplicates.remove('ETHCAT_DON')
    

ETHCAT_DON True


In [10]:
kidpan_living.drop('LIV_DON_TY_y', axis=1, inplace=True)
kidpan_living.rename(columns={'LIV_DON_TY_x': 'LIV_DON_TY'}, inplace=True)
different_duplicates.remove('LIV_DON_TY')

In [11]:
columns_to_just_rename = ["AGE_BIN", 'REGION']
for col in columns_to_just_rename:
    kidpan_living.rename(columns={col+'_x': col+'_REC',
                                  col+'_y': col+'_DON'}, inplace=True)
    different_duplicates.remove(col)

In [12]:
print(different_duplicates)
for col in different_duplicates:
    kidpan_living.drop(col+'_x', axis=1, inplace=True)
    kidpan_living.drop(col+'_y', axis=1, inplace=True)
kidpan_living

['_id', 'PX_STAT', 'CITIZEN_COUNTRY']


Unnamed: 0,WL_ORG,NUM_PREV_TX,A1,A2,B1,B2,DR1,DR2,GENDER_REC,ABO_REC,WGT_KG_TCR,HGT_CM_TCR,BMI_TCR,CITIZENSHIP_REC,PERM_STATE,FUNC_STAT_TCR,DGN_TCR,INIT_WGT_KG,INIT_HGT_CM,REM_CD,DAYSWAIT_CHRON,END_STAT,INIT_AGE,END_DATE,INIT_DATE,ETHNICITY,ETHCAT,PT_CODE,INIT_BMI_CALC,END_BMI_CALC,COMPOSITE_DEATH_DATE,WLKI,REGION_REC,BW4,BW6,C1,C2,DR51,DR51_2,DR52,DR52_2,DR53,DR53_2,DQ1,DQ2,WL_ID_CODE,DATA_TRANSPLANT,DATA_WAITLIST,CTR_CODE,OPO_CTR_CODE,INIT_OPO_CTR_CODE,END_OPO_CTR_CODE,LISTING_CTR_CODE,Kidney_Pancreas_WL_History,INIT_AGE_BIN,CURRENT_PRA,PEAK_PRA,USE_WHICH_PRA,DONATION,ON_DIALYSIS,INIT_CURRENT_PRA,INIT_PEAK_PRA,INIT_STAT,ACTIVATE_DATE,EXH_PERIT_ACCESS,EXH_VASC_ACCESS,PREV_TX,PREV_KI_TX,FUNC_STAT_TRR,MALIG_TRR,PRI_PAYMENT_TRR_KI,TX_DATE,FIRST_WK_DIAL,SERUM_CREAT,PRE_TX_TXFUS,TXKID,DON_RETYP,RESUM_MAINT_DIAL_DT,DA1,DA2,DB1,DB2,DDR1,DDR2,RA1,RA2,RB1,RB2,RDR1,RDR2,AMIS,BMIS,DRMIS,HLAMIS,NPKID,NPPAN,DDAVP_DON,CMV_DON,HBV_CORE_DON,HBV_SUR_ANTIGEN_DON,ETHCAT_DON,COD_CAD_DON,DEATH_CIRCUM_DON,DEATH_MECH_DON,CITIZENSHIP_DON,HEP_C_ANTI_DON,ABO_DON,DON_TY,GENDER_DON,HOME_STATE_DON,NON_HRT_DON,ANTIHYPE_DON,BLOOD_INF_DON,BUN_DON,CREAT_DON,DOBUT_DON_OLD,DOPAMINE_DON_OLD,HTLV1_OLD_DON,HTLV2_OLD_DON,PRETREAT_MED_DON_OLD,PT_DIURETICS_DON,PT_STEROIDS_DON,PT_T3_DON,PT_T4_DON,PULM_INF_DON,SGOT_DON,SGPT_DON,TBILI_DON,URINE_INF_DON,VASODIL_DON,VDRL_DON,CLIN_INFECT_DON,CONTIN_ALCOHOL_OLD_DON,CONTIN_CIG_DON,CONTIN_IV_DRUG_OLD_DON,CONTIN_OTH_DRUG_DON,EXTRACRANIAL_CANCER_DON,HIST_ALCOHOL_OLD_DON,CANCER_SITE_DON,HIST_CIG_DON,HIST_COCAINE_DON,HIST_HYPERTENS_DON,HIST_IV_DRUG_OLD_DON,INTRACRANIAL_CANCER_DON,HIST_CANCER_DON,HIST_DIABETES_DON,HIST_OTH_DRUG_DON,SKIN_CANCER_DON,DIABETES_DON,HEPARIN_DON,HGT_CM_DON_CALC,WGT_KG_DON_CALC,BMI_DON_CALC,KDPI,KDRI_MED,KDRI_RAO,END_STAT_KI,CREAT1Y,DIAL_DATE,FAILDATE_KI,ABO_MAT,AGE,DISTANCE,RESUM_MAINT_DIAL,DIAL_TRR,DIAG_KI,COLD_ISCH_KI,GRF_STAT_KI,GRF_FAIL_CAUSE_TY_KI,DWFG_KI,GTIME_KI,GSTATUS_KI,DAYSWAIT_CHRON_KI,TX_PROCEDUR_TY_KI,TRTREJ6M_KI,ORGAN,CMV_IGG_REC,CMV_IGM_REC,HBV_CORE_REC,HBV_SUR_ANTIGEN_REC,HCV_SEROSTATUS,HIV_SEROSTATUS,CMV_STATUS,PREV_TX_ANY,MED_COND_TRR,PX_STAT_DATE,SHARE_TY,PSTATUS,PTIME,PAYBACK,ECD_DONOR,AGE_GROUP,MALIG,HGT_CM_CALC,WGT_KG_CALC,BMI_CALC,STATUS_TCR,STATUS_TRR,STATUS_DDR,VAL_DT_DDR,VAL_DT_TCR,VAL_DT_TRR,LT_ONE_WEEK_DON,REJCNF_KI,REJTRT_KI,TRR_ID_CODE,DISCHARGE_DATE,RECOV_OUT_US,PROTEIN_URINE,INOTROP_AGENTS,CARDARREST_NEURO,INOTROP_SUPPORT_DON,TATTOOS,LT_KI_BIOPSY,RT_KI_BIOPSY,REFERRAL_DATE,RECOVERY_DATE,DONOR_ID,Kidney_Pancreas_HLA,Kidney_Pancreas_Immuno_Discharge,Kidney_Pancreas_Immuno_Followup,Kidney_Followup,Kidney_Malig_Followup,Kidney_Pancreas_PRA,AGE_BIN_REC,PREV_PREG,CREAT6M,PRVTXDIF_KI,COD_KI,PREV_TX_ANY_N,PREV_KI_DATE,EDUCATION_REC,DGN2_TCR,DIAB,DRUGTRT_COPD,PERIP_VASC,AGE_DIAB,MALIG_TCR_KI,PRI_PAYMENT_TCR_KI,MALIG_TCR_PA,PRI_PAYMENT_TCR_PA,PREV_PA_TX,TXPAN,TRTREJ1Y_KI,PRI_PAYMENT_TRR_PA,ART_RECON,DUCT_MGMT,GRF_PLACEM,PA_PRESERV_TM,VASC_MGMT,VEN_EXT_GRF,END_STAT_PA,DIAG_PA,GRF_STAT_PA,GTIME_PA,GSTATUS_PA,COD_PA,DAYSWAIT_CHRON_PA,TX_PROCEDUR_TY_PA,TX_TYPE,REJ_BIOPSY,SURG_INCIS,OPER_TECH,Kidney_Pancreas_Followup,Pancreas_Followup,Kidney_Pancreas_Malig_Followup,Pancreas_Malig_Followup,WLKP,CREAT_TRR,HAPLO_TY_MATCH_DON,CMV_OLD_LIV_DON,CMV_TEST_DON,HBV_TEST_DON,HCV_TEST_DON,HCV_RIBA_DON,HCV_ANTIBODY_DON,LIV_DON_TY,RETXDATE_KI,HMO_PPO_DON,FUNC_STAT_TRF,GFR,RDA1,RDA2,RDB1,RDB2,RDDR1,RDDR2,OTHER_INF_DON,PULM_INF_CONF_DON,HYPERTENS_DUR_DON,DIET_DON,DIURETICS_DON,OTHER_HYPERTENS_MED_DON,WLPA,PUMP_KI,CANCER_FREE_INT_DON,DIABDUR_DON,INSULIN_DEP_DON,HIST_INSULIN_DEP_DON,INSULIN_DUR_DON,TRTREJ1Y_PA,TRTREJ6M_PA,COMPL_ABSC,COMPL_ANASLK,COMPL_PANCREA,ADMISSION_DATE,BLOOD_INF_CONF_DON,URINE_INF_CONF_DON,WARM_ISCH_TM_DON,CORE_COOL_DON,INSULIN_PA,INSULIN_RESUMED_DATE_PA,METHOD_BLOOD_SUGAR_CONTROL_PA,FAILDATE_PA,GRF_FAIL_CAUSE_TY_PA,GRF_VASC_THROMB_PA,INFECT_PA,BLEED_PA,ANAST_LK_PA,REJ_ACUTE_PA,BIOP_ISLET_PA,PANCREATIT_PA,REJ_CHRONIC_PA,PK_DA1,PK_DA2,PK_DB1,PK_DB2,PK_DDR1,PK_DDR2,COD2_KI,REJTRT_PA,ENTERIC_DRAIN,ENTERIC_DRAIN_DT,PERM_STATE_TRR,WORK_INCOME_TRR,ACUTE_REJ_EPI_KI,EBV_SEROSTATUS,TOT_SERUM_ALBUM,RETXDATE_PA,COD3_KI,EBV_TEST_DON,HCV_RNA_DON,EDUCATION_DON,MEDICARE_DON,MEDICAID_DON,OTH_GOVT_DON,PRIV_INS_DON,SELF_DON,DONATION_DON,FREE_DON,HBV_DNA_DON,PX_NON_COMPL_PA,CMV_NUCLEIC_DON,CMV_IGG_DON,CMV_IGM_DON,PRI_PAYMENT_DON,CONTROLLED_DON,WORK_INCOME_TCR,ORG_REC_ON,REC_ON_ICE,WLLI,TXLIV,MULTIORG,DEATH_DATE,OTHER_INF_CONF_DON,INO_PROCURE_AGENT_1,LIPASE,AMYLASE,PREV_MALIG_TY,MALIG_TY,WLHR,ACADEMIC_PRG_TCR,ACADEMIC_LEVEL_TCR,ARGININE_DON,INSULIN_DON,CDC_RISK_HIV_DON,COD_WL,WLIN,LT_KI_GLOMERUL,RT_KI_GLOMERUL,TXHRT,CONTIN_COCAINE_DON,EBV_DNA_DON,EBV_IGG_DON,EBV_IGM_DON,RECOV_COUNTRY,WLPI,RESUSCIT_DUR,INO_PROCURE_AGENT_2,PRI_PAYMENT_CTRY_TRR_KI,PRI_PAYMENT_CTRY_TCR_KI,HBSAB_DON,EBV_IGG_CAD_DON,EBV_IGM_CAD_DON,MALIG_TY_TRR,TXINT,WLLU,ACADEMIC_LEVEL_TRR,ACADEMIC_PRG_TRR,REJ_HYPER_PA,PRVTXDIF_PA,REJCNF_PA,COD2_PA,BLOOD_SUGAR_DIET_PA,BLOOD_SUGAR_MEDICATION_PA,BLOOD_SUGAR_MED_RESUMED_DATE_PA,PRE_AVG_INSULIN_USED_OLD_TRR,COD3_PA,ACUTE_REJ_EPI_PA,PRI_PAYMENT_CTRY_TCR_PA,C_PEPTIDE_PA_TRR,HBA1C_PA_TRR,HBV_SURF_TOTAL,HIV_NAT,HCV_NAT,HBV_NAT,PRI_PAYMENT_CTRY_TRR_PA,REC_ON_PUMP,YR_ENTRY_US_TCR,INSULIN_DURATION_PA,LOS,INSULIN_DOSAGE_OLD_PA,CITIZEN_COUNTRY_DON,TXLNG,INO_PROCURE_AGENT_3,WLHL,END_CPRA,HBV_NAT_DON,HCV_NAT_DON,HIV_NAT_DON,PRI_PAYMENT_CTRY_DON,WT_QUAL_DATE,DAYSWAIT_ALLOC,MAX_KDPI_LOCAL_ZERO_ABDR,MAX_KDPI_LOCAL_NON_ZERO_ABDR,MAX_KDPI_IMPORT_ZERO_ABDR,MAX_KDPI_IMPORT_NON_ZERO_ABDR,ANTIBODY_TESTED,DIALYSIS_DATE,INACT_REASON_CD,CREAT_CLEAR,END_EPTS,END_CPRA_DETAIL,ADMIT_DATE_DON,FIN_RESIST_TX,CREAT_CLEAR_DATE,GFR_DATE,A2A2B_ELIGIBILITY,L_FIN_FLOW_RATE_TX,L_FIN_RESIST_TX,INIT_CPRA,R_FIN_FLOW_RATE_TX,R_FIN_RESIST_TX,C_PEPTIDE_PA_TCR,INSULIN_DOSAGE_PA,C_PEPTIDE,C_PEPTIDEDATE,TXVCA,INIT_EPTS,HBA1C_PA_TCR,WLVC,PRE_AVG_INSULIN_USED_TRR,AGE_BIN_DON,AGE_DON,DBW6,DDQ1,DON_DATE,DON_ORG,HCV_ANTIBODY,HCV_RIBA,HOME_STATE,KIDNEY_RECOV,LIVER_RECOV,LUNG_RECOV,ORG_RECOVERY_DT,RECOV_FACILITY_CODE,REGION_DON,STATUS_LDR,VAL_DT_LDR,VIRUSES_TESTED,DBW4,DC1,DDR52,REOP_BLEED_KI,REOP_BLEED_LI,REOP_BOWEL_KI,REOP_BOWEL_LI,REOP_HERNIA_KI,REOP_HERNIA_LI,REOP_LI_FAIL,REOP_OTH_KI,REOP_OTH_LI,REOP_VASC_KI,REOP_VASC_LI,DC2,DDQ2,DDR53,DDP1,DDP2,REOP_BILIARY,DDR51,CMV_NUCLEIC,DIABETES,FUNC_STAT,HBV_DNA,HCV_RNA,HEALTH_INS,HIST_CANCER,HIST_CIG,HIST_HYPER,HYPERTENSION,INIT_DISCHARGE_DT,KI_PROC_TY,MARITAL_STAT,NON_AUTO_BLOOD,OTH_COMP_KI,OTH_INTER_PROC_KI,PHYSICAL_CAPACITY,POSTOP_TEST_DT,POSTOP_URINE_PROTEIN,PREDON_HGT,PREDON_WGT,PREOP_URINE_PROTEIN,READMISSION_KI,REOPERATION_KI,TOBACCO_USE,VASC_COMP_KI,WORK_INCOME,BP_PREOP_DIAST,BP_PREOP_SYST,EBV_IGG,EBV_IGM,KI_CREAT_PREOP,WGT_KG,Living_Donor_Follow,BP_POSTOP_DIAST,BP_POSTOP_SYST,KI_CREAT_POSTOP,DUR_ABSTINENCE,PACK_YRS,CONVERT_OPEN_KI,DEATH_DT,CMV_TOTAL,EBV_TOTAL,PREOP_URINE_RATIO,FFP_UNITS,OTH_COMP_KI_INTER,PLATELETS_UNITS,PRBC_UNITS,BILIARY_COMP,BIOPSY_LI,LI_PROC_TY,OTH_COMP_LI,OTH_INTER_PROC_LI,POSTOP_ALBUM,POSTOP_ALK_PHOS,POSTOP_BILI,POSTOP_CREAT_LI,POSTOP_INR,POSTOP_SGOT_AST,POSTOP_SGPT_ALT,PREOP_ALBUM,PREOP_ALK_PHOS,PREOP_BILI,PREOP_CREAT_LI,PREOP_INR,PREOP_SGOT_AST,PREOP_SGPT_ALT,READMISSION_LI,REOPERATION_LI,VASC_COMP_LI,HYPER_DIET,HYPER_DIUR,HYPER_MEDS,CANCER_FREE,OTH_INTER_PROC_KI_DT,READMISSION_KI_DT,READMISSION_KI_REASON,REOP_OTH_KI_DT,YR_ENTRY_US,MACRO_FAT,MICRO_FAT,OTH_COMP_LI_INTER,OTH_INTER_PROC_LI_DT,REOP_BOWEL_KI_DT,REOP_HERNIA_KI_DT,POSTOP_URINE_RATIO,CONVERT_OPEN_LU,INTRAOP_COMP,LU_COMP,LU_COMP_REASON,LU_PROC_TY,PREOP_FEF_BEFORE,PREOP_FEV1_BEFORE,PREOP_FVC_BEFORE,PREOP_PAO2,PREOP_TLC_BEFORE,READMISSION_LU,REOP_BLEED_KI_DT,VASC_COMP_LI_INTER,BILIARY_COMP_GRADE,READMISSION_LI_DT,READMISSION_LI_REASON,VASC_COMP_KI_INTER,DDPA1,DDPA2,DDQA1,DDQA2,DON_ORG2,REOP_BOWEL_LI_DT,REOP_BLEED_LI_DT,REOP_OTH_LI_DT,REOP_VASC_LI_DT,DDR51_2,DDR52_2,DDR53_2,REOP_BILIARY_DT,COD,REOP_VASC_KI_DT,PREOP_LUNG_CAP,PREOP_FEF_AFTER,PREOP_FEV1_AFTER,PREOP_FVC_AFTER,PREOP_TLC_AFTER,READMISSION_LU_DT,READMISSION_LU_REASON,REOP_LI_FAIL_DT,REOP_HERNIA_LI_DT
0,KI,0.0,3.0,25.0,7.0,27.0,1.0,13.0,F,A,60.0000,160.00,23.4375,1.0,MI,1.0,3011.0,60.0000,160.00,15.0,247.0,4010.0,42.0,{'$date': '1994-11-02T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},0,1,111004,23.4,23.4,,,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,342925.0,Y,Y,1023,Unknown,9331,9331,1023,,35-49,0.0,0.0,C,N,N,0.0,0.0,4010.0,{'$date': '1994-02-28T00:00:00Z'},U,U,N,N,1.0,U,1.0,{'$date': '1994-11-02T00:00:00Z'},N,1.9,N,L,N,,1.0,25.0,7.0,27.0,1.0,13.0,3.0,25.0,7.0,27.0,1.0,13.0,1.0,0.0,0.0,1.0,0.0,0.0,,,ND,N,1,,,,1.0,,O,L,F,MI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4010.0,1.4,,,2.0,43.0,0.0,,N,3011.0,3.0,Y,,N,5219.0,1.0,247.0,101.0,N,KI,U,U,ND,N,N,ND,U,N,3.0,{'$date': '2009-02-15T00:00:00Z'},3.0,0.0,5219.0,N,,A,U,167.0,56.0,20.1,V,V,,,13APR1994:00:00:00.000,17JUL1995:00:00:00.000,N,N,N,A55017,{'$date': '1994-11-07T00:00:00Z'},N,,,,,,,,,{'$date': '1994-11-02T00:00:00Z'},126127.0,"{'RT_DON_RETYP': 'N', 'RA1': 3, 'RA2': 25, 'RB...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'CYCLOSPORIN_MAINT_PREV': 1, 'CYCLOSPORIN_AN...","[{'CARE_PROVIDED_BY': 4, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1231888', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",35-49,7.0,1.7,,,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,,,,,,,,,,,6.7,3.0,P,Y,Y,Y,ND,N,4.0,{'$date': '2009-02-15T00:00:00Z'},U,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,35-49,44.0,95.0,2.0,{'$date': '1994-11-02T00:00:00.000Z'},LKI,N,ND,MI,1,0,0,11/02/1994,1023,10.0,V,22MAY1995:00:00:00,Y,95.0,1.0,0.0,,,,,,,,,,,,7.0,6.0,95.0,99.0,99.0,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,KI,0.0,2.0,11.0,62.0,0.0,4.0,13.0,F,O,61.0000,163.00,22.9591,1.0,DE,998.0,3011.0,61.0000,163.00,15.0,37.0,4010.0,45.0,{'$date': '1994-04-06T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},0,1,142007,23.0,23.0,{'$date': '2008-03-14T00:00:00Z'},,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,149893.0,Y,Y,24800,Unknown,14353,14353,24800,,35-49,25.0,25.0,C,N,N,0.0,0.0,4010.0,{'$date': '1994-02-28T00:00:00Z'},U,U,N,N,1.0,U,13.0,{'$date': '1994-04-06T00:00:00Z'},N,2.1,Y,L,N,,11.0,2.0,62.0,51.0,13.0,3.0,11.0,2.0,62.0,97.0,13.0,4.0,0.0,1.0,1.0,2.0,0.0,0.0,,,N,N,1,,,,1.0,,O,L,M,DE,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4010.0,2.4,{'$date': '1991-04-01T00:00:00Z'},,1.0,45.0,0.0,,Y,3011.0,0.0,Y,,N,1459.0,0.0,37.0,101.0,Y,KI,U,U,N,N,N,ND,U,N,3.0,{'$date': '1998-04-04T00:00:00Z'},3.0,1.0,5091.0,N,,A,U,162.0,61.2,23.3,V,V,,,28JUN1994:00:00:00.000,20JUN1994:00:00:00.000,N,Y,Y,A483757,,N,,,,,,,,,{'$date': '1994-04-06T00:00:00Z'},233643.0,"{'RT_DON_RETYP': 'N', 'RA1': 11, 'RA2': 2, 'RB...","{'ALG_IND': 0, 'ALG_MAINT': 0, 'ALG_ANTIREJ': ...","[{'SANDIMMUNE_MAINT_PREV': 0, 'SANDIMMUNE_MAIN...","[{'PX_STAT': 'L', 'PT_CODE': 142007, 'PX_STAT_...","[{'TRR_FOL_ID_CODE': 'A1291268', 'PX_STAT': 'L...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",35-49,7.0,2.1,,,,,,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,Y,,,,,,,,,3.0,N,Y,Y,Y,ND,N,1.0,,U,,,,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,65+,66.0,95.0,1.0,{'$date': '1994-04-06T00:00:00.000Z'},LKI,N,ND,DE,1,0,0,04/06/1994,24800,2.0,V,28FEB1995:00:00:00,Y,95.0,3.0,95.0,,,,,,,,,,,,1.0,2.0,96.0,99.0,99.0,,99.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,KI,0.0,2.0,26.0,7.0,27.0,0.0,0.0,M,O,85.0000,175.00,27.7551,1.0,CA,998.0,3034.0,85.0000,175.00,15.0,689.0,4010.0,41.0,{'$date': '1996-01-18T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},1,4,343794,27.8,27.8,,,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,317487.0,Y,Y,14446,Unknown,20243,20243,14446,"[{'CHG_TY': 'D', 'UNOS_CAND_STAT_CD': 4010, 'C...",35-49,,,,,,,,4010.0,,U,U,N,N,1.0,U,13.0,{'$date': '1996-01-18T00:00:00Z'},N,1.7,Y,L,N,,2.0,30.0,16.0,70.0,4.0,8.0,2.0,26.0,7.0,27.0,2.0,6.0,1.0,2.0,2.0,5.0,0.0,0.0,,,N,N,4,,,,2.0,,O,L,F,CA,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4010.0,1.6,{'$date': '1993-10-01T00:00:00Z'},{'$date': '2010-04-26T00:00:00Z'},1.0,43.0,0.0,,Y,3040.0,3.0,N,10.0,N,5212.0,1.0,689.0,101.0,,KI,U,U,N,N,N,,U,N,3.0,{'$date': '2017-01-04T00:00:00Z'},3.0,0.0,7657.0,N,,A,U,175.0,79.0,25.8,V,V,,,12MAY1994:00:00:00.000,16JUL1996:00:00:00.000,N,N,N,A328848,{'$date': '1996-01-24T00:00:00Z'},N,,,,,,,,,{'$date': '1996-01-18T00:00:00Z'},288425.0,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 26, 'RB...","{'STEROIDS_IND': 1, 'STEROIDS_MAINT': 1, 'STER...","[{'PROGRAF_MAINT_PREV': 1, 'PROGRAF_MAINT_CUR'...","[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1226948', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",35-49,,1.6,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,P,Y,Y,Y,ND,N,6.0,{'$date': '2017-01-04T00:00:00Z'},U,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,18-34,19.0,95.0,4.0,{'$date': '1996-01-18T00:00:00.000Z'},LKI,N,ND,CA,1,0,0,01/18/1996,14446,5.0,V,03JUN1996:00:00:00,Y,96.0,97.0,95.0,,,,,,,,,,,,97.0,7.0,95.0,99.0,99.0,,96.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,KI,0.0,2.0,30.0,13.0,46.0,9.0,12.0,F,B,46.0000,168.00,16.2982,1.0,WI,998.0,3041.0,46.0000,168.00,15.0,548.0,4010.0,21.0,{'$date': '1995-08-31T00:00:00Z'},{'$date': '1994-03-01T00:00:00Z'},0,5,189347,16.3,16.3,,,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,757694.0,Y,Y,7905,Unknown,25172,25172,7905,"[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4010, 'C...",18-34,90.0,90.0,,,,90.0,90.0,4010.0,,U,U,N,N,1.0,U,14.0,{'$date': '1995-08-31T00:00:00Z'},N,1.1,Y,L,N,05/14/2003,2.0,97.0,46.0,97.0,8.0,9.0,2.0,30.0,13.0,46.0,9.0,12.0,0.0,0.0,1.0,1.0,0.0,0.0,,,ND,N,5,,,,1.0,,B,L,M,WI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4010.0,1.1,{'$date': '1993-11-01T00:00:00Z'},{'$date': '2003-05-14T00:00:00Z'},1.0,22.0,0.0,Y,Y,3041.0,0.0,N,10.0,N,2813.0,1.0,548.0,101.0,Y,KI,U,U,ND,N,,ND,U,N,3.0,{'$date': '2016-09-18T00:00:00Z'},3.0,0.0,7689.0,N,,A,U,167.0,43.0,15.4,V,V,,,14APR1994:00:00:00.000,09NOV1995:00:00:00.000,N,N,Y,A441197,{'$date': '1995-09-10T00:00:00Z'},N,,,,,,,,,{'$date': '1995-08-31T00:00:00Z'},3444.0,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 30, 'RB...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'TRR_FOL_ID_CODE': 'A1151031', 'PX_STAT_DATE...","[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'WORK...","[{'TRR_FOL_ID_CODE': 'A1151031', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",18-34,8.0,1.4,,,,,,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,N,,,,,,,,,3.0,P,Y,Y,Y,ND,N,1.0,{'$date': '2016-09-18T00:00:00Z'},U,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,35-49,47.0,95.0,6.0,{'$date': '1995-08-31T00:00:00.000Z'},LKI,N,ND,WI,1,0,0,08/31/1995,7905,7.0,V,13NOV1995:14:45:51,Y,95.0,1.0,96.0,,,,,,,,,,,,3.0,3.0,95.0,99.0,99.0,,99.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,KI,0.0,2.0,3.0,27.0,35.0,4.0,7.0,M,O,102.0000,185.00,29.8028,1.0,WI,1.0,3039.0,102.0000,185.00,15.0,1738.0,4999.0,49.0,{'$date': '1998-12-03T00:00:00Z'},{'$date': '1994-03-01T00:00:00Z'},0,1,193993,29.8,29.8,{'$date': '2004-06-01T00:00:00Z'},,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,546473.0,Y,Y,7905,Unknown,25172,25172,7905,"[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4999, 'C...",35-49,3.0,3.0,P,N,N,3.0,3.0,4010.0,,U,U,N,N,2.0,U,1.0,{'$date': '1998-12-03T00:00:00Z'},N,1.6,Y,L,N,04/02/2004,2.0,3.0,8.0,50.0,7.0,17.0,2.0,3.0,27.0,35.0,4.0,7.0,0.0,2.0,1.0,3.0,0.0,0.0,,,ND,N,1,,,,1.0,,O,L,F,WI,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4999.0,2.6,{'$date': '1998-06-01T00:00:00Z'},{'$date': '2004-04-02T00:00:00Z'},1.0,53.0,0.0,Y,Y,3012.0,0.0,N,10.0,N,1947.0,1.0,1738.0,101.0,,KI,U,U,ND,N,N,,U,N,3.0,{'$date': '2004-06-01T00:00:00Z'},3.0,1.0,2007.0,N,,A,U,185.0,102.0,29.8,V,V,,,14APR1994:00:00:00.000,21JAN1999:00:00:00.000,N,N,N,A432439,{'$date': '1998-12-08T00:00:00Z'},N,,,,,,,,,{'$date': '1998-12-03T00:00:00Z'},114036.0,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 3, 'RB1...","{'NEORAL_IND': 1, 'NEORAL_DAYS': 5, 'NEORAL_MA...","[{'NEORAL_MAINT_PREV': 1, 'NEORAL_MAINT_CUR': ...","[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1032909', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",50-64,,2.4,,999.0,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,,,,,,,,,,,,1.0,P,Y,Y,Y,ND,N,7.0,,U,998.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,50-64,51.0,95.0,2.0,{'$date': '1998-12-03T00:00:00.000Z'},LKI,N,ND,WI,1,0,0,12/03/1998,7905,7.0,V,03MAR1999:00:00:00,Y,96.0,99.0,95.0,,,,,,,,,,,,99.0,97.0,95.0,99.0,99.0,,96.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174376,KI,0.0,2.0,24.0,7.0,51.0,8.0,11.0,M,A,64.8640,172.72,21.7430,1.0,TX,2090.0,3070.0,64.8640,172.72,15.0,7.0,4099.0,76.0,{'$date': '2022-09-28T00:00:00Z'},{'$date': '2022-09-21T00:00:00Z'},1,4,1482008,21.7,21.7,,,4,95.0,95.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1692592.0,Y,Y,5487,Unknown,11377,11377,5487,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4099, 'I...",65+,,,,N,N,,,4099.0,,N,N,N,N,,,,{'$date': '2022-09-28T00:00:00Z'},,,,L,,,,,,,,,2.0,24.0,7.0,51.0,8.0,11.0,,,,,0.0,0.0,,,,,4,,,,,,O,L,F,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4099.0,,,,2.0,76.0,0.0,,,3070.0,,Y,,N,,0.0,7.0,101.0,,KI,,,,,,,,N,,,3.0,0.0,,N,,A,U,172.7,64.9,21.7,V,E,,,21SEP2022:13:36:29.000,,N,,,A946536,,,,,,,,,,,,645018.0,"{'RA1': 2, 'RA2': 24, 'RB1': 7, 'RB2': 51, 'RB...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",65+,,,,,,,6.0,,3.0,,Y,64.0,N,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.5,,,,,,,,,,,,,,,,,,,,N,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,{'$date': '2022-09-21T00:00:00Z'},7.0,100.0,100.0,100.0,100.0,YN,,12.0,,80%,0.0,,,,{'$date': '2022-09-15T00:00:00Z'},,,,0.0,,,,,,,,80%,,,,65+,66.0,,,{'$date': '2022-09-28T00:00:00.000Z'},LKI,,,,1,0,0,,5487,4.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174377,KI,0.0,2.0,30.0,45.0,53.0,8.0,13.0,M,O,73.0283,172.72,24.4797,,MD,,,73.0283,172.72,15.0,6.0,4010.0,61.0,{'$date': '2022-09-27T00:00:00Z'},{'$date': '2022-09-21T00:00:00Z'},0,2,1482588,24.5,24.5,,,2,95.0,95.0,4.0,16.0,96.0,96.0,2.0,96.0,96.0,96.0,202.0,501.0,1691513.0,Y,Y,24800,Unknown,14353,14353,24800,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4010, 'C...",50-64,,,,N,Y,,,4010.0,,,,N,N,,,,{'$date': '2022-09-27T00:00:00Z'},,,,L,,,,,,,,,2.0,30.0,45.0,53.0,8.0,13.0,,,,,0.0,0.0,,,,,2,,,,,,O,L,M,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4010.0,,,,1.0,61.0,0.0,,,,,Y,,N,,0.0,6.0,101.0,,KI,,,,,,,,N,,,3.0,0.0,,N,,A,U,172.7,73.0,24.5,E,E,,,,,N,,,A946375,,,,,,,,,,,,641717.0,"{'RA1': 2, 'RA2': 30, 'RB1': 45, 'RB2': 53, 'R...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",50-64,,,,,,,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,{'$date': '2020-04-02T00:00:00Z'},908.0,100.0,100.0,100.0,100.0,YN,{'$date': '2020-04-02T00:00:00Z'},,,43%,0.0,,,,{'$date': '2019-12-06T00:00:00Z'},,,,0.0,,,,,,,,43%,,,,18-34,27.0,,,{'$date': '2022-09-27T00:00:00.000Z'},LKI,,,,1,0,0,,24800,2.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174378,KI,0.0,2.0,3.0,65.0,27.0,8.0,13.0,F,A,102.5120,154.94,42.7019,,IL,,,102.5120,154.94,15.0,7.0,4010.0,67.0,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-22T00:00:00Z'},0,1,1481914,42.7,42.7,,,7,95.0,95.0,1.0,8.0,96.0,0.0,95.0,0.0,96.0,0.0,7.0,4.0,1692626.0,Y,Y,8587,Unknown,23002,23002,8587,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4010, 'C...",65+,,,,N,Y,,,4010.0,,,,N,N,,,,{'$date': '2022-09-29T00:00:00Z'},,,,L,,,,,,,,,2.0,3.0,65.0,27.0,8.0,13.0,,,,,0.0,0.0,,,,,1,,,,,,A,L,F,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4010.0,,,,1.0,67.0,0.0,,,,,Y,,N,,0.0,7.0,101.0,,KI,,,,,,,,N,,,3.0,0.0,,N,,A,U,154.9,102.5,42.7,E,E,,,,,N,,,A946921,,,,,,,,,,,,645038.0,"{'RA1': 2, 'RA2': 3, 'RB1': 65, 'RB2': 27, 'RB...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",65+,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,{'$date': '2021-09-09T00:00:00Z'},385.0,100.0,100.0,100.0,100.0,YN,{'$date': '2021-09-09T00:00:00Z'},,,85%,0.0,,,,{'$date': '2022-09-21T00:00:00Z'},,,,0.0,,,,,,,,62%,,,,50-64,60.0,,,{'$date': '2022-09-29T00:00:00.000Z'},LKI,,,,1,0,0,,8587,7.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174379,KI,0.0,2.0,3.0,18.0,48.0,4.0,15.0,M,O,64.1000,167.64,22.8088,,TX,,,64.1000,167.64,15.0,3.0,4010.0,67.0,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-26T00:00:00Z'},1,4,1483697,22.8,22.8,,,4,96.0,95.0,8.0,12.0,95.0,96.0,96.0,96.0,95.0,96.0,8.0,6.0,1693726.0,Y,Y,17081,Unknown,11377,11377,17081,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4010, 'C...",65+,,,,N,N,,,4010.0,,,,N,N,,,,{'$date': '2022-09-29T00:00:00Z'},,,,L,,,,,,,,,2.0,3.0,18.0,48.0,4.0,15.0,,,,,0.0,0.0,,,,,1,,,,,,O,L,F,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4010.0,,,,1.0,67.0,0.0,,,,,Y,,N,,0.0,3.0,101.0,,KI,,,,,,,,Y,,,3.0,0.0,,N,,A,U,167.6,64.1,22.8,E,E,,,,,N,,,A946658,,,,,,,,,,,,644789.0,"{'RA1': 2, 'RA2': 3, 'RB1': 18, 'RB2': 48, 'RB...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",65+,,,,,1.0,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,Y,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,94.0,94.0,94.0,94.0,YA,,,,62%,0.0,,,,,,,,0.0,,,,,,,,62%,,,,35-49,44.0,,,{'$date': '2022-09-29T00:00:00.000Z'},LKI,,,,1,0,0,,17081,4.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


## Statistics

In [13]:
print("kidpan_living", kidpan_living.shape)

kidpan_living (174381, 628)


#### Checking % of NaN values + Keeping track of columns with more than 50% entries missing

In [14]:
print("How many entries are NaN in kidpan_living")
kidpan_living_columns_to_delete = []
for col in kidpan_living.columns:
    totalRows = kidpan_living.shape[0]
    count = kidpan_living[col].isna().sum()
    percentage = count / totalRows * 100
    if(percentage > 50.00):
        kidpan_living_columns_to_delete.append(col)
    print(f"{col} {percentage:.2f}%")

How many entries are NaN in kidpan_living
WL_ORG 25.64%
NUM_PREV_TX 25.64%
A1 25.64%
A2 25.64%
B1 25.64%
B2 25.64%
DR1 25.64%
DR2 25.64%
GENDER_REC 0.00%
ABO_REC 0.00%
WGT_KG_TCR 3.68%
HGT_CM_TCR 5.05%
BMI_TCR 5.30%
CITIZENSHIP_REC 0.38%
PERM_STATE 0.68%
FUNC_STAT_TCR 0.17%
DGN_TCR 27.94%
INIT_WGT_KG 26.90%
INIT_HGT_CM 27.38%
REM_CD 25.64%
DAYSWAIT_CHRON 25.64%
END_STAT 25.64%
INIT_AGE 25.64%
END_DATE 25.64%
INIT_DATE 25.64%
ETHNICITY 0.00%
ETHCAT 0.00%
PT_CODE 0.00%
INIT_BMI_CALC 27.70%
END_BMI_CALC 26.59%
COMPOSITE_DEATH_DATE 70.42%
WLKI 99.96%
REGION_REC 0.00%
BW4 25.64%
BW6 25.64%
C1 25.64%
C2 25.64%
DR51 25.64%
DR51_2 25.64%
DR52 25.64%
DR52_2 25.64%
DR53 25.64%
DR53_2 25.64%
DQ1 25.64%
DQ2 25.64%
WL_ID_CODE 25.64%
DATA_TRANSPLANT 0.00%
DATA_WAITLIST 0.00%
CTR_CODE 0.00%
OPO_CTR_CODE 0.00%
INIT_OPO_CTR_CODE 0.00%
END_OPO_CTR_CODE 0.00%
LISTING_CTR_CODE 0.00%
Kidney_Pancreas_WL_History 28.48%
INIT_AGE_BIN 25.64%
CURRENT_PRA 71.53%
PEAK_PRA 71.05%
USE_WHICH_PRA 70.60%
DONATION 30.75

HCV_NAT_DON 100.00%
HIV_NAT_DON 100.00%
PRI_PAYMENT_CTRY_DON 99.98%
WT_QUAL_DATE 34.15%
DAYSWAIT_ALLOC 34.30%
MAX_KDPI_LOCAL_ZERO_ABDR 71.98%
MAX_KDPI_LOCAL_NON_ZERO_ABDR 71.99%
MAX_KDPI_IMPORT_ZERO_ABDR 71.99%
MAX_KDPI_IMPORT_NON_ZERO_ABDR 71.99%
ANTIBODY_TESTED 71.87%
DIALYSIS_DATE 64.54%
INACT_REASON_CD 88.69%
CREAT_CLEAR 94.29%
END_EPTS 73.70%
END_CPRA_DETAIL 52.47%
ADMIT_DATE_DON 100.00%
FIN_RESIST_TX 99.97%
CREAT_CLEAR_DATE 94.87%
GFR_DATE 69.75%
A2A2B_ELIGIBILITY 98.45%
L_FIN_FLOW_RATE_TX 100.00%
L_FIN_RESIST_TX 100.00%
INIT_CPRA 55.28%
R_FIN_FLOW_RATE_TX 100.00%
R_FIN_RESIST_TX 100.00%
C_PEPTIDE_PA_TCR 99.74%
INSULIN_DOSAGE_PA 100.00%
C_PEPTIDE 99.95%
C_PEPTIDEDATE 99.95%
TXVCA 100.00%
INIT_EPTS 76.52%
HBA1C_PA_TCR 99.87%
WLVC 100.00%
PRE_AVG_INSULIN_USED_TRR 100.00%
AGE_BIN_DON 0.01%
AGE_DON 0.01%
DBW6 2.46%
DDQ1 2.01%
DON_DATE 0.00%
DON_ORG 0.00%
HCV_ANTIBODY 13.84%
HCV_RIBA 13.84%
HOME_STATE 2.45%
KIDNEY_RECOV 0.00%
LIVER_RECOV 0.00%
LUNG_RECOV 0.00%
ORG_RECOVERY_DT 0.40%
RE

In [15]:
print("Number of columns to be dropped from kidpan_living",len(kidpan_living_columns_to_delete))

Number of columns to be dropped from kidpan_living 360


#### Ensuring we have only living donors and kidney transplantations

In [16]:
kidpan_living["KIDNEY_RECOV"].value_counts()

KIDNEY_RECOV
1    174356
0        25
Name: count, dtype: int64

In [17]:
kidpan_living["LIVER_RECOV"].value_counts()

LIVER_RECOV
0    174381
Name: count, dtype: int64

In [18]:
kidpan_living["DON_ORG"].value_counts()

DON_ORG
LKI     146682
RKI      27641
PAS1        58
Name: count, dtype: int64

In [19]:
kidpan_living["DON_ORG"].isna().sum()

0

In [20]:
kidpan_living["DON_TY"].value_counts()

DON_TY
L    174381
Name: count, dtype: int64

## Dropping unwanted features

#### Dropping columns with with > 50% NaN values

In [21]:
original_columns = kidpan_living.columns
reduced_columns = [x for x in original_columns if x not in kidpan_living_columns_to_delete]
kidpan_living = kidpan_living[reduced_columns]
kidpan_living

Unnamed: 0,WL_ORG,NUM_PREV_TX,A1,A2,B1,B2,DR1,DR2,GENDER_REC,ABO_REC,WGT_KG_TCR,HGT_CM_TCR,BMI_TCR,CITIZENSHIP_REC,PERM_STATE,FUNC_STAT_TCR,DGN_TCR,INIT_WGT_KG,INIT_HGT_CM,REM_CD,DAYSWAIT_CHRON,END_STAT,INIT_AGE,END_DATE,INIT_DATE,ETHNICITY,ETHCAT,PT_CODE,INIT_BMI_CALC,END_BMI_CALC,REGION_REC,BW4,BW6,C1,C2,DR51,DR51_2,DR52,DR52_2,DR53,DR53_2,DQ1,DQ2,WL_ID_CODE,DATA_TRANSPLANT,DATA_WAITLIST,CTR_CODE,OPO_CTR_CODE,INIT_OPO_CTR_CODE,END_OPO_CTR_CODE,LISTING_CTR_CODE,Kidney_Pancreas_WL_History,INIT_AGE_BIN,DONATION,ON_DIALYSIS,INIT_STAT,EXH_PERIT_ACCESS,EXH_VASC_ACCESS,PREV_TX,PREV_KI_TX,FUNC_STAT_TRR,MALIG_TRR,PRI_PAYMENT_TRR_KI,TX_DATE,FIRST_WK_DIAL,SERUM_CREAT,PRE_TX_TXFUS,TXKID,DON_RETYP,DA1,DA2,DB1,DB2,DDR1,DDR2,RA1,RA2,RB1,RB2,RDR1,RDR2,AMIS,BMIS,DRMIS,HLAMIS,NPKID,NPPAN,HBV_CORE_DON,HBV_SUR_ANTIGEN_DON,ETHCAT_DON,CITIZENSHIP_DON,ABO_DON,DON_TY,GENDER_DON,HOME_STATE_DON,CANCER_SITE_DON,HIST_CIG_DON,HIST_HYPERTENS_DON,HIST_CANCER_DON,DIABETES_DON,HGT_CM_DON_CALC,WGT_KG_DON_CALC,BMI_DON_CALC,END_STAT_KI,CREAT1Y,DIAL_DATE,ABO_MAT,AGE,DISTANCE,DIAL_TRR,DIAG_KI,COLD_ISCH_KI,GRF_STAT_KI,DWFG_KI,GTIME_KI,GSTATUS_KI,DAYSWAIT_CHRON_KI,TX_PROCEDUR_TY_KI,TRTREJ6M_KI,ORGAN,CMV_IGG_REC,CMV_IGM_REC,HBV_CORE_REC,HBV_SUR_ANTIGEN_REC,HCV_SEROSTATUS,HIV_SEROSTATUS,CMV_STATUS,PREV_TX_ANY,MED_COND_TRR,PX_STAT_DATE,SHARE_TY,PSTATUS,PTIME,PAYBACK,AGE_GROUP,MALIG,HGT_CM_CALC,WGT_KG_CALC,BMI_CALC,STATUS_TCR,STATUS_TRR,VAL_DT_TCR,VAL_DT_TRR,LT_ONE_WEEK_DON,TRR_ID_CODE,DISCHARGE_DATE,DONOR_ID,Kidney_Pancreas_HLA,Kidney_Pancreas_Immuno_Discharge,Kidney_Pancreas_Immuno_Followup,Kidney_Followup,Kidney_Malig_Followup,Kidney_Pancreas_PRA,AGE_BIN_REC,CREAT6M,EDUCATION_REC,DIAB,DRUGTRT_COPD,PERIP_VASC,MALIG_TCR_KI,PRI_PAYMENT_TCR_KI,TRTREJ1Y_KI,CREAT_TRR,HAPLO_TY_MATCH_DON,CMV_TEST_DON,HBV_TEST_DON,HCV_TEST_DON,HCV_RIBA_DON,HCV_ANTIBODY_DON,LIV_DON_TY,PUMP_KI,ADMISSION_DATE,PERM_STATE_TRR,WORK_INCOME_TRR,ACUTE_REJ_EPI_KI,EBV_SEROSTATUS,TOT_SERUM_ALBUM,EBV_TEST_DON,HCV_RNA_DON,EDUCATION_DON,HBV_DNA_DON,CMV_NUCLEIC_DON,CMV_IGG_DON,CMV_IGM_DON,WORK_INCOME_TCR,ORG_REC_ON,REC_ON_ICE,EBV_IGG_DON,EBV_IGM_DON,LOS,WT_QUAL_DATE,DAYSWAIT_ALLOC,AGE_BIN_DON,AGE_DON,DBW6,DDQ1,DON_DATE,DON_ORG,HCV_ANTIBODY,HCV_RIBA,HOME_STATE,KIDNEY_RECOV,LIVER_RECOV,LUNG_RECOV,ORG_RECOVERY_DT,RECOV_FACILITY_CODE,REGION_DON,STATUS_LDR,VAL_DT_LDR,VIRUSES_TESTED,DBW4,DC1,DDR52,REOP_BLEED_KI,REOP_BLEED_LI,REOP_BOWEL_KI,REOP_BOWEL_LI,REOP_HERNIA_KI,REOP_HERNIA_LI,REOP_LI_FAIL,REOP_OTH_KI,REOP_OTH_LI,REOP_VASC_KI,REOP_VASC_LI,DC2,DDQ2,DDR53,DDP1,DDP2,DDR51,CMV_NUCLEIC,DIABETES,FUNC_STAT,HBV_DNA,HCV_RNA,HEALTH_INS,HIST_CANCER,HIST_CIG,HIST_HYPER,HYPERTENSION,INIT_DISCHARGE_DT,KI_PROC_TY,MARITAL_STAT,NON_AUTO_BLOOD,OTH_COMP_KI,OTH_INTER_PROC_KI,PHYSICAL_CAPACITY,POSTOP_TEST_DT,POSTOP_URINE_PROTEIN,PREDON_HGT,PREDON_WGT,PREOP_URINE_PROTEIN,READMISSION_KI,REOPERATION_KI,TOBACCO_USE,VASC_COMP_KI,WORK_INCOME,BP_PREOP_DIAST,BP_PREOP_SYST,EBV_IGG,EBV_IGM,KI_CREAT_PREOP,WGT_KG,Living_Donor_Follow,BP_POSTOP_DIAST,BP_POSTOP_SYST,KI_CREAT_POSTOP,CONVERT_OPEN_KI
0,KI,0.0,3.0,25.0,7.0,27.0,1.0,13.0,F,A,60.0000,160.00,23.4375,1.0,MI,1.0,3011.0,60.0000,160.00,15.0,247.0,4010.0,42.0,{'$date': '1994-11-02T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},0,1,111004,23.4,23.4,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,342925.0,Y,Y,1023,Unknown,9331,9331,1023,,35-49,N,N,4010.0,U,U,N,N,1.0,U,1.0,{'$date': '1994-11-02T00:00:00Z'},N,1.9,N,L,N,1.0,25.0,7.0,27.0,1.0,13.0,3.0,25.0,7.0,27.0,1.0,13.0,1.0,0.0,0.0,1.0,0.0,0.0,ND,N,1,1.0,O,L,F,MI,,,,,,,,,4010.0,1.4,,2.0,43.0,0.0,N,3011.0,3.0,Y,N,5219.0,1.0,247.0,101.0,N,KI,U,U,ND,N,N,ND,U,N,3.0,{'$date': '2009-02-15T00:00:00Z'},3.0,0.0,5219.0,N,A,U,167.0,56.0,20.1,V,V,13APR1994:00:00:00.000,17JUL1995:00:00:00.000,N,A55017,{'$date': '1994-11-07T00:00:00Z'},126127.0,"{'RT_DON_RETYP': 'N', 'RA1': 3, 'RA2': 25, 'RB...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'CYCLOSPORIN_MAINT_PREV': 1, 'CYCLOSPORIN_AN...","[{'CARE_PROVIDED_BY': 4, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1231888', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",35-49,1.7,,,,,,,N,6.7,3.0,Y,Y,Y,ND,N,4.0,,,,,,,,,,,,,,,,,,,,,,,35-49,44.0,95.0,2.0,{'$date': '1994-11-02T00:00:00.000Z'},LKI,N,ND,MI,1,0,0,11/02/1994,1023,10.0,V,22MAY1995:00:00:00,Y,95.0,1.0,0.0,,,,,,,,,,,,7.0,6.0,95.0,99.0,99.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,KI,0.0,2.0,11.0,62.0,0.0,4.0,13.0,F,O,61.0000,163.00,22.9591,1.0,DE,998.0,3011.0,61.0000,163.00,15.0,37.0,4010.0,45.0,{'$date': '1994-04-06T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},0,1,142007,23.0,23.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,149893.0,Y,Y,24800,Unknown,14353,14353,24800,,35-49,N,N,4010.0,U,U,N,N,1.0,U,13.0,{'$date': '1994-04-06T00:00:00Z'},N,2.1,Y,L,N,11.0,2.0,62.0,51.0,13.0,3.0,11.0,2.0,62.0,97.0,13.0,4.0,0.0,1.0,1.0,2.0,0.0,0.0,N,N,1,1.0,O,L,M,DE,,,,,,,,,4010.0,2.4,{'$date': '1991-04-01T00:00:00Z'},1.0,45.0,0.0,Y,3011.0,0.0,Y,N,1459.0,0.0,37.0,101.0,Y,KI,U,U,N,N,N,ND,U,N,3.0,{'$date': '1998-04-04T00:00:00Z'},3.0,1.0,5091.0,N,A,U,162.0,61.2,23.3,V,V,28JUN1994:00:00:00.000,20JUN1994:00:00:00.000,N,A483757,,233643.0,"{'RT_DON_RETYP': 'N', 'RA1': 11, 'RA2': 2, 'RB...","{'ALG_IND': 0, 'ALG_MAINT': 0, 'ALG_ANTIREJ': ...","[{'SANDIMMUNE_MAINT_PREV': 0, 'SANDIMMUNE_MAIN...","[{'PX_STAT': 'L', 'PT_CODE': 142007, 'PX_STAT_...","[{'TRR_FOL_ID_CODE': 'A1291268', 'PX_STAT': 'L...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",35-49,2.1,,,,,,,Y,,3.0,Y,Y,Y,ND,N,1.0,,,,,,,,,,,,,,,,,,,,,,,65+,66.0,95.0,1.0,{'$date': '1994-04-06T00:00:00.000Z'},LKI,N,ND,DE,1,0,0,04/06/1994,24800,2.0,V,28FEB1995:00:00:00,Y,95.0,3.0,95.0,,,,,,,,,,,,1.0,2.0,96.0,99.0,99.0,99.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,KI,0.0,2.0,26.0,7.0,27.0,0.0,0.0,M,O,85.0000,175.00,27.7551,1.0,CA,998.0,3034.0,85.0000,175.00,15.0,689.0,4010.0,41.0,{'$date': '1996-01-18T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},1,4,343794,27.8,27.8,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,317487.0,Y,Y,14446,Unknown,20243,20243,14446,"[{'CHG_TY': 'D', 'UNOS_CAND_STAT_CD': 4010, 'C...",35-49,,,4010.0,U,U,N,N,1.0,U,13.0,{'$date': '1996-01-18T00:00:00Z'},N,1.7,Y,L,N,2.0,30.0,16.0,70.0,4.0,8.0,2.0,26.0,7.0,27.0,2.0,6.0,1.0,2.0,2.0,5.0,0.0,0.0,N,N,4,2.0,O,L,F,CA,,,,,,,,,4010.0,1.6,{'$date': '1993-10-01T00:00:00Z'},1.0,43.0,0.0,Y,3040.0,3.0,N,N,5212.0,1.0,689.0,101.0,,KI,U,U,N,N,N,,U,N,3.0,{'$date': '2017-01-04T00:00:00Z'},3.0,0.0,7657.0,N,A,U,175.0,79.0,25.8,V,V,12MAY1994:00:00:00.000,16JUL1996:00:00:00.000,N,A328848,{'$date': '1996-01-24T00:00:00Z'},288425.0,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 26, 'RB...","{'STEROIDS_IND': 1, 'STEROIDS_MAINT': 1, 'STER...","[{'PROGRAF_MAINT_PREV': 1, 'PROGRAF_MAINT_CUR'...","[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1226948', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",35-49,1.6,,,,,,,,,1.0,Y,Y,Y,ND,N,6.0,,,,,,,,,,,,,,,,,,,,,,,18-34,19.0,95.0,4.0,{'$date': '1996-01-18T00:00:00.000Z'},LKI,N,ND,CA,1,0,0,01/18/1996,14446,5.0,V,03JUN1996:00:00:00,Y,96.0,97.0,95.0,,,,,,,,,,,,97.0,7.0,95.0,99.0,99.0,96.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,KI,0.0,2.0,30.0,13.0,46.0,9.0,12.0,F,B,46.0000,168.00,16.2982,1.0,WI,998.0,3041.0,46.0000,168.00,15.0,548.0,4010.0,21.0,{'$date': '1995-08-31T00:00:00Z'},{'$date': '1994-03-01T00:00:00Z'},0,5,189347,16.3,16.3,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,757694.0,Y,Y,7905,Unknown,25172,25172,7905,"[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4010, 'C...",18-34,,,4010.0,U,U,N,N,1.0,U,14.0,{'$date': '1995-08-31T00:00:00Z'},N,1.1,Y,L,N,2.0,97.0,46.0,97.0,8.0,9.0,2.0,30.0,13.0,46.0,9.0,12.0,0.0,0.0,1.0,1.0,0.0,0.0,ND,N,5,1.0,B,L,M,WI,,,,,,,,,4010.0,1.1,{'$date': '1993-11-01T00:00:00Z'},1.0,22.0,0.0,Y,3041.0,0.0,N,N,2813.0,1.0,548.0,101.0,Y,KI,U,U,ND,N,,ND,U,N,3.0,{'$date': '2016-09-18T00:00:00Z'},3.0,0.0,7689.0,N,A,U,167.0,43.0,15.4,V,V,14APR1994:00:00:00.000,09NOV1995:00:00:00.000,N,A441197,{'$date': '1995-09-10T00:00:00Z'},3444.0,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 30, 'RB...","{'CYCLOSPORIN_IND': 0, 'CYCLOSPORIN_MAINT': 1,...","[{'TRR_FOL_ID_CODE': 'A1151031', 'PX_STAT_DATE...","[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'WORK...","[{'TRR_FOL_ID_CODE': 'A1151031', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",18-34,1.4,,,,,,,Y,,3.0,Y,Y,Y,ND,N,1.0,,,,,,,,,,,,,,,,,,,,,,,35-49,47.0,95.0,6.0,{'$date': '1995-08-31T00:00:00.000Z'},LKI,N,ND,WI,1,0,0,08/31/1995,7905,7.0,V,13NOV1995:14:45:51,Y,95.0,1.0,96.0,,,,,,,,,,,,3.0,3.0,95.0,99.0,99.0,99.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,KI,0.0,2.0,3.0,27.0,35.0,4.0,7.0,M,O,102.0000,185.00,29.8028,1.0,WI,1.0,3039.0,102.0000,185.00,15.0,1738.0,4999.0,49.0,{'$date': '1998-12-03T00:00:00Z'},{'$date': '1994-03-01T00:00:00Z'},0,1,193993,29.8,29.8,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,546473.0,Y,Y,7905,Unknown,25172,25172,7905,"[{'CHG_TY': 'M', 'UNOS_CAND_STAT_CD': 4999, 'C...",35-49,N,N,4010.0,U,U,N,N,2.0,U,1.0,{'$date': '1998-12-03T00:00:00Z'},N,1.6,Y,L,N,2.0,3.0,8.0,50.0,7.0,17.0,2.0,3.0,27.0,35.0,4.0,7.0,0.0,2.0,1.0,3.0,0.0,0.0,ND,N,1,1.0,O,L,F,WI,,,,,,,,,4999.0,2.6,{'$date': '1998-06-01T00:00:00Z'},1.0,53.0,0.0,Y,3012.0,0.0,N,N,1947.0,1.0,1738.0,101.0,,KI,U,U,ND,N,N,,U,N,3.0,{'$date': '2004-06-01T00:00:00Z'},3.0,1.0,2007.0,N,A,U,185.0,102.0,29.8,V,V,14APR1994:00:00:00.000,21JAN1999:00:00:00.000,N,A432439,{'$date': '1998-12-08T00:00:00Z'},114036.0,"{'RT_DON_RETYP': 'N', 'RA1': 2, 'RA2': 3, 'RB1...","{'NEORAL_IND': 1, 'NEORAL_DAYS': 5, 'NEORAL_MA...","[{'NEORAL_MAINT_PREV': 1, 'NEORAL_MAINT_CUR': ...","[{'CARE_PROVIDED_BY': 1, 'PX_STAT': 'A', 'HOSP...","[{'TRR_FOL_ID_CODE': 'A1032909', 'PX_STAT': 'A...","{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",50-64,2.4,,,,,,,N,,1.0,Y,Y,Y,ND,N,7.0,,,,,,,,,,,,,,,,,,,,,,,50-64,51.0,95.0,2.0,{'$date': '1998-12-03T00:00:00.000Z'},LKI,N,ND,WI,1,0,0,12/03/1998,7905,7.0,V,03MAR1999:00:00:00,Y,96.0,99.0,95.0,,,,,,,,,,,,99.0,97.0,95.0,99.0,99.0,96.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174376,KI,0.0,2.0,24.0,7.0,51.0,8.0,11.0,M,A,64.8640,172.72,21.7430,1.0,TX,2090.0,3070.0,64.8640,172.72,15.0,7.0,4099.0,76.0,{'$date': '2022-09-28T00:00:00Z'},{'$date': '2022-09-21T00:00:00Z'},1,4,1482008,21.7,21.7,4,95.0,95.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1692592.0,Y,Y,5487,Unknown,11377,11377,5487,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4099, 'I...",65+,N,N,4099.0,N,N,N,N,,,,{'$date': '2022-09-28T00:00:00Z'},,,,L,,,,,,,,2.0,24.0,7.0,51.0,8.0,11.0,,,,,0.0,0.0,,,4,,O,L,F,,,,,,,,,,4099.0,,,2.0,76.0,0.0,,3070.0,,Y,N,,0.0,7.0,101.0,,KI,,,,,,,,N,,,3.0,0.0,,N,A,U,172.7,64.9,21.7,V,E,21SEP2022:13:36:29.000,,N,A946536,,645018.0,"{'RA1': 2, 'RA2': 24, 'RB1': 7, 'RB2': 51, 'RB...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",65+,,6.0,3.0,,Y,N,1.0,,,,,,,,,,,,,,,,4.5,,,,,,,,N,,,,,,{'$date': '2022-09-21T00:00:00Z'},7.0,65+,66.0,,,{'$date': '2022-09-28T00:00:00.000Z'},LKI,,,,1,0,0,,5487,4.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174377,KI,0.0,2.0,30.0,45.0,53.0,8.0,13.0,M,O,73.0283,172.72,24.4797,,MD,,,73.0283,172.72,15.0,6.0,4010.0,61.0,{'$date': '2022-09-27T00:00:00Z'},{'$date': '2022-09-21T00:00:00Z'},0,2,1482588,24.5,24.5,2,95.0,95.0,4.0,16.0,96.0,96.0,2.0,96.0,96.0,96.0,202.0,501.0,1691513.0,Y,Y,24800,Unknown,14353,14353,24800,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4010, 'C...",50-64,N,Y,4010.0,,,N,N,,,,{'$date': '2022-09-27T00:00:00Z'},,,,L,,,,,,,,2.0,30.0,45.0,53.0,8.0,13.0,,,,,0.0,0.0,,,2,,O,L,M,,,,,,,,,,4010.0,,,1.0,61.0,0.0,,,,Y,N,,0.0,6.0,101.0,,KI,,,,,,,,N,,,3.0,0.0,,N,A,U,172.7,73.0,24.5,E,E,,,N,A946375,,641717.0,"{'RA1': 2, 'RA2': 30, 'RB1': 45, 'RB2': 53, 'R...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",50-64,,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2020-04-02T00:00:00Z'},908.0,18-34,27.0,,,{'$date': '2022-09-27T00:00:00.000Z'},LKI,,,,1,0,0,,24800,2.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174378,KI,0.0,2.0,3.0,65.0,27.0,8.0,13.0,F,A,102.5120,154.94,42.7019,,IL,,,102.5120,154.94,15.0,7.0,4010.0,67.0,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-22T00:00:00Z'},0,1,1481914,42.7,42.7,7,95.0,95.0,1.0,8.0,96.0,0.0,95.0,0.0,96.0,0.0,7.0,4.0,1692626.0,Y,Y,8587,Unknown,23002,23002,8587,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4010, 'C...",65+,N,Y,4010.0,,,N,N,,,,{'$date': '2022-09-29T00:00:00Z'},,,,L,,,,,,,,2.0,3.0,65.0,27.0,8.0,13.0,,,,,0.0,0.0,,,1,,A,L,F,,,,,,,,,,4010.0,,,1.0,67.0,0.0,,,,Y,N,,0.0,7.0,101.0,,KI,,,,,,,,N,,,3.0,0.0,,N,A,U,154.9,102.5,42.7,E,E,,,N,A946921,,645038.0,"{'RA1': 2, 'RA2': 3, 'RB1': 65, 'RB2': 27, 'RB...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",65+,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2021-09-09T00:00:00Z'},385.0,50-64,60.0,,,{'$date': '2022-09-29T00:00:00.000Z'},LKI,,,,1,0,0,,8587,7.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174379,KI,0.0,2.0,3.0,18.0,48.0,4.0,15.0,M,O,64.1000,167.64,22.8088,,TX,,,64.1000,167.64,15.0,3.0,4010.0,67.0,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-26T00:00:00Z'},1,4,1483697,22.8,22.8,4,96.0,95.0,8.0,12.0,95.0,96.0,96.0,96.0,95.0,96.0,8.0,6.0,1693726.0,Y,Y,17081,Unknown,11377,11377,17081,"[{'CHG_TY': 'A', 'UNOS_CAND_STAT_CD': 4010, 'C...",65+,N,N,4010.0,,,N,N,,,,{'$date': '2022-09-29T00:00:00Z'},,,,L,,,,,,,,2.0,3.0,18.0,48.0,4.0,15.0,,,,,0.0,0.0,,,1,,O,L,F,,,,,,,,,,4010.0,,,1.0,67.0,0.0,,,,Y,N,,0.0,3.0,101.0,,KI,,,,,,,,Y,,,3.0,0.0,,N,A,U,167.6,64.1,22.8,E,E,,,N,A946658,,644789.0,"{'RA1': 2, 'RA2': 3, 'RB1': 18, 'RB2': 48, 'RB...",{'IMMUNO_KP_PA': 0},,,,"{'PTECH1': 'Not Reported', 'PTECH2': 'Not Repo...",65+,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,35-49,44.0,,,{'$date': '2022-09-29T00:00:00.000Z'},LKI,,,,1,0,0,,17081,4.0,E,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


#### What features still have a significant amount of values missing

In [22]:
NaN_count_df = pd.DataFrame(columns=["Feature", "Percentage of NaN"])
for col in reduced_columns:
    totalRows = kidpan_living.shape[0]
    count = kidpan_living[col].isna().sum()
    percentage = count / totalRows * 100
    new_row = {"Feature": col, "Percentage of NaN": percentage}
    NaN_count_df = pd.concat([NaN_count_df, pd.DataFrame([new_row])])
    
NaN_count_df = NaN_count_df.sort_values(by="Percentage of NaN")
NaN_count_df = NaN_count_df.reset_index(drop=True)
NaN_count_df[0:269]

Unnamed: 0,Feature,Percentage of NaN
0,Kidney_Pancreas_HLA,0.0
1,RECOV_FACILITY_CODE,0.0
2,STATUS_LDR,0.0
3,DON_DATE,0.0
4,DON_ORG,0.0
5,NPKID,0.0
6,NPPAN,0.0
7,ETHCAT_DON,0.0
8,DON_TY,0.0
9,AGE,0.0


#### Unwanted features from the Kidney_Pancreas table

In [23]:
# columns containing data collected post-transplantation
kidpan_post_transplant_columns = ["ACUTE_REJ_EPI_KI", "ACUTE_REJ_EPI_PA", 
                           "ANAST_LK_PA", "BIOP_ISLET_PA", "BLEED_PA",
                          "COMPL_ABSC", "COMPL_ANASLK", "COMPL_PANCREA", "FAILDATE_KI", "FAILDATE_PA",
                          "FIRST_WK_DIAL", "GRF_FAIL_CAUSE_TY_KI", "GRF_FAIL_CAUSE_TY_PA", "GRF_STAT_KI",
                           "GRF_STAT_PA", "GRF_VASC_THROMB_PA", "HBA1C_PA_TRR", "GTIME_PA", "GSTATUS_PA", 
                           "INFECT_PA", "PANCREATIT_PA", "REJ_ACUTE_PA", "REJ_BIOPSY", "REJ_HYPER_PA",
                           "REJCNF_KI", "REJCNF_PA", "REJTRT_KI", "REJTRT_PA", "RESUM_MAINT_DIAL",
                           "RESUM_MAINT_DIAL_DT", "SERUM_CREAT", "COD_KI", "COD_PA", "COD_WL", "COD2_KI",
                          "COD2_PA", "COD3_KI", "COD3_PA", "FUNC_STAT_TRF", "LOS", "PRI_PAYMENT_TRR_KI",
                           "TRTREJ1Y_KI", "TRTREJ6M_KI", "CREAT1Y", "CREAT6M"
                          ]

# columns containing data collected at transplantation
kidpan_at_transplant_columns = ["CADEMIC_LEVEL_TRR", "ACADEMIC_PRG_TRR", "ADMISSION_DATE",
                         "COLD_ISCH_KI", "DISCHARGE_DATE", "DON_TY", "MED_COND_TRR", "FUNC_STAT_TRR",
                        "WORK_INCOME_TRR", "ADMISSION_DATE", "ART_RECON", "CMV_IGG", "CMV_IGM", "CMV_STATUS",
                         "DATA_TRANSPLANT", "DIAL_TRR", "L_FIN_FLOW_RATE_TX", "L_FIN_RESIST_TX",
                         "MED_COND_TRR", "ORG_REC_ON", "PERM_STATE_TRR", "PUMP_KI", "R_FIN_FLOW_RATE_TX",
                         "R_FIN_RESIST_TX", "REC_ON_ICE", "REC_ON_PUMP", "TX_PROCEDUR_TY_KI"
                        ]

# irrelevant columns
kidpan_irrelevant_columns = ["_id", "DONOR_ID", "WL_ID_CODE", "PT_CODE", "STATUS_DDR", "STATUS_LDR", "STATUS_TCR",
                      "STATUS_TRR", "TRR_ID_CODE"
                     ]

# columns regarding pancreas transplantation
kidpan_pancreas_columns = ["ACUTE_REJ_EPI_PA", "AMYLASE", "ANAST_LK_PA", "ART_RECON", "BIOP_ISLET_PA", "BLEED_PA",
                   "BLOOD_SUGAR_DIET_PA", "BLOOD_SUGAR_MED_RESUMED_DATE_PA", "BLOOD_SUGAR_MEDICATION_PA",
                    "C_PEPTIDE", "C_PEPTIDE_PA_TCR", "C_PEPTIDE_PA_TRR", "C_PEPTIDEDATE", "DAYSWAIT_CHRON_PA",
                    "DGN2_TCR", "DIAG_PA", "END_STAT_PA", "ENTERIC_DRAIN", "ENTERIC_DRAIN_DT", "GSTATUS_PA",
                    "GTIME_PA", "HBA1C_PA_TCR", "HBA1C_PA_TRR", "INFECT_PA", "INSULIN_DOSAGE_OLD_PA",
                    "INSULIN_DOSAGE_PA", "INSULIN_DUR_DON", "INSULIN_DURATION_PA", "INSULIN_PA",
                    "INSULIN_RESUMED_DATE_PA", "LIPASE", "MALIG_TCR_PA", "METHOD_BLOOD_SUGAR_CONTROL_PA", 
                    "NPPAN", "OPER_TECH", "PA_PRESERV_TM", "PANCREATIT_PA", "PREV_PA_TX", "PRI_PAYMENT_CTRY_TCR_PA",
                    "PRI_PAYMENT_CTRY_TRR_PA", "PRI_PAYMENT_TCR_PA", "PRI_PAYMENT_TRR_PA", "PRVTXDIF_PA",
                    "PX_NON_COMPL_PA", "REJ_ACUTE_PA", "REJ_CHRONIC_PA", "REJ_HYPER_PA", "REJCNF_PA", "REJTRT_PA",
                    "RETXDATE_PA", "TRTREJ1Y_PA", "TRTREJ6M_PA", "TX_PROCEDUR_TY_PA", "TX_TYPE", "VASC_MGMT",
                    "VEN_EXT_GRF", 'Kidney_Pancreas_PRA', 'Kidney_Malig_Followup', 'Kidney_Followup', 
                   ]

kidpan_duplicate_columns = ['DGN_TCR', 'AGE_BIN', 'INIT_AGE_BIN', 'INIT_AGE', 'AGE_BIN_DON', 'AGE_BIN_REC']

# other unwanted columns (e.g. PTIME measures time until death while GTIME_KI measures time until kidney failure)
kidpan_other_to_delete = ["PTIME", "PSTATUS", "PX_STAT", "PX_STAT_DATE", "PT_CODE", 'END_OPO_CTR_CODE', 'CTR_CODE',
                         'RECOV_FACILITY_CODE', 'LISTING_CTR_CODE', 'VAL_DT_TCR', 'VAL_DT_TRR', 'VAL_DT_LDR',
                          'Kidney_Pancreas_WL_History', 'Kidney_Pancreas_Immuno_Followup', 'Kidney_Pancreas_HLA',
                          'Kidney_Pancreas_Immuno_Discharge', '	INIT_OPO_CTR_CODE', 
                         ]

kidpan_unwanted_columns = list(set(kidpan_post_transplant_columns + kidpan_at_transplant_columns +
                                   kidpan_irrelevant_columns + kidpan_pancreas_columns + 
                                   kidpan_duplicate_columns + kidpan_other_to_delete))

#### Unwanted columns from the Living_Donor table

In [24]:
living_other_organ_columns = ["ARRHYTHMIA", "ARRHYTHMIA_POSTOP", "BILIARY_COMP", "BILIARY_COMP_GRADE", "BIOPSY_LI",
                             "INTRAOP_COMP", "INTRAOP_COMP_REASON", "LI_PROC_TY", "LIVER_RECOV", "LU_COMP",
                              "LU_COMP_REASON", "LU_PROC_TY", "LUNG_RECOV", "SACRIFICE_LOBE", "THORAC_TUBES",
                              
                             ]
living_post_transplant_columns = ["BP_POSTOP_DIAST", "BP_POSTOP_SYST", "COD", "DEATH_DT", "FFP_UNITS", 
                                  "HYPERTENSION", "INIT_DISCHARGE_DT", "KI_CREAT_POSTOP", "KIDNEY_RECOV", 
                                  "OTH_COMP_KI", "OTH_COMP_KI_INTER", "OTH_COMP_LI", "OTH_COMP_LI_INTER",
                                  "OTH_INTER_PROC_KI", "OTH_INTER_PROC_KI_DT", "OTH_INTER_PROC_LI",
                                  "OTH_INTER_PROC_LI_DT", "PLATELETS_UNITS", "POSTOP_ALBUM", "POSTOP_ALK_PHOS",
                                  "POSTOP_BILI", "POSTOP_CREAT_LI", "POSTOP_INR", "POSTOP_SGOT_AST",
                                  "POSTOP_SGPT_ALT", "POSTOP_TEST_DT", "POSTOP_URINE_PROTEIN",
                                  "POSTOP_URINE_RATIO", "PRBC_UNITS", "REOP_BILIARY", "REOP_BILIARY_DT", 
                                  "REOP_BLEED_KI", "REOP_BLEED_KI_DT", "REOP_BLEED_LI", "REOP_BLEED_LI_DT",
                                  "REOP_BOWEL_KI", "REOP_BOWEL_KI_DT", "REOP_BOWEL_LI", "REOP_BOWEL_LI_DT", 
                                  "REOP_HERNIA_KI", "REOP_HERNIA_KI_DT", "REOP_HERNIA_LI", "REOP_HERNIA_LI_DT", 
                                  "REOP_LI_FAIL", "REOP_LI_FAIL_DT", "REOP_OTH_KI", "REOP_OTH_KI_DT",
                                  "REOP_OTH_LI", "REOP_OTH_LI_DT", "REOP_VASC_KI", "REOP_VASC_KI_DT", 
                                  "REOP_VASC_LI", "REOP_VASC_LI_DT", "REOPERATION_KI", "REOPERATION_LI",
                                  "VASC_COMP_KI", "VASC_COMP_KI_INTER", "VASC_COMP_LI", "VASC_COMP_LI_INTER",
                                  "WGT_KG"
                                 ]

living_at_transplant = ["CONVERT_OPEN_KI", "CONVERT_OPEN_LU", "KI_PROC_TY", 'ORG_RECOVERY_DT' ]

living_other_to_delete = ["DONOR_ID", "STATUS_LDR",'Living_Donor_Follow',  ]

living_unwanted_columns = list(set(living_other_organ_columns + living_post_transplant_columns +
                                   living_at_transplant + living_other_to_delete))

#### Deleting the columns identified as unwanted

In [25]:
new_columns = [x for x in reduced_columns if x not in kidpan_unwanted_columns and 
               x not in living_unwanted_columns]
len(new_columns)

183

In [26]:
kidpan_living = kidpan_living[new_columns]
kidpan_living

Unnamed: 0,WL_ORG,NUM_PREV_TX,A1,A2,B1,B2,DR1,DR2,GENDER_REC,ABO_REC,WGT_KG_TCR,HGT_CM_TCR,BMI_TCR,CITIZENSHIP_REC,PERM_STATE,FUNC_STAT_TCR,INIT_WGT_KG,INIT_HGT_CM,REM_CD,DAYSWAIT_CHRON,END_STAT,END_DATE,INIT_DATE,ETHNICITY,ETHCAT,INIT_BMI_CALC,END_BMI_CALC,REGION_REC,BW4,BW6,C1,C2,DR51,DR51_2,DR52,DR52_2,DR53,DR53_2,DQ1,DQ2,DATA_WAITLIST,OPO_CTR_CODE,INIT_OPO_CTR_CODE,DONATION,ON_DIALYSIS,INIT_STAT,EXH_PERIT_ACCESS,EXH_VASC_ACCESS,PREV_TX,PREV_KI_TX,MALIG_TRR,TX_DATE,PRE_TX_TXFUS,TXKID,DON_RETYP,DA1,DA2,DB1,DB2,DDR1,DDR2,RA1,RA2,RB1,RB2,RDR1,RDR2,AMIS,BMIS,DRMIS,HLAMIS,NPKID,HBV_CORE_DON,HBV_SUR_ANTIGEN_DON,ETHCAT_DON,CITIZENSHIP_DON,ABO_DON,GENDER_DON,HOME_STATE_DON,CANCER_SITE_DON,HIST_CIG_DON,HIST_HYPERTENS_DON,HIST_CANCER_DON,DIABETES_DON,HGT_CM_DON_CALC,WGT_KG_DON_CALC,BMI_DON_CALC,END_STAT_KI,DIAL_DATE,ABO_MAT,AGE,DISTANCE,DIAG_KI,DWFG_KI,GTIME_KI,GSTATUS_KI,DAYSWAIT_CHRON_KI,ORGAN,CMV_IGG_REC,CMV_IGM_REC,HBV_CORE_REC,HBV_SUR_ANTIGEN_REC,HCV_SEROSTATUS,HIV_SEROSTATUS,PREV_TX_ANY,SHARE_TY,PAYBACK,AGE_GROUP,MALIG,HGT_CM_CALC,WGT_KG_CALC,BMI_CALC,LT_ONE_WEEK_DON,EDUCATION_REC,DIAB,DRUGTRT_COPD,PERIP_VASC,MALIG_TCR_KI,PRI_PAYMENT_TCR_KI,CREAT_TRR,HAPLO_TY_MATCH_DON,CMV_TEST_DON,HBV_TEST_DON,HCV_TEST_DON,HCV_RIBA_DON,HCV_ANTIBODY_DON,LIV_DON_TY,EBV_SEROSTATUS,TOT_SERUM_ALBUM,EBV_TEST_DON,HCV_RNA_DON,EDUCATION_DON,HBV_DNA_DON,CMV_NUCLEIC_DON,CMV_IGG_DON,CMV_IGM_DON,WORK_INCOME_TCR,EBV_IGG_DON,EBV_IGM_DON,WT_QUAL_DATE,DAYSWAIT_ALLOC,AGE_DON,DBW6,DDQ1,DON_DATE,DON_ORG,HCV_ANTIBODY,HCV_RIBA,HOME_STATE,REGION_DON,VIRUSES_TESTED,DBW4,DC1,DDR52,DC2,DDQ2,DDR53,DDP1,DDP2,DDR51,CMV_NUCLEIC,DIABETES,FUNC_STAT,HBV_DNA,HCV_RNA,HEALTH_INS,HIST_CANCER,HIST_CIG,HIST_HYPER,MARITAL_STAT,NON_AUTO_BLOOD,PHYSICAL_CAPACITY,PREDON_HGT,PREDON_WGT,PREOP_URINE_PROTEIN,READMISSION_KI,TOBACCO_USE,WORK_INCOME,BP_PREOP_DIAST,BP_PREOP_SYST,EBV_IGG,EBV_IGM,KI_CREAT_PREOP
0,KI,0.0,3.0,25.0,7.0,27.0,1.0,13.0,F,A,60.0000,160.00,23.4375,1.0,MI,1.0,60.0000,160.00,15.0,247.0,4010.0,{'$date': '1994-11-02T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},0,1,23.4,23.4,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Y,Unknown,9331,N,N,4010.0,U,U,N,N,U,{'$date': '1994-11-02T00:00:00Z'},N,L,N,1.0,25.0,7.0,27.0,1.0,13.0,3.0,25.0,7.0,27.0,1.0,13.0,1.0,0.0,0.0,1.0,0.0,ND,N,1,1.0,O,F,MI,,,,,,,,,4010.0,,2.0,43.0,0.0,3011.0,N,5219.0,1.0,247.0,KI,U,U,ND,N,N,ND,N,3.0,N,A,U,167.0,56.0,20.1,N,,,,,,,6.7,3.0,Y,Y,Y,ND,N,4.0,,,,,,,,,,,,,,,44.0,95.0,2.0,{'$date': '1994-11-02T00:00:00.000Z'},LKI,N,ND,MI,10.0,Y,95.0,1.0,0.0,7.0,6.0,95.0,99.0,99.0,0.0,,,,,,,,,,,,,,,,,,,,,,,
1,KI,0.0,2.0,11.0,62.0,0.0,4.0,13.0,F,O,61.0000,163.00,22.9591,1.0,DE,998.0,61.0000,163.00,15.0,37.0,4010.0,{'$date': '1994-04-06T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},0,1,23.0,23.0,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Y,Unknown,14353,N,N,4010.0,U,U,N,N,U,{'$date': '1994-04-06T00:00:00Z'},Y,L,N,11.0,2.0,62.0,51.0,13.0,3.0,11.0,2.0,62.0,97.0,13.0,4.0,0.0,1.0,1.0,2.0,0.0,N,N,1,1.0,O,M,DE,,,,,,,,,4010.0,{'$date': '1991-04-01T00:00:00Z'},1.0,45.0,0.0,3011.0,N,1459.0,0.0,37.0,KI,U,U,N,N,N,ND,N,3.0,N,A,U,162.0,61.2,23.3,N,,,,,,,,3.0,Y,Y,Y,ND,N,1.0,,,,,,,,,,,,,,,66.0,95.0,1.0,{'$date': '1994-04-06T00:00:00.000Z'},LKI,N,ND,DE,2.0,Y,95.0,3.0,95.0,1.0,2.0,96.0,99.0,99.0,99.0,,,,,,,,,,,,,,,,,,,,,,,
2,KI,0.0,2.0,26.0,7.0,27.0,0.0,0.0,M,O,85.0000,175.00,27.7551,1.0,CA,998.0,85.0000,175.00,15.0,689.0,4010.0,{'$date': '1996-01-18T00:00:00Z'},{'$date': '1994-02-28T00:00:00Z'},1,4,27.8,27.8,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Y,Unknown,20243,,,4010.0,U,U,N,N,U,{'$date': '1996-01-18T00:00:00Z'},Y,L,N,2.0,30.0,16.0,70.0,4.0,8.0,2.0,26.0,7.0,27.0,2.0,6.0,1.0,2.0,2.0,5.0,0.0,N,N,4,2.0,O,F,CA,,,,,,,,,4010.0,{'$date': '1993-10-01T00:00:00Z'},1.0,43.0,0.0,3040.0,N,5212.0,1.0,689.0,KI,U,U,N,N,N,,N,3.0,N,A,U,175.0,79.0,25.8,N,,,,,,,,1.0,Y,Y,Y,ND,N,6.0,,,,,,,,,,,,,,,19.0,95.0,4.0,{'$date': '1996-01-18T00:00:00.000Z'},LKI,N,ND,CA,5.0,Y,96.0,97.0,95.0,97.0,7.0,95.0,99.0,99.0,96.0,,,,,,,,,,,,,,,,,,,,,,,
3,KI,0.0,2.0,30.0,13.0,46.0,9.0,12.0,F,B,46.0000,168.00,16.2982,1.0,WI,998.0,46.0000,168.00,15.0,548.0,4010.0,{'$date': '1995-08-31T00:00:00Z'},{'$date': '1994-03-01T00:00:00Z'},0,5,16.3,16.3,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Y,Unknown,25172,,,4010.0,U,U,N,N,U,{'$date': '1995-08-31T00:00:00Z'},Y,L,N,2.0,97.0,46.0,97.0,8.0,9.0,2.0,30.0,13.0,46.0,9.0,12.0,0.0,0.0,1.0,1.0,0.0,ND,N,5,1.0,B,M,WI,,,,,,,,,4010.0,{'$date': '1993-11-01T00:00:00Z'},1.0,22.0,0.0,3041.0,N,2813.0,1.0,548.0,KI,U,U,ND,N,,ND,N,3.0,N,A,U,167.0,43.0,15.4,N,,,,,,,,3.0,Y,Y,Y,ND,N,1.0,,,,,,,,,,,,,,,47.0,95.0,6.0,{'$date': '1995-08-31T00:00:00.000Z'},LKI,N,ND,WI,7.0,Y,95.0,1.0,96.0,3.0,3.0,95.0,99.0,99.0,99.0,,,,,,,,,,,,,,,,,,,,,,,
4,KI,0.0,2.0,3.0,27.0,35.0,4.0,7.0,M,O,102.0000,185.00,29.8028,1.0,WI,1.0,102.0000,185.00,15.0,1738.0,4999.0,{'$date': '1998-12-03T00:00:00Z'},{'$date': '1994-03-01T00:00:00Z'},0,1,29.8,29.8,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Y,Unknown,25172,N,N,4010.0,U,U,N,N,U,{'$date': '1998-12-03T00:00:00Z'},Y,L,N,2.0,3.0,8.0,50.0,7.0,17.0,2.0,3.0,27.0,35.0,4.0,7.0,0.0,2.0,1.0,3.0,0.0,ND,N,1,1.0,O,F,WI,,,,,,,,,4999.0,{'$date': '1998-06-01T00:00:00Z'},1.0,53.0,0.0,3012.0,N,1947.0,1.0,1738.0,KI,U,U,ND,N,N,,N,3.0,N,A,U,185.0,102.0,29.8,N,,,,,,,,1.0,Y,Y,Y,ND,N,7.0,,,,,,,,,,,,,,,51.0,95.0,2.0,{'$date': '1998-12-03T00:00:00.000Z'},LKI,N,ND,WI,7.0,Y,96.0,99.0,95.0,99.0,97.0,95.0,99.0,99.0,96.0,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174376,KI,0.0,2.0,24.0,7.0,51.0,8.0,11.0,M,A,64.8640,172.72,21.7430,1.0,TX,2090.0,64.8640,172.72,15.0,7.0,4099.0,{'$date': '2022-09-28T00:00:00Z'},{'$date': '2022-09-21T00:00:00Z'},1,4,21.7,21.7,4,95.0,95.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Y,Unknown,11377,N,N,4099.0,N,N,N,N,,{'$date': '2022-09-28T00:00:00Z'},,L,,,,,,,,2.0,24.0,7.0,51.0,8.0,11.0,,,,,0.0,,,4,,O,F,,,,,,,,,,4099.0,,2.0,76.0,0.0,3070.0,N,,0.0,7.0,KI,,,,,,,N,3.0,N,A,U,172.7,64.9,21.7,N,6.0,3.0,,Y,N,1.0,,,,,,,,,,4.5,,,,,,,,N,,,{'$date': '2022-09-21T00:00:00Z'},7.0,66.0,,,{'$date': '2022-09-28T00:00:00.000Z'},LKI,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174377,KI,0.0,2.0,30.0,45.0,53.0,8.0,13.0,M,O,73.0283,172.72,24.4797,,MD,,73.0283,172.72,15.0,6.0,4010.0,{'$date': '2022-09-27T00:00:00Z'},{'$date': '2022-09-21T00:00:00Z'},0,2,24.5,24.5,2,95.0,95.0,4.0,16.0,96.0,96.0,2.0,96.0,96.0,96.0,202.0,501.0,Y,Unknown,14353,N,Y,4010.0,,,N,N,,{'$date': '2022-09-27T00:00:00Z'},,L,,,,,,,,2.0,30.0,45.0,53.0,8.0,13.0,,,,,0.0,,,2,,O,M,,,,,,,,,,4010.0,,1.0,61.0,0.0,,N,,0.0,6.0,KI,,,,,,,N,3.0,N,A,U,172.7,73.0,24.5,N,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2020-04-02T00:00:00Z'},908.0,27.0,,,{'$date': '2022-09-27T00:00:00.000Z'},LKI,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174378,KI,0.0,2.0,3.0,65.0,27.0,8.0,13.0,F,A,102.5120,154.94,42.7019,,IL,,102.5120,154.94,15.0,7.0,4010.0,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-22T00:00:00Z'},0,1,42.7,42.7,7,95.0,95.0,1.0,8.0,96.0,0.0,95.0,0.0,96.0,0.0,7.0,4.0,Y,Unknown,23002,N,Y,4010.0,,,N,N,,{'$date': '2022-09-29T00:00:00Z'},,L,,,,,,,,2.0,3.0,65.0,27.0,8.0,13.0,,,,,0.0,,,1,,A,F,,,,,,,,,,4010.0,,1.0,67.0,0.0,,N,,0.0,7.0,KI,,,,,,,N,3.0,N,A,U,154.9,102.5,42.7,N,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,{'$date': '2021-09-09T00:00:00Z'},385.0,60.0,,,{'$date': '2022-09-29T00:00:00.000Z'},LKI,,,,7.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
174379,KI,0.0,2.0,3.0,18.0,48.0,4.0,15.0,M,O,64.1000,167.64,22.8088,,TX,,64.1000,167.64,15.0,3.0,4010.0,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-26T00:00:00Z'},1,4,22.8,22.8,4,96.0,95.0,8.0,12.0,95.0,96.0,96.0,96.0,95.0,96.0,8.0,6.0,Y,Unknown,11377,N,N,4010.0,,,N,N,,{'$date': '2022-09-29T00:00:00Z'},,L,,,,,,,,2.0,3.0,18.0,48.0,4.0,15.0,,,,,0.0,,,1,,O,F,,,,,,,,,,4010.0,,1.0,67.0,0.0,,N,,0.0,3.0,KI,,,,,,,Y,3.0,N,A,U,167.6,64.1,22.8,N,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,44.0,,,{'$date': '2022-09-29T00:00:00.000Z'},LKI,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


#### Checking the % of NaN in the remaining columns 

In [27]:
NaN_count_df = pd.DataFrame(columns=["Feature", "Percentage of NaN"])
for col in kidpan_living.columns.to_list():
    totalRows = kidpan_living.shape[0]
    count = kidpan_living[col].isna().sum()
    percentage = count / totalRows * 100
    new_row = {"Feature": col, "Percentage of NaN": percentage}
    NaN_count_df = pd.concat([NaN_count_df, pd.DataFrame([new_row])])
    
NaN_count_df = NaN_count_df.sort_values(by="Percentage of NaN")
NaN_count_df = NaN_count_df.reset_index(drop=True)
NaN_count_df[0:220]

Unnamed: 0,Feature,Percentage of NaN
0,GENDER_REC,0.0
1,ABO_REC,0.0
2,LT_ONE_WEEK_DON,0.0
3,ETHCAT,0.0
4,REGION_REC,0.0
5,DON_ORG,0.0
6,NPKID,0.0
7,TX_DATE,0.0
8,ETHCAT_DON,0.0
9,OPO_CTR_CODE,0.0


# Categorical and Numerical values

#### Checking if columns are correctly identified as categorical or numerical

In [28]:
data_type_df = pd.DataFrame({ 'nunique': kidpan_living.nunique(), 'dtype': kidpan_living.dtypes})
data_type_df = data_type_df.sort_values(by='nunique', ascending=True)
data_type_df = data_type_df.reset_index()
data_type_df = data_type_df.rename(columns={'index': 'feature'})
data_type_df

Unnamed: 0,feature,nunique,dtype
0,REM_CD,1,float64
1,SHARE_TY,1,float64
2,PAYBACK,1,object
3,AGE_GROUP,2,object
4,LT_ONE_WEEK_DON,2,object
5,ETHNICITY,2,int64
6,PREV_TX_ANY,2,object
7,HIST_CIG_DON,2,object
8,DWFG_KI,2,object
9,GENDER_DON,2,object


In [29]:
sorted_columns = data_type_df['feature'].to_list()
kidpan_living.reindex(columns=sorted_columns)

Unnamed: 0,REM_CD,SHARE_TY,PAYBACK,AGE_GROUP,LT_ONE_WEEK_DON,ETHNICITY,PREV_TX_ANY,HIST_CIG_DON,DWFG_KI,GENDER_DON,GSTATUS_KI,OPO_CTR_CODE,DATA_WAITLIST,DONATION,HIST_CIG,NON_AUTO_BLOOD,DON_RETYP,PREV_TX,PREV_KI_TX,GENDER_REC,TXKID,DON_ORG,MALIG_TCR_KI,HCV_TEST_DON,EBV_TEST_DON,DRUGTRT_COPD,HIST_HYPERTENS_DON,ORGAN,MALIG,CMV_TEST_DON,PERIP_VASC,TOBACCO_USE,WORK_INCOME_TCR,HEALTH_INS,HBV_TEST_DON,AMIS,DRMIS,DIABETES,ON_DIALYSIS,ABO_MAT,DIABETES_DON,HIST_CANCER_DON,BMIS,PRE_TX_TXFUS,MALIG_TRR,EXH_PERIT_ACCESS,EXH_VASC_ACCESS,VIRUSES_TESTED,READMISSION_KI,WORK_INCOME,WL_ORG,PREOP_URINE_PROTEIN,HBV_DNA,EBV_SEROSTATUS,HCV_RNA_DON,CMV_NUCLEIC_DON,HBV_DNA_DON,CMV_IGM_DON,CMV_IGG_DON,HCV_RNA,HCV_RIBA_DON,HBV_SUR_ANTIGEN_DON,CMV_NUCLEIC,HBV_CORE_DON,HBV_SUR_ANTIGEN_REC,HCV_SEROSTATUS,HCV_ANTIBODY_DON,PHYSICAL_CAPACITY,HIV_SEROSTATUS,HCV_RIBA,EBV_IGG_DON,EBV_IGG,HCV_ANTIBODY,CMV_IGM_REC,EBV_IGM,EBV_IGM_DON,HBV_CORE_REC,CMV_IGG_REC,BW6,NPKID,BW4,DBW6,DBW4,HIST_HYPER,DIAB,CITIZENSHIP_DON,DDR53,DR53_2,CITIZENSHIP_REC,MARITAL_STAT,HLAMIS,END_STAT_KI,EDUCATION_DON,ETHCAT,ETHCAT_DON,EDUCATION_REC,DR53,HAPLO_TY_MATCH_DON,ABO_REC,DDR51,ABO_DON,DR51_2,END_STAT,DR51,INIT_STAT,DR52,DR52_2,REGION_DON,REGION_REC,NUM_PREV_TX,DDR52,PRI_PAYMENT_TCR_KI,FUNC_STAT,LIV_DON_TY,FUNC_STAT_TCR,DQ1,DQ2,HIST_CANCER,CANCER_SITE_DON,DDQ1,DDQ2,C1,DC1,C2,A1,A2,DA1,DA2,DC2,PERM_STATE,RA2,DDP1,HOME_STATE,HOME_STATE_DON,RA1,DR1,DDR1,DR2,DDR2,RDR1,RDR2,AGE_DON,DIAG_KI,AGE,B2,TOT_SERUM_ALBUM,DB1,B1,DB2,RB2,RB1,BP_PREOP_DIAST,DDP2,BP_PREOP_SYST,INIT_OPO_CTR_CODE,KI_CREAT_PREOP,END_BMI_CALC,BMI_CALC,INIT_BMI_CALC,HGT_CM_DON_CALC,PREDON_HGT,WGT_KG_DON_CALC,HGT_CM_CALC,DISTANCE,WGT_KG_CALC,INIT_HGT_CM,HGT_CM_TCR,CREAT_TRR,DAYSWAIT_CHRON_KI,DAYSWAIT_CHRON,DAYSWAIT_ALLOC,PREDON_WGT,INIT_WGT_KG,WT_QUAL_DATE,END_DATE,WGT_KG_TCR,TX_DATE,DON_DATE,INIT_DATE,GTIME_KI,DIAL_DATE,BMI_DON_CALC,BMI_TCR
0,15.0,3.0,N,A,N,0,N,,N,F,1.0,Unknown,Y,N,,,N,N,N,F,L,LKI,,Y,,,,KI,U,Y,,,,,Y,1.0,0.0,,N,2.0,,,0.0,N,U,U,U,Y,,,KI,,,,,,,,,,ND,N,,ND,N,N,N,,ND,ND,,,N,U,,,ND,U,0.0,0.0,0.0,95.0,95.0,,,1.0,95.0,0.0,1.0,,1.0,4010.0,,1,1,,0.0,3.0,A,0.0,O,0.0,4010.0,0.0,4010.0,0.0,0.0,10.0,10,0.0,0.0,,,4.0,1.0,0.0,0.0,,,2.0,6.0,0.0,1.0,0.0,3.0,25.0,1.0,25.0,7.0,MI,25.0,99.0,MI,MI,3.0,1.0,1.0,13.0,13.0,1.0,13.0,44.0,3011.0,43.0,27.0,,7.0,7.0,27.0,27.0,7.0,,99.0,,9331,,23.4,20.1,23.4,,,,167.0,0.0,56.0,160.00,160.00,6.7,247.0,247.0,,,60.0000,,{'$date': '1994-11-02T00:00:00Z'},60.0000,{'$date': '1994-11-02T00:00:00Z'},{'$date': '1994-11-02T00:00:00.000Z'},{'$date': '1994-02-28T00:00:00Z'},5219.0,,,23.4375
1,15.0,3.0,N,A,N,0,N,,N,M,0.0,Unknown,Y,N,,,N,N,N,F,L,LKI,,Y,,,,KI,U,Y,,,,,Y,0.0,1.0,,N,1.0,,,1.0,Y,U,U,U,Y,,,KI,,,,,,,,,,ND,N,,N,N,N,N,,ND,ND,,,N,U,,,N,U,0.0,0.0,0.0,95.0,95.0,,,1.0,96.0,0.0,1.0,,2.0,4010.0,,1,1,,0.0,3.0,O,99.0,O,0.0,4010.0,0.0,4010.0,0.0,0.0,2.0,2,0.0,95.0,,,1.0,998.0,0.0,0.0,,,1.0,2.0,0.0,3.0,0.0,2.0,11.0,11.0,2.0,1.0,DE,2.0,99.0,DE,DE,11.0,4.0,13.0,13.0,3.0,13.0,4.0,66.0,3011.0,45.0,0.0,,62.0,62.0,51.0,97.0,62.0,,99.0,,14353,,23.0,23.3,23.0,,,,162.0,0.0,61.2,163.00,163.00,,37.0,37.0,,,61.0000,,{'$date': '1994-04-06T00:00:00Z'},61.0000,{'$date': '1994-04-06T00:00:00Z'},{'$date': '1994-04-06T00:00:00.000Z'},{'$date': '1994-02-28T00:00:00Z'},1459.0,{'$date': '1991-04-01T00:00:00Z'},,22.9591
2,15.0,3.0,N,A,N,1,N,,N,F,1.0,Unknown,Y,,,,N,N,N,M,L,LKI,,Y,,,,KI,U,Y,,,,,Y,1.0,2.0,,,1.0,,,2.0,Y,U,U,U,Y,,,KI,,,,,,,,,,ND,N,,N,N,N,N,,,ND,,,N,U,,,N,U,0.0,0.0,0.0,95.0,96.0,,,2.0,95.0,0.0,1.0,,5.0,4010.0,,4,4,,0.0,1.0,O,96.0,O,0.0,4010.0,0.0,4010.0,0.0,0.0,5.0,5,0.0,95.0,,,6.0,998.0,0.0,0.0,,,4.0,7.0,0.0,97.0,0.0,2.0,26.0,2.0,30.0,97.0,CA,26.0,99.0,CA,CA,2.0,0.0,4.0,0.0,8.0,2.0,6.0,19.0,3040.0,43.0,27.0,,16.0,7.0,70.0,27.0,7.0,,99.0,,20243,,27.8,25.8,27.8,,,,175.0,0.0,79.0,175.00,175.00,,689.0,689.0,,,85.0000,,{'$date': '1996-01-18T00:00:00Z'},85.0000,{'$date': '1996-01-18T00:00:00Z'},{'$date': '1996-01-18T00:00:00.000Z'},{'$date': '1994-02-28T00:00:00Z'},5212.0,{'$date': '1993-10-01T00:00:00Z'},,27.7551
3,15.0,3.0,N,A,N,0,N,,N,M,1.0,Unknown,Y,,,,N,N,N,F,L,LKI,,Y,,,,KI,U,Y,,,,,Y,0.0,1.0,,,1.0,,,0.0,Y,U,U,U,Y,,,KI,,,,,,,,,,ND,N,,ND,N,,N,,ND,ND,,,N,U,,,ND,U,0.0,0.0,0.0,95.0,95.0,,,1.0,95.0,0.0,1.0,,1.0,4010.0,,5,5,,0.0,3.0,B,99.0,B,0.0,4010.0,0.0,4010.0,0.0,0.0,7.0,7,0.0,96.0,,,1.0,998.0,0.0,0.0,,,6.0,3.0,0.0,1.0,0.0,2.0,30.0,2.0,97.0,3.0,WI,30.0,99.0,WI,WI,2.0,9.0,8.0,12.0,9.0,9.0,12.0,47.0,3041.0,22.0,46.0,,46.0,13.0,97.0,46.0,13.0,,99.0,,25172,,16.3,15.4,16.3,,,,167.0,0.0,43.0,168.00,168.00,,548.0,548.0,,,46.0000,,{'$date': '1995-08-31T00:00:00Z'},46.0000,{'$date': '1995-08-31T00:00:00Z'},{'$date': '1995-08-31T00:00:00.000Z'},{'$date': '1994-03-01T00:00:00Z'},2813.0,{'$date': '1993-11-01T00:00:00Z'},,16.2982
4,15.0,3.0,N,A,N,0,N,,N,F,1.0,Unknown,Y,N,,,N,N,N,M,L,LKI,,Y,,,,KI,U,Y,,,,,Y,0.0,1.0,,N,1.0,,,2.0,Y,U,U,U,Y,,,KI,,,,,,,,,,ND,N,,ND,N,N,N,,,ND,,,N,U,,,ND,U,0.0,0.0,0.0,95.0,96.0,,,1.0,95.0,0.0,1.0,,3.0,4999.0,,1,1,,0.0,1.0,O,96.0,O,0.0,4999.0,0.0,4010.0,0.0,0.0,7.0,7,0.0,95.0,,,7.0,1.0,0.0,0.0,,,2.0,97.0,0.0,99.0,0.0,2.0,3.0,2.0,3.0,99.0,WI,3.0,99.0,WI,WI,2.0,4.0,7.0,7.0,17.0,4.0,7.0,51.0,3012.0,53.0,35.0,,8.0,27.0,50.0,35.0,27.0,,99.0,,25172,,29.8,29.8,29.8,,,,185.0,0.0,102.0,185.00,185.00,,1738.0,1738.0,,,102.0000,,{'$date': '1998-12-03T00:00:00Z'},102.0000,{'$date': '1998-12-03T00:00:00Z'},{'$date': '1998-12-03T00:00:00.000Z'},{'$date': '1994-03-01T00:00:00Z'},1947.0,{'$date': '1998-06-01T00:00:00Z'},,29.8028
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174376,15.0,3.0,N,A,N,1,N,,N,F,0.0,Unknown,Y,N,,,,N,N,M,L,LKI,N,,,,,KI,U,,Y,,N,,,,,,N,2.0,,,,,,N,N,,,,KI,,,,,,,,,,,,,,,,,,,,,,,,,,,,95.0,0.0,95.0,,,,3.0,,,0.0,1.0,,,4099.0,,4,4,6.0,0.0,,A,,O,0.0,4099.0,0.0,4099.0,0.0,0.0,4.0,4,0.0,,1.0,,,2090.0,0.0,0.0,,,,,0.0,,0.0,2.0,24.0,,,,TX,24.0,,,,2.0,8.0,,11.0,,8.0,11.0,66.0,3070.0,76.0,51.0,4.5,,7.0,,51.0,7.0,,,,11377,,21.7,21.7,21.7,,,,172.7,0.0,64.9,172.72,172.72,,7.0,7.0,7.0,,64.8640,{'$date': '2022-09-21T00:00:00Z'},{'$date': '2022-09-28T00:00:00Z'},64.8640,{'$date': '2022-09-28T00:00:00Z'},{'$date': '2022-09-28T00:00:00.000Z'},{'$date': '2022-09-21T00:00:00Z'},,,,21.7430
174377,15.0,3.0,N,A,N,0,N,,N,M,0.0,Unknown,Y,N,,,,N,N,M,L,LKI,,,,,,KI,U,,,,,,,,,,Y,1.0,,,,,,,,,,,KI,,,,,,,,,,,,,,,,,,,,,,,,,,,,95.0,0.0,95.0,,,,1.0,,,96.0,,,,4010.0,,2,2,,96.0,,O,,O,96.0,4010.0,96.0,4010.0,2.0,96.0,2.0,2,0.0,,,,,,202.0,501.0,,,,,4.0,,16.0,2.0,30.0,,,,MD,30.0,,,,2.0,8.0,,13.0,,8.0,13.0,27.0,,61.0,53.0,,,45.0,,53.0,45.0,,,,14353,,24.5,24.5,24.5,,,,172.7,0.0,73.0,172.72,172.72,,6.0,6.0,908.0,,73.0283,{'$date': '2020-04-02T00:00:00Z'},{'$date': '2022-09-27T00:00:00Z'},73.0283,{'$date': '2022-09-27T00:00:00Z'},{'$date': '2022-09-27T00:00:00.000Z'},{'$date': '2022-09-21T00:00:00Z'},,,,24.4797
174378,15.0,3.0,N,A,N,0,N,,N,F,0.0,Unknown,Y,N,,,,N,N,F,L,LKI,,,,,,KI,U,,,,,,,,,,Y,1.0,,,,,,,,,,,KI,,,,,,,,,,,,,,,,,,,,,,,,,,,,95.0,0.0,95.0,,,,3.0,,,0.0,,,,4010.0,,1,1,,96.0,,A,,A,0.0,4010.0,96.0,4010.0,95.0,0.0,7.0,7,0.0,,,,,,7.0,4.0,,,,,1.0,,8.0,2.0,3.0,,,,IL,3.0,,,,2.0,8.0,,13.0,,8.0,13.0,60.0,,67.0,27.0,,,65.0,,27.0,65.0,,,,23002,,42.7,42.7,42.7,,,,154.9,0.0,102.5,154.94,154.94,,7.0,7.0,385.0,,102.5120,{'$date': '2021-09-09T00:00:00Z'},{'$date': '2022-09-29T00:00:00Z'},102.5120,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-29T00:00:00.000Z'},{'$date': '2022-09-22T00:00:00Z'},,,,42.7019
174379,15.0,3.0,N,A,N,1,Y,,N,F,0.0,Unknown,Y,N,,,,N,N,M,L,LKI,,,,,,KI,U,,,,,,,,,,N,1.0,,,,,,,,,,,KI,,,,,,,,,,,,,,,,,,,,,,,,,,,,95.0,0.0,96.0,,,,3.0,,,96.0,,,,4010.0,,4,1,,95.0,,O,,O,96.0,4010.0,95.0,4010.0,96.0,96.0,4.0,4,0.0,,,,,,8.0,6.0,,,,,8.0,,12.0,2.0,3.0,,,,TX,3.0,,,,2.0,4.0,,15.0,,4.0,15.0,44.0,,67.0,48.0,,,18.0,,48.0,18.0,,,,11377,,22.8,22.8,22.8,,,,167.6,0.0,64.1,167.64,167.64,,3.0,3.0,,,64.1000,,{'$date': '2022-09-29T00:00:00Z'},64.1000,{'$date': '2022-09-29T00:00:00Z'},{'$date': '2022-09-29T00:00:00.000Z'},{'$date': '2022-09-26T00:00:00Z'},,,,22.8088


#### No point in keeping columns with 1 unique value

In [30]:
kidpan_living = kidpan_living.drop(['SHARE_TY', 'PAYBACK', 'REM_CD'], axis=1)

#### Some columns are categorical but have numerical values representing the categories and are therefore incorrectly identified as numerical

In [31]:
categorical_incorrectly_identified_as_numerical = ['GSTATUS_KI', 'ETHNICITY', 'ABO_MAT',
                                                  'BW6', 'BW4', 'DBW6', 'DIAB', 'DBW4', 'HIST_HYPER',
                                                  'CITIZENSHIP_DON', 'DDR53', 'MARITAL_STAT', 'CITIZENSHIP_REC',
                                                  'DR53_2', 'HAPLO_TY_MATCH_DON', 'EDUCATION_DON', 'ETHCAT',
                                                   'EDUCATION_REC', 'DR53', 'END_STAT_KI', 'ETHCAT_DON', 'DDR51',
                                                   'DR51_2', 'DR51', 'REGION_DON', 'DR52_2', 'DDR52', 'REGION_REC',
                                                   'PRI_PAYMENT_TCR_KI', 'FUNC_STAT', 'LIV_DON_TY', 'FUNC_STAT_TCR',
                                                   'DQ1', 'DQ2', 'HIST_CANCER', 'CANCER_SITE_DON', 'DDQ2', 'DDQ1',
                                                   'C1', 'DC1', 'C2', 'A1', 'A2', 'DC2', 'DA1', 'DA2', 'RA2', 'DDP1',
                                                   'RA1', 'DDR1', 'DR1', 'DR2', 'DDR2', 'RDR1', 'RDR2', 'B2',
                                                   'DB1', 'B1', 'DB2', 'RB2', 'RB1', 'DDP2', 'DIAG_KI']
kidpan_living[categorical_incorrectly_identified_as_numerical] = kidpan_living[categorical_incorrectly_identified_as_numerical].astype('object')

In [32]:
# possibly drop these to prevent overcomplicanting, they are already included in HLAMIS
antigen_columns = ['BW6', 'BW4', 'DBW6', 'DBW4','DDR53','DR53_2','DR53','DDR51','DR51_2', 'DR51', 'DR52',
                   'DR52_2', 'DDR52','DQ1', 'DQ2', 'DDQ2', 'DDQ1', 'C1', 'DC1', 'C2', 'A1', 'A2', 'DC2',
                   'DA1', 'DA2', 'RA2', 'DDP1', 'RA1', 'DDR1', 'DR1', 'DR2', 'DDR2', 'RDR1', 'RDR2', 'B2',
                  'DB1', 'B1', 'DB2', 'RB2', 'RB1', 'DDP2', 
                  ]

In [33]:
kidpan_living = kidpan_living.drop(antigen_columns, axis=1)

#### Date columns are strings - transforming them into numerical by only using year

In [34]:
date_columns = ['WT_QUAL_DATE', 'END_DATE', 'TX_DATE', 'DON_DATE', 'INIT_DATE', 'DIAL_DATE']
for col in date_columns:
    years = []
    for value in kidpan_living[col]:
        if pd.notna(value):
            date_string = value.split("'")[3]
            year_string = date_string.split("-")[0]
            years.append(year_string)
        else:
            years.append(np.nan)
    kidpan_living[col] = years
    kidpan_living[col] =  pd.to_numeric(kidpan_living[col], errors='coerce').astype('float64')

In [35]:
kidpan_living[date_columns].dtypes

WT_QUAL_DATE    float64
END_DATE        float64
TX_DATE         float64
DON_DATE        float64
INIT_DATE       float64
DIAL_DATE       float64
dtype: object

In [36]:
kidpan_living

Unnamed: 0,WL_ORG,NUM_PREV_TX,GENDER_REC,ABO_REC,WGT_KG_TCR,HGT_CM_TCR,BMI_TCR,CITIZENSHIP_REC,PERM_STATE,FUNC_STAT_TCR,INIT_WGT_KG,INIT_HGT_CM,DAYSWAIT_CHRON,END_STAT,END_DATE,INIT_DATE,ETHNICITY,ETHCAT,INIT_BMI_CALC,END_BMI_CALC,REGION_REC,DATA_WAITLIST,OPO_CTR_CODE,INIT_OPO_CTR_CODE,DONATION,ON_DIALYSIS,INIT_STAT,EXH_PERIT_ACCESS,EXH_VASC_ACCESS,PREV_TX,PREV_KI_TX,MALIG_TRR,TX_DATE,PRE_TX_TXFUS,TXKID,DON_RETYP,AMIS,BMIS,DRMIS,HLAMIS,NPKID,HBV_CORE_DON,HBV_SUR_ANTIGEN_DON,ETHCAT_DON,CITIZENSHIP_DON,ABO_DON,GENDER_DON,HOME_STATE_DON,CANCER_SITE_DON,HIST_CIG_DON,HIST_HYPERTENS_DON,HIST_CANCER_DON,DIABETES_DON,HGT_CM_DON_CALC,WGT_KG_DON_CALC,BMI_DON_CALC,END_STAT_KI,DIAL_DATE,ABO_MAT,AGE,DISTANCE,DIAG_KI,DWFG_KI,GTIME_KI,GSTATUS_KI,DAYSWAIT_CHRON_KI,ORGAN,CMV_IGG_REC,CMV_IGM_REC,HBV_CORE_REC,HBV_SUR_ANTIGEN_REC,HCV_SEROSTATUS,HIV_SEROSTATUS,PREV_TX_ANY,AGE_GROUP,MALIG,HGT_CM_CALC,WGT_KG_CALC,BMI_CALC,LT_ONE_WEEK_DON,EDUCATION_REC,DIAB,DRUGTRT_COPD,PERIP_VASC,MALIG_TCR_KI,PRI_PAYMENT_TCR_KI,CREAT_TRR,HAPLO_TY_MATCH_DON,CMV_TEST_DON,HBV_TEST_DON,HCV_TEST_DON,HCV_RIBA_DON,HCV_ANTIBODY_DON,LIV_DON_TY,EBV_SEROSTATUS,TOT_SERUM_ALBUM,EBV_TEST_DON,HCV_RNA_DON,EDUCATION_DON,HBV_DNA_DON,CMV_NUCLEIC_DON,CMV_IGG_DON,CMV_IGM_DON,WORK_INCOME_TCR,EBV_IGG_DON,EBV_IGM_DON,WT_QUAL_DATE,DAYSWAIT_ALLOC,AGE_DON,DON_DATE,DON_ORG,HCV_ANTIBODY,HCV_RIBA,HOME_STATE,REGION_DON,VIRUSES_TESTED,CMV_NUCLEIC,DIABETES,FUNC_STAT,HBV_DNA,HCV_RNA,HEALTH_INS,HIST_CANCER,HIST_CIG,HIST_HYPER,MARITAL_STAT,NON_AUTO_BLOOD,PHYSICAL_CAPACITY,PREDON_HGT,PREDON_WGT,PREOP_URINE_PROTEIN,READMISSION_KI,TOBACCO_USE,WORK_INCOME,BP_PREOP_DIAST,BP_PREOP_SYST,EBV_IGG,EBV_IGM,KI_CREAT_PREOP
0,KI,0.0,F,A,60.0000,160.00,23.4375,1.0,MI,1.0,60.0000,160.00,247.0,4010.0,1994.0,1994.0,0,1,23.4,23.4,10,Y,Unknown,9331,N,N,4010.0,U,U,N,N,U,1994.0,N,L,N,1.0,0.0,0.0,1.0,0.0,ND,N,1,1.0,O,F,MI,,,,,,,,,4010.0,,2.0,43.0,0.0,3011.0,N,5219.0,1.0,247.0,KI,U,U,ND,N,N,ND,N,A,U,167.0,56.0,20.1,N,,,,,,,6.7,3.0,Y,Y,Y,ND,N,4.0,,,,,,,,,,,,,,,44.0,1994.0,LKI,N,ND,MI,10.0,Y,,,,,,,,,,,,,,,,,,,,,,,
1,KI,0.0,F,O,61.0000,163.00,22.9591,1.0,DE,998.0,61.0000,163.00,37.0,4010.0,1994.0,1994.0,0,1,23.0,23.0,2,Y,Unknown,14353,N,N,4010.0,U,U,N,N,U,1994.0,Y,L,N,0.0,1.0,1.0,2.0,0.0,N,N,1,1.0,O,M,DE,,,,,,,,,4010.0,1991.0,1.0,45.0,0.0,3011.0,N,1459.0,0.0,37.0,KI,U,U,N,N,N,ND,N,A,U,162.0,61.2,23.3,N,,,,,,,,3.0,Y,Y,Y,ND,N,1.0,,,,,,,,,,,,,,,66.0,1994.0,LKI,N,ND,DE,2.0,Y,,,,,,,,,,,,,,,,,,,,,,,
2,KI,0.0,M,O,85.0000,175.00,27.7551,1.0,CA,998.0,85.0000,175.00,689.0,4010.0,1996.0,1994.0,1,4,27.8,27.8,5,Y,Unknown,20243,,,4010.0,U,U,N,N,U,1996.0,Y,L,N,1.0,2.0,2.0,5.0,0.0,N,N,4,2.0,O,F,CA,,,,,,,,,4010.0,1993.0,1.0,43.0,0.0,3040.0,N,5212.0,1.0,689.0,KI,U,U,N,N,N,,N,A,U,175.0,79.0,25.8,N,,,,,,,,1.0,Y,Y,Y,ND,N,6.0,,,,,,,,,,,,,,,19.0,1996.0,LKI,N,ND,CA,5.0,Y,,,,,,,,,,,,,,,,,,,,,,,
3,KI,0.0,F,B,46.0000,168.00,16.2982,1.0,WI,998.0,46.0000,168.00,548.0,4010.0,1995.0,1994.0,0,5,16.3,16.3,7,Y,Unknown,25172,,,4010.0,U,U,N,N,U,1995.0,Y,L,N,0.0,0.0,1.0,1.0,0.0,ND,N,5,1.0,B,M,WI,,,,,,,,,4010.0,1993.0,1.0,22.0,0.0,3041.0,N,2813.0,1.0,548.0,KI,U,U,ND,N,,ND,N,A,U,167.0,43.0,15.4,N,,,,,,,,3.0,Y,Y,Y,ND,N,1.0,,,,,,,,,,,,,,,47.0,1995.0,LKI,N,ND,WI,7.0,Y,,,,,,,,,,,,,,,,,,,,,,,
4,KI,0.0,M,O,102.0000,185.00,29.8028,1.0,WI,1.0,102.0000,185.00,1738.0,4999.0,1998.0,1994.0,0,1,29.8,29.8,7,Y,Unknown,25172,N,N,4010.0,U,U,N,N,U,1998.0,Y,L,N,0.0,2.0,1.0,3.0,0.0,ND,N,1,1.0,O,F,WI,,,,,,,,,4999.0,1998.0,1.0,53.0,0.0,3012.0,N,1947.0,1.0,1738.0,KI,U,U,ND,N,N,,N,A,U,185.0,102.0,29.8,N,,,,,,,,1.0,Y,Y,Y,ND,N,7.0,,,,,,,,,,,,,,,51.0,1998.0,LKI,N,ND,WI,7.0,Y,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174376,KI,0.0,M,A,64.8640,172.72,21.7430,1.0,TX,2090.0,64.8640,172.72,7.0,4099.0,2022.0,2022.0,1,4,21.7,21.7,4,Y,Unknown,11377,N,N,4099.0,N,N,N,N,,2022.0,,L,,,,,,0.0,,,4,,O,F,,,,,,,,,,4099.0,,2.0,76.0,0.0,3070.0,N,,0.0,7.0,KI,,,,,,,N,A,U,172.7,64.9,21.7,N,6.0,3.0,,Y,N,1.0,,,,,,,,,,4.5,,,,,,,,N,,,2022.0,7.0,66.0,2022.0,LKI,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,
174377,KI,0.0,M,O,73.0283,172.72,24.4797,,MD,,73.0283,172.72,6.0,4010.0,2022.0,2022.0,0,2,24.5,24.5,2,Y,Unknown,14353,N,Y,4010.0,,,N,N,,2022.0,,L,,,,,,0.0,,,2,,O,M,,,,,,,,,,4010.0,,1.0,61.0,0.0,,N,,0.0,6.0,KI,,,,,,,N,A,U,172.7,73.0,24.5,N,,1.0,,,,,,,,,,,,,,,,,,,,,,,,,2020.0,908.0,27.0,2022.0,LKI,,,,2.0,,,,,,,,,,,,,,,,,,,,,,,,
174378,KI,0.0,F,A,102.5120,154.94,42.7019,,IL,,102.5120,154.94,7.0,4010.0,2022.0,2022.0,0,1,42.7,42.7,7,Y,Unknown,23002,N,Y,4010.0,,,N,N,,2022.0,,L,,,,,,0.0,,,1,,A,F,,,,,,,,,,4010.0,,1.0,67.0,0.0,,N,,0.0,7.0,KI,,,,,,,N,A,U,154.9,102.5,42.7,N,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,2021.0,385.0,60.0,2022.0,LKI,,,,7.0,,,,,,,,,,,,,,,,,,,,,,,,
174379,KI,0.0,M,O,64.1000,167.64,22.8088,,TX,,64.1000,167.64,3.0,4010.0,2022.0,2022.0,1,4,22.8,22.8,4,Y,Unknown,11377,N,N,4010.0,,,N,N,,2022.0,,L,,,,,,0.0,,,1,,O,F,,,,,,,,,,4010.0,,1.0,67.0,0.0,,N,,0.0,3.0,KI,,,,,,,Y,A,U,167.6,64.1,22.8,N,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,44.0,2022.0,LKI,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,


# Training the model

#### Dropping rows with missing target values

In [37]:
kidpan_living = kidpan_living.dropna(subset=['GSTATUS_KI', 'GTIME_KI'])
kidpan_living.shape

(173707, 139)

In [38]:
kidpan_living.to_csv('csv_data/final_kidpan_living.csv', index=False)

#### Splitting values into target and feature variables

In [39]:
y = kidpan_living[["GSTATUS_KI", "GTIME_KI"]]
X = kidpan_living.drop(['GSTATUS_KI' ,'GTIME_KI', 'END_DATE', 'DWFG_KI'], axis=1)
X.shape

(173707, 135)

#### Formating y for RSF

In [40]:
y_struct = np.zeros(y.shape[0], dtype=[('event', bool), ('time', float)])
y_struct['event'] = y.iloc[:, 0] == 1
y_struct['time'] = y.iloc[:, 1]

#### Splitting X into train and test data

In [41]:
random_state = 10

X_train, X_test, y_train, y_test = train_test_split(
    X, y_struct, test_size=0.2, random_state=random_state)

#### Defining categorical and numerical columns

In [42]:
all_cols = X.columns.tolist()
categorical_cols = X.select_dtypes(include=["object", "category"]).columns.tolist()
numerical_cols = list(filter(lambda x: x not in categorical_cols, all_cols))

#### Imputing numerical columns

In [43]:
numerical_transformer = SimpleImputer(strategy='mean')

In [44]:
numerical_imputed_X_train = pd.DataFrame(numerical_transformer.fit_transform(X_train[numerical_cols]))
numerical_imputed_X_test = pd.DataFrame(numerical_transformer.transform(X_test[numerical_cols]))
numerical_imputed_X_train.columns = X_train[numerical_cols].columns
numerical_imputed_X_test.columns = X_test[numerical_cols].columns
numerical_imputed_X_train

Unnamed: 0,NUM_PREV_TX,WGT_KG_TCR,HGT_CM_TCR,BMI_TCR,INIT_WGT_KG,INIT_HGT_CM,DAYSWAIT_CHRON,END_STAT,INIT_DATE,INIT_BMI_CALC,END_BMI_CALC,INIT_STAT,TX_DATE,AMIS,BMIS,DRMIS,HLAMIS,NPKID,HGT_CM_DON_CALC,WGT_KG_DON_CALC,BMI_DON_CALC,DIAL_DATE,AGE,DISTANCE,DAYSWAIT_CHRON_KI,HGT_CM_CALC,WGT_KG_CALC,BMI_CALC,CREAT_TRR,TOT_SERUM_ALBUM,WT_QUAL_DATE,DAYSWAIT_ALLOC,AGE_DON,DON_DATE,PHYSICAL_CAPACITY,PREDON_HGT,PREDON_WGT,PREOP_URINE_PROTEIN,BP_PREOP_DIAST,BP_PREOP_SYST,KI_CREAT_PREOP
0,0.0000,77.111000,162.560000,29.180200,77.111000,162.560000,29.000000,4010.00000,2005.000000,29.200000,29.20000,4010.000000,2005.0,2.0,2.0,2.0,6.0,0.0,169.163273,77.241155,26.887090,2005.000000,55.0,0.0,29.000000,162.600000,75.7,28.700000,5.200000,3.100000,2005.000000,29.000000,56.0,2005.0,1.000000,169.041805,77.230969,998.000000,73.992118,121.522718,0.900000
1,0.1178,77.052567,168.377734,31.960301,79.108104,169.209189,353.571883,4031.18402,2009.069039,27.181105,27.12702,4042.777133,1991.0,1.0,1.0,1.0,3.0,0.0,169.163273,77.241155,26.887090,1990.000000,47.0,0.0,353.592987,168.388668,99.7,26.511972,7.187792,3.898689,2010.572223,461.584109,20.0,1991.0,24.539167,169.041805,77.230969,33.344128,73.992118,121.522718,0.863871
2,0.0000,104.780000,184.150000,30.898300,104.780000,184.150000,29.000000,4099.00000,2016.000000,30.900000,30.90000,4099.000000,2017.0,2.0,2.0,1.0,5.0,0.0,185.400000,84.800000,24.670907,2004.989593,62.0,0.0,29.000000,184.200000,101.5,29.900000,3.110000,4.400000,2016.000000,29.000000,38.0,2017.0,1.000000,185.420000,84.820000,2.000000,51.000000,111.000000,0.870000
3,1.0000,64.864000,160.020000,25.331200,64.864000,160.020000,581.000000,4010.00000,2011.000000,25.300000,25.30000,4010.000000,2012.0,2.0,2.0,0.0,4.0,0.0,162.600000,75.300000,28.494930,2011.000000,42.0,580.0,581.000000,160.000000,71.7,28.000000,4.600000,3.600000,2011.000000,581.000000,43.0,2012.0,1.000000,162.560000,75.300000,2.000000,70.000000,116.000000,0.800000
4,0.0000,63.049300,160.020000,24.622500,63.049300,160.020000,237.000000,4010.00000,2005.000000,24.600000,24.60000,4099.000000,2006.0,1.0,2.0,1.0,4.0,0.0,170.200000,81.700000,28.192861,2003.000000,62.0,0.0,237.000000,160.000000,67.5,26.400000,12.300000,3.900000,2005.000000,237.000000,44.0,2006.0,1.000000,170.180000,81.650000,2.000000,74.000000,119.000000,1.100000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
138960,0.1178,63.000000,165.000000,23.140500,79.108104,169.209189,353.571883,4031.18402,2009.069039,27.181105,27.12702,4042.777133,1992.0,0.0,0.0,1.0,1.0,0.0,169.163273,77.241155,26.887090,2004.989593,49.0,0.0,353.592987,165.000000,63.0,23.100000,7.187792,3.898689,2010.572223,461.584109,38.0,1992.0,24.539167,169.041805,77.230969,33.344128,73.992118,121.522718,0.863871
138961,0.0000,97.976000,187.960000,27.732500,97.976000,187.960000,161.000000,4010.00000,2010.000000,27.700000,27.70000,4010.000000,2010.0,2.0,2.0,2.0,6.0,0.0,162.600000,74.800000,28.320857,2009.000000,58.0,0.0,161.000000,187.900000,86.0,24.400000,4.200000,3.900000,2010.000000,161.000000,38.0,2010.0,1.000000,162.560000,74.840000,2.000000,68.000000,128.000000,0.700000
138962,0.1178,65.317300,165.100000,23.962600,79.108104,169.209189,353.571883,4031.18402,2009.069039,27.181105,27.12702,4042.777133,2008.0,0.0,0.0,1.0,1.0,3.0,185.400000,96.200000,27.969281,2007.000000,44.0,0.0,353.592987,165.100000,65.3,24.000000,5.200000,4.400000,2010.572223,461.584109,43.0,2008.0,1.000000,185.420000,96.160000,2.000000,94.000000,139.000000,1.000000
138963,0.0000,108.862200,180.340000,33.472900,108.862200,180.340000,520.000000,4010.00000,2008.000000,33.500000,33.50000,4010.000000,2009.0,1.0,1.0,1.0,3.0,0.0,154.900000,52.200000,21.727536,2004.989593,41.0,0.0,520.000000,180.300000,112.0,34.400000,7.700000,3.900000,2008.000000,520.000000,42.0,2009.0,1.000000,154.940000,52.160000,2.000000,72.000000,118.000000,0.600000


#### Imputing categorical columns

In [45]:
categorical_imputer = SimpleImputer(strategy='most_frequent')

In [46]:
categorical_imputed_X_train = pd.DataFrame(categorical_imputer.fit_transform(X_train[categorical_cols]))
categorical_imputed_X_test = pd.DataFrame(categorical_imputer.transform(X_test[categorical_cols]))
categorical_imputed_X_train.columns = X_train[categorical_cols].columns
categorical_imputed_X_test.columns = X_test[categorical_cols].columns

In [47]:
categorical_imputed_X_train = categorical_imputed_X_train.astype(str)
categorical_imputed_X_test = categorical_imputed_X_test.astype(str)

#### One-hot encoding categorical columns

In [48]:
enc = OneHotEncoder(sparse=False, handle_unknown = 'ignore')
encoded_array_train = enc.fit_transform(categorical_imputed_X_train)
encoded_array_test = enc.transform(categorical_imputed_X_test)
encoded_columns_train=enc.get_feature_names_out(categorical_imputed_X_train.columns)
encoded_columns_test=enc.get_feature_names_out(categorical_imputed_X_test.columns)
categorical_encoded_train = pd.DataFrame(encoded_array_train, columns=encoded_columns_train)
categorical_encoded_test = pd.DataFrame(encoded_array_test, columns=encoded_columns_test)



#### Merging back numerical and categorical columns

In [49]:
X_train = numerical_imputed_X_train.merge(categorical_encoded_train, left_index=True, right_index=True)
X_test = numerical_imputed_X_test.merge(categorical_encoded_test, left_index=True, right_index=True)

#### Defining, tranining and scoring the RSF model 

In [50]:
rsf = RandomSurvivalForest(n_estimators=100,
                           min_samples_split=10,
                           min_samples_leaf=10,
                           n_jobs=-1,
                           random_state=random_state)

In [51]:
rsf.fit(X_train, y_train)

RandomSurvivalForest(min_samples_leaf=10, min_samples_split=10, n_jobs=-1,
                     random_state=10)

In [52]:
rsf.score(X_test, y_test)

0.6387078856220165

#### Calculating permutation importance

In [None]:
result = permutation_importance(rsf, X_test, y_test, n_repeats=1, random_state=random_state, 
                                n_jobs=1, max_samples=0.1)

In [None]:
variable_importance_df = pd.DataFrame(
    {k: result[k] for k in ("importances_mean", "importances_std",)},
    index=X_test.columns
).sort_values(by="importances_mean", ascending=False)

variable_importance_df

In [None]:
reindexed_variable_importance_df = variable_importance_df.reset_index()
renamed_variable_importance_df = reindexed_variable_importance_df.rename(columns={'index': 'Feature'})
renamed_variable_importance_df

In [None]:
renamed_variable_importance_df.to_csv('variable_importances/new_imputed_livdon_n1.csv', index=False)