In [2]:
import pandas as pd
import numpy as np

### Parse UNOS

In [3]:
def parse_UNOS(path_features='unos/raw/features.csv', path_outcomes='unos/raw/outcomes.csv'):
    """Build a dataframe laid out as [T, Y, X_1, ..., X_D] from the UNOS CSV files.

    Parameters
    ----------
    path_features : str
        CSV with exactly 50 columns, in the order given by ``feature_col_names``
        below. Defaults to the location originally hard-coded in this notebook.
    path_outcomes : str
        CSV with exactly 3 columns, in the order given by ``outcomes_col_names``
        below. Defaults to the location originally hard-coded in this notebook.

    Returns
    -------
    pandas.DataFrame
        Column 0 is the treatment ('LVAD'), column 1 is the outcome
        ('Survival Time'), followed by the 47 remaining feature columns.
        The censoring indicator and transplant year are read but not returned.

    Raises
    ------
    ValueError
        If either file has a different number of columns than expected (raised
        by pandas when the column names are assigned), or if the two files do
        not contain the same number of rows.
    """
    # NOTE(review): read_csv is called with its default header handling, so the
    # first line of each file is consumed as a header and then overwritten by
    # the names assigned below. If the raw files have no header line, the first
    # record is being dropped here -- confirm against the raw files.
    features_df = pd.read_csv(path_features)
    outcomes_df = pd.read_csv(path_outcomes)

    # Without this guard, the column-wise concat below aligns on the index and
    # silently fills the shorter file's rows with NaN.
    if len(features_df) != len(outcomes_df):
        raise ValueError(
            'Row count mismatch: {} feature rows vs {} outcome rows'.format(
                len(features_df), len(outcomes_df)))

    # Replace whatever header was read with the canonical column names.
    feature_col_names = [
        'Age', 'Gender', 'Ischemic Time', 'Diabetes', 'Infection', 'Transfusion',
        'Previous Transplant', 'Number of Previous Transplant', 'Ventilator Assist',
        'ECMO Assist', 'Vent Support', 'Creatinine', 'Bilirubin', 'PRA', 'HLA Mismatch',
        'Blood Type A', 'Blood Type B', 'Blood Type O', 'Blood Type AB', 'Dialysis', 'IABP',
        'Donor Age', 'Donor Gender', 'Donor Blood Type A', 'Donor Blood Type B',
        'Donor Blood Type O', 'Donor Blood Type AB', 'ABO Equal', 'ABO_Compatible',
        'ABO_Incompatible', 'HEP C Antigen', 'Donor Diabetes', 'Distance',
        'Days in State 1', 'Days in State 1A', 'Days in State 2', 'Days in State 1B',
        'BMI', 'Donor BMI', 'VAD Assist', 'Total Artificial Heart', 'Inotropic',
        'A Mismatch', 'B Mismatch', 'DR Mismatch', 'Height Difference', 'Weight Difference',
        'LVAD at listing', 'LVAD while listed', 'LVAD',
    ]
    features_df.columns = feature_col_names

    outcomes_col_names = ['Survival Time', 'Censored (Censored = 1)', 'Transplant Year']
    outcomes_df.columns = outcomes_col_names

    # Treatment assignment is the 'LVAD' column; all three LVAD columns are then
    # removed from the covariates.
    treatment_col = features_df['LVAD']
    features_df = features_df.drop(columns=['LVAD at listing', 'LVAD while listed', 'LVAD'])

    # Outcome is the survival time only.
    outcome_col = outcomes_df['Survival Time']

    # Assemble [T, Y, X_1, ..., X_D].
    unos_df = pd.concat([treatment_col, outcome_col, features_df], axis=1)

    return unos_df

In [4]:
def create_CSV(unos_df, output_filename):
    """Write ``unos_df`` without its outcome column to a CSV of the form [T, X_1, ..., X_D].

    Parameters
    ----------
    unos_df : pandas.DataFrame
        Frame containing a 'Survival Time' column. The caller's frame is not
        modified; ``drop`` returns a new frame.
    output_filename : str
        Destination path passed to ``DataFrame.to_csv``.

    Raises
    ------
    KeyError
        If ``unos_df`` has no 'Survival Time' column (raised by ``drop``).
    """
    # Remove the outcome so only treatment and covariates are written.
    without_outcome = unos_df.drop(columns='Survival Time')

    # index=False is the documented way to omit the row index from the file.
    without_outcome.to_csv(output_filename, index=False)


In [5]:
# Build the [T, Y, X_1, ..., X_D] frame from the raw UNOS CSVs, then write it
# (with the 'Survival Time' outcome column removed) to unos/unos_sample.csv.
# unos_df itself keeps the outcome column: create_CSV drops it on a copy.
unos_df = parse_UNOS()
create_CSV(unos_df, 'unos/unos_sample.csv')

In [6]:
# Feature matrix: every column of unos_df except the treatment indicator.
# NOTE(review): unos_df still contains the 'Survival Time' outcome at this
# point, so X holds the outcome alongside the 47 covariates (48 columns in
# total) -- confirm that including the outcome in X is intended.
X = np.array(unos_df.drop(columns='LVAD'))
X.shape

(60399, 48)

### Test Synthetic Model

In [64]:
import unos

In [65]:
# Point the project-local `unos` generator at the CSV written by create_CSV.
# NOTE(review): this rebinds `unos_df`, so the frame returned by parse_UNOS()
# is no longer reachable under that name after this cell runs.
# NOTE(review): no random seed is set in this notebook; if draw_sample() is
# stochastic the preview below will not reproduce exactly -- confirm in unos.py.
unos_generator = unos.UNOS_data('unos/unos_sample.csv')
unos_df = unos_generator.draw_sample()

# Preview the first rows of the drawn sample.
unos_df.head()

Unnamed: 0,Age,Gender,Ischemic Time,Diabetes,Infection,Transfusion,Previous Transplant,Number of Previous Transplant,Ventilator Assist,ECMO Assist,...,Total Artificial Heart,Inotropic,A Mismatch,B Mismatch,DR Mismatch,Height Difference,Weight Difference,Treatment,Response,TE
0,0.588784,1.0,0.100874,0.0,0.0,1.0,0.0,-0.172572,0.0,0.0,...,0.0,0.0,-0.618402,-1.325673,-0.817274,-0.610968,-0.327164,0.0,13.756142,53.686792
1,-0.040007,0.0,1.511202,0.0,0.0,0.0,0.0,-0.172572,0.0,0.0,...,0.0,0.0,-0.618402,-1.325673,-0.817274,0.391748,-0.472409,0.0,13.286175,51.946993
2,-0.5116,1.0,-0.035492,0.0,0.0,0.0,0.0,-0.172572,0.0,0.0,...,0.0,0.0,0.924703,-1.325673,-0.817274,-0.337197,-0.785245,0.0,31.969239,34.802092
3,0.536385,0.0,0.540629,0.0,0.0,0.0,0.0,-0.172572,0.0,0.0,...,0.0,0.0,0.924703,-1.325673,0.846109,-0.575193,0.785312,0.0,22.689151,42.451702
4,0.536385,1.0,-1.127057,0.0,1.0,0.0,0.0,-0.172572,0.0,0.0,...,0.0,1.0,0.924703,-1.325673,-0.817274,0.788911,0.133408,0.0,35.791539,31.432988
