In [249]:
import os
import sys
import csv
import json
import moment
import pymysql
import datetime

import numpy as np
import scipy as sp
import scipy.stats as stats
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from collections import defaultdict

from lifelines import CoxPHFitter
from lifelines import KaplanMeierFitter

%matplotlib inline

## This script can generate the results data file required for plots and tables

Step 1. Load the data file, then set which outcome variable (`outcome` and its matching `days_to` column) you want to generate data for.

In [250]:
# Available cohort snapshots -- leave exactly one assignment active.
# data_file_name = 'data/pandas_df_v2_2020-04-25.pkl'
# data_file_name = 'data/pandas_df_v3_2020-04-25.pkl'
data_file_name = 'data/pandas_df_v3-1_2020-04-25.pkl'

df = pd.read_pickle(data_file_name)

# The file name has the form "<prefix>_df_<version>_<date>.<ext>"; take the part
# after "_df_", drop the extension, and split it into the version and date tags.
version_and_date = data_file_name.split('_df_')[1].split('.')[0]
_version, date_retrieved = version_and_date.split('_')
print(_version, date_retrieved)

v3-1 2020-04-25


In [256]:
# Select the event indicator column and its matching time-to-event column.
# Both names must be switched together.
outcome, days_to = 'intubated', 'days_to_intubation'

# outcome, days_to = 'died', 'days_to_death'

In [257]:
# Build the analysis cohort from the full data frame.
# NaN indicate people who were not covid positive; dropna() removes every row
# that has a NaN in any column.
covidpos = df.dropna()

# remove patients who were intubated before they were diagnosed
# (the printed number is how many rows this filter removes)
print(sum(covidpos["days_to_intubation"] < 0))
covidpos = covidpos[covidpos["days_to_intubation"] >= 0]

# remove patients that died before they were diagnosed
print(sum(covidpos["days_to_death"] < 0))
covidpos = covidpos[covidpos["days_to_death"] >= 0]

# remove patients who were intubated or died 90 or more days later
# (report the rows removed by this cutoff, not the negative-day count again)
beyond_cutoff = (covidpos["days_to_intubation"] >= 90) | (covidpos["days_to_death"] >= 90)
print(sum(beyond_cutoff))
# .copy() gives an independent frame so the column assignment below does not
# write to a slice of `df` (avoids pandas' SettingWithCopyWarning)
covidpos = covidpos[~beyond_cutoff].copy()

# 0/1 indicator for patients older than 65
covidpos['age_over_65'] = (covidpos['age'] > 65).astype('int64')

covidpos.describe()

121
12
0


Unnamed: 0,pat_mrn_id,intubated,days_to_intubation,died,days_to_death,age,sex,macula,compl_def,coagulation,...,refctrl,race_black,race_asian,race_white,race_other,race_declined,eth_hispanic,eth_nonhispanic,eth_declinedother,age_over_65
count,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,...,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0,6397.0
mean,1095011000.0,0.075191,19.607629,0.08504,19.901516,57.134029,0.497264,0.013756,0.000625,0.184305,...,0.113647,0.22151,0.023136,0.283727,0.280131,0.191496,0.339534,0.369079,0.291387,0.37502
std,131992200.0,0.263721,11.330646,0.278963,11.141045,19.87732,0.500032,0.116487,0.025,0.387763,...,0.317407,0.415295,0.150347,0.450841,0.449098,0.393509,0.473588,0.482593,0.454437,0.484166
min,1000011000.0,0.0,0.0,0.0,0.0,0.002738,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1004378000.0,0.0,11.0,0.0,11.0,41.478439,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1009221000.0,0.0,20.0,0.0,20.0,58.510609,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1200224000.0,0.0,28.0,0.0,28.0,72.005476,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,1.0,1.0
max,1400038000.0,1.0,84.0,1.0,83.0,120.202601,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Iterate through each of the covariates to model and save the summary for later plotting

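Covariates
- macula
- compl_def
- coagulation
- hypertension
- type2_diabetes
- obesity
- cad
- age over 65 (`age_over_65`)
- cough (this appears to be the `refctrl` column in the covariate list in the code below; please confirm)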

In [258]:
covariates = ['age_over_65', 'refctrl', 'macula', 'compl_def', 'coagulation', 'hypertension', 'type2_diabetes', 'obesity', 'cad']

# Two Cox proportional-hazards models are fitted for every covariate. Each spec
# pairs the key the result is stored under with the extra columns that are
# included in the model alongside the covariate itself.
model_specs = (
    ('univariate', []),                     # univariate analysis
    ('age_sex_corrected', ['age', 'sex']),  # age and sex corrected analysis
)

results = defaultdict(dict)

for covar in covariates:
    for label, adjustments in model_specs:
        covar_df = covidpos[[outcome, days_to, covar] + adjustments]
        cph = CoxPHFitter()
        cph.fit(covar_df, duration_col=days_to, event_col=outcome)
        # cph.print_summary()

        # keep only the summary row belonging to the covariate of interest
        results[covar][label] = cph.summary.T.to_dict()[covar]

In [259]:
# print(json.dumps(results, indent=2))

In [260]:
# save results to file
# The output name encodes the outcome, the data version and the retrieval date:
#   results/coxph_<outcome>_<version>_<date>.json
# Create the output directory if it is missing so a fresh checkout does not fail.
os.makedirs('results', exist_ok=True)

# The context manager closes the file even if serialising or writing raises.
with open('results/coxph_%s_%s_%s.json' % (outcome, _version, date_retrieved), 'w') as outfh:
    outfh.write(json.dumps(results, indent=2))