In [1]:
import os
import sys
import csv
import json
import moment
import pymysql
import datetime

import numpy as np
import scipy as sp
import scipy.stats as stats
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

from collections import defaultdict

%matplotlib inline

# Create a file that can make a pretty Table 1, Section B for the manuscript

## Groups of patients (columns)

- All
- COVID+ 
- COVID- 
- Macula 
- Complement deficiency
- Coagulation
- Hypertension
- Diabetes
- Obesity
- CAD

## Features (rows)

Statistical Results
- HR intubation univariate
- HR intubation age/sex corrected
- HR death univariate
- HR death age/sex corrected


In [2]:
# Result files to tabulate. Earlier runs followed the same naming scheme with
# version tokens v2, v3 and v3-1 (all dated 2020-04-25); swap the version token
# in both paths below to rebuild the table from one of those runs.
intub_results_file = 'results/coxph_intubated_v4_2020-04-25.json'
death_results_file = 'results/coxph_died_v4_2020-04-25.json'

# The file name ends in '_v<version>_<date>.json': take what follows '_v',
# drop the extension, then split the remainder into version and date.
_version, date_retrieved = (
    intub_results_file
    .split('_v')[1]
    .split('.')[0]
    .split('_')
)
_version = 'v%s' % _version
_version, date_retrieved

('v4', '2020-04-25')

## Load the statistical results that we want to also show in the table

In [3]:
# Load the saved model results for both outcomes.
# Each entry carries a display name (used as the row-label prefix) and the
# parsed JSON, which is indexed downstream as results[group][model][statistic].
# The files are opened in a `with` block so the handles are closed
# deterministically, even if parsing one of them fails.
with open(intub_results_file) as intub_fh, open(death_results_file) as death_fh:
    coxph_results = {
        'intub': {
            'name': 'Intub HR (95% CI)',
            'results': json.load(intub_fh)
        },
        'death': {
            'name': 'Death HR (95% CI)',
            'results': json.load(death_fh)
        }
    }

## This is the main function that will build the rows for a given dataset

Notes:
- `propci` is a function to compute the 95% confidence interval of a proportion
- `build_results` is a function that generates a dictionary of row values

In [13]:
def propci(p, n):
    """Return a proportion with its normal-approximation 95% interval.

    Parameters
    ----------
    p : observed proportion.
    n : number of observations the proportion is based on.

    Returns
    -------
    tuple
        ``(p, lower, upper)`` where the bounds are ``p -/+ 1.96 * sqrt(p*(1-p)/n)``.
        The bounds are not clipped to [0, 1].
    """
    std_err = np.sqrt((p * (1 - p) / n))
    lower = p - 1.96 * std_err
    upper = p + 1.96 * std_err
    return (p, lower, upper)

def build_results(all_results, group):
    """Format the hazard-ratio table cells for one patient group.

    Parameters
    ----------
    all_results : dict
        Maps an outcome key to ``{'name': <row label prefix>, 'results': ...}``
        where ``results[group][model]`` holds the keys ``'exp(coef)'``,
        ``'exp(coef) lower 95%'``, ``'exp(coef) upper 95%'`` and ``'p'``.
    group : key of the patient group to look up inside each outcome's results.

    Returns
    -------
    dict
        One entry per (outcome, model) pair, keyed ``'<outcome>_<model>'``,
        each a dict with a ``'name'`` (row label) and a ``'value'`` string of
        the form ``'HR (lower-upper)<marker>'`` rounded to one decimal.

    Raises
    ------
    KeyError
        If the group, a model, or one of the statistics is missing.
    """
    # (model key in the results, short label used in the row name)
    model_labels = (
        ('univariate', 'Univar.'),
        ('age_sex_corrected', 'Age & Sex Corr.'),
    )

    features = {}

    for outcome, outcome_info in all_results.items():
        group_stats = outcome_info['results'][group]

        for model, label in model_labels:
            fit = group_stats[model]
            hr = fit['exp(coef)']
            hr_lower = fit['exp(coef) lower 95%']
            hr_upper = fit['exp(coef) upper 95%']
            pval = fit['p']

            # Significance marker appended to the cell text.
            # NOTE(review): the '*' cut-off is 0.055 rather than the usual
            # 0.05 -- confirm this is intentional before changing it.
            if pval < 0.001:
                marker = '†'
            elif pval < 0.01:
                marker = '**'
            elif pval < 0.055:
                marker = '*'
            else:
                marker = ''

            features["%s_%s" % (outcome, model)] = {
                'name': "%s / %s" % (outcome_info['name'], label),
                'value': "%.1f (%.1f-%.1f)%s" % (hr, hr_lower, hr_upper, marker)
            }

    return features

# Quick look at the formatted rows for a single group ('macula').
# json.dumps escapes non-ASCII characters by default, which is why the dagger
# marker shows up as "\u2020" in the printed output.
print(json.dumps(build_results(coxph_results, 'macula'), indent=2))


{
  "intub_univariate": {
    "name": "Intub HR (95% CI) / Univar.",
    "value": "2.2 (1.3-3.7)**"
  },
  "intub_age_sex_corrected": {
    "name": "Intub HR (95% CI) / Age & Sex Corr.",
    "value": "1.8 (1.1-3.1)*"
  },
  "death_univariate": {
    "name": "Death HR (95% CI) / Univar.",
    "value": "3.0 (2.0-4.6)\u2020"
  },
  "death_age_sex_corrected": {
    "name": "Death HR (95% CI) / Age & Sex Corr.",
    "value": "1.5 (1.0-2.3)*"
  }
}


In [17]:
# Keys of the result rows, in the order they should appear in the table.
# These must match the '<outcome>_<model>' keys that build_results emits.
results_rows = [
    'intub_univariate',
    'intub_age_sex_corrected',
    'death_univariate',
    'death_age_sex_corrected',
]

# One entry per table column: a display name plus the formatted model results.
# The first four cohorts have no model results, so their 'results' is None;
# every other group is looked up in coxph_results under its own key.
table_data = {
    key: {
        'name': label,
        'results': build_results(coxph_results, key) if has_model else None,
    }
    for key, label, has_model in (
        ('all', 'All Patients', False),
        ('covidpos', 'SARS-Cov-2 Positive (C19+)', False),
        ('intubated', 'Intubated and C19+', False),
        ('death', 'Mortality and C19+', False),
        ('macula', 'Macula and C19+', True),
        ('compl_def', 'Complement Def. and C19+', True),
        ('coagulation', 'Coagulation and C19+', True),
        ('hypertension', 'Hypertension and C19+', True),
        ('type2_diabetes', 'Type 2 Diabetes and C19+', True),
        ('obesity', 'Obesity and C19+', True),
        ('cad', 'Coronary Artery Disease and C19+', True),
        ('refctrl', 'Cough (Reference) and C19+', True),
        ('age_over_65', 'Age Over 65 and C19+', True),
        ('smoker', 'Smoker and C19+', True),
    )
}

In [18]:
# Column order of the output table (deliberately not the same as the
# insertion order of table_data).
groups = ['all', 'covidpos', 'intubated', 'death', 'macula', 'compl_def', 'coagulation', 'hypertension', 'type2_diabetes', 'obesity', 'cad', 'age_over_65', 'refctrl', 'smoker']

# The `with` block guarantees the file is flushed and closed even if a row
# lookup raises. newline='' is what the csv module expects for files it writes
# (otherwise extra blank lines can appear on some platforms), and the encoding
# is explicit because cell values can contain the non-ASCII marker '†'.
with open('results/table1_sectionB_%s_%s.csv' % (_version, date_retrieved), 'w',
          newline='', encoding='utf-8') as outfh:
    writer = csv.writer(outfh)

    # Header row: a label column followed by one column per group.
    row_data = ['Variable']
    for group in groups:
        row_data.append(table_data[group]['name'])

    writer.writerow(row_data)

    # The results rows
    for row in results_rows:

        # The row label is taken from the first group that has results.
        row_name = None
        row_data = list()

        for group in groups:

            group_results = table_data[group]['results']

            # Groups without model results get an empty cell.
            if group_results is None:
                row_data.append('')
                continue

            if row_name is None:
                row_name = group_results[row]['name']

            # Echo the reference-control values as a visual sanity check.
            if group == 'refctrl':
                print(group, row, group_results[row]['value'])
            row_data.append(group_results[row]['value'])

        writer.writerow([row_name] + row_data)

refctrl intub_univariate 1.5 (1.1-1.9)**
refctrl intub_age_sex_corrected 1.4 (1.1-1.8)**
refctrl death_univariate 1.3 (1.1-1.7)*
refctrl death_age_sex_corrected 1.3 (1.1-1.6)*


In [19]:
# Inspect one assembled column entry: its display name and formatted result rows.
table_data['macula']

{'name': 'Macula and C19+',
 'results': {'intub_univariate': {'name': 'Intub HR (95% CI) / Univar.',
   'value': '2.2 (1.3-3.7)**'},
  'intub_age_sex_corrected': {'name': 'Intub HR (95% CI) / Age & Sex Corr.',
   'value': '1.8 (1.1-3.1)*'},
  'death_univariate': {'name': 'Death HR (95% CI) / Univar.',
   'value': '3.0 (2.0-4.6)†'},
  'death_age_sex_corrected': {'name': 'Death HR (95% CI) / Age & Sex Corr.',
   'value': '1.5 (1.0-2.3)*'}}}