# Table 2: Admissions Demographics

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import math
import statistics
import numpy as np
import scipy.stats as st
from scipy import stats
alpha = .05
import statsmodels.api as sm

In [None]:
# Load the admissions spreadsheet; the Table 2 cells below all read from this
# `admissions` frame (directly or through the pos_ads / neg_ads subsets).
admissions = pd.read_excel('aml_data_8.16.23.xlsx')
# Show the last rows as a quick sanity check that the file loaded.
admissions.tail()
#len(admissions)

In [None]:
admissions.columns

In [None]:
# Summary table for Table 2: later cells add one column per category, each
# column holding the six-element list returned by get_stats.
df = pd.DataFrame()

## get_pval func


In [None]:
def get_pval(field):
    """Compare `infection_present` between admissions with field == 0 and field == 1.

    A Shapiro-Wilk test on the `field` column of `admissions` picks the
    two-sample test: Mann-Whitney U when its p-value is below the global
    `alpha`, otherwise an independent-samples t-test.

    Rows with a missing value in either column are dropped first. Without
    this, a single NaN in `field` makes the Shapiro-Wilk p-value unusable,
    the `< alpha` comparison is False, and the t-test branch is taken
    regardless of the data.

    NOTE(review): normality is assessed on `field` (presumably a 0/1
    indicator) rather than on `infection_present`, the variable actually
    compared -- confirm this is intended.

    Parameters
    ----------
    field : str
        Column of `admissions` used to form the two groups (values 0 and 1).

    Returns
    -------
    The result object of `stats.mannwhitneyu` or `stats.ttest_ind`.
    """
    # Complete cases only, so NaNs cannot leak into the normality check or the tests.
    ads_field = admissions.loc[:, ["infection_present", field]].dropna()
    bsi_neg = ads_field.loc[ads_field[field] == 0, 'infection_present']
    bsi_pos = ads_field.loc[ads_field[field] == 1, 'infection_present']

    tot_ads = stats.shapiro(ads_field[field])

    if tot_ads.pvalue < alpha:
        # Non-normal: rank-based Mann-Whitney U test.
        res = stats.mannwhitneyu(bsi_neg, bsi_pos)
    else:
        # No evidence against normality: independent-samples t-test.
        res = stats.ttest_ind(bsi_neg, bsi_pos)

    return res

## get_pval_b func

In [None]:
def get_pval_b(field):
    """Run Barnard's exact test on infection status versus `field`.

    Cross-tabulates `admissions['infection_present']` (rows) against
    `admissions[field]` (columns) and passes the count columns labelled
    0 and 1, in that order, to `stats.barnard_exact`.
    """
    counts = pd.crosstab(index=admissions['infection_present'], columns=admissions[field])
    # One entry per level of `field`; each entry is indexed by infection status.
    table = [counts[level] for level in (0, 1)]
    return stats.barnard_exact(table)

In [None]:
def get_pval_b2(field):
    """Run Boschloo's exact test on infection status versus `field`.

    Cross-tabulates `admissions['infection_present']` (rows) against
    `admissions[field]` (columns) and passes the count columns labelled
    0 and 1, in that order, to `stats.boschloo_exact`.
    """
    counts = pd.crosstab(index=admissions['infection_present'], columns=admissions[field])
    # One entry per level of `field`; each entry is indexed by infection status.
    table = [counts[level] for level in (0, 1)]
    return stats.boschloo_exact(table)

## get_pval_median func

In [None]:
def get_pval_median(field):
    """Compare `field` between infection-negative and infection-positive admissions.

    A Shapiro-Wilk test on `admissions[field]` picks the two-sample test:
    Mann-Whitney U when its p-value is below the global `alpha`, otherwise
    an independent-samples t-test. The groups come from the global
    `neg_ads` and `pos_ads` frames.

    Missing values are removed from all three series before any test runs.
    Previously NaNs reached `stats.shapiro`, which made its p-value
    unusable and silently forced the t-test branch. They also reached
    `stats.mannwhitneyu`, while only the t-test branch omitted them.

    Parameters
    ----------
    field : str
        Column present in `admissions`, `neg_ads` and `pos_ads`.

    Returns
    -------
    The result object of `stats.mannwhitneyu` or `stats.ttest_ind`.
    """
    all_values = admissions[field].dropna()
    neg_values = neg_ads[field].dropna()
    pos_values = pos_ads[field].dropna()

    result_tot_pat = stats.shapiro(all_values)
    if result_tot_pat.pvalue < alpha:
        # Non-normal: rank-based Mann-Whitney U test.
        tot_pat_res = stats.mannwhitneyu(neg_values, pos_values)
    else:
        # No evidence against normality: independent-samples t-test.
        tot_pat_res = stats.ttest_ind(neg_values, pos_values, nan_policy='omit')

    return tot_pat_res

## get_stats func

In [None]:
def get_stats(field,val):
    """Count and proportion of rows where `field == val`, per group.

    Returns a six-element list:
    [neg count, neg fraction, pos count, pos fraction, total count, total fraction],
    computed from the global `neg_ads`, `pos_ads` and `admissions` frames.
    Each fraction is the count divided by the number of rows in that frame.
    """
    summary = []
    for frame in (neg_ads, pos_ads, admissions):
        matches = len(frame[frame[field] == val])
        summary.append(matches)
        summary.append(matches / len(frame[field]))
    return summary

### Num admissions

In [None]:
# Infection-positive admissions: count, share of all admissions, distinct patients.
pos_ads = admissions[admissions['infection_present'] == 1]
print('There are ', len(pos_ads), ' infection-positive admissions.', sep='')
print('Percentage pos ', len(pos_ads)/len(admissions), sep='')
print('There are ', len(pos_ads.MRN.unique()), ' patients that were infected.', sep='')
#pos_ads.head()

# Infection-negative admissions: count and share of all admissions.
neg_ads = admissions[admissions['infection_present'] == 0]
print('There are ', len(neg_ads), ' infection-negative admissions.', sep='')
print('Percentage negative ', len(neg_ads)/len(admissions), sep='')
#neg_ads.head()

In [None]:
len(admissions.MRN.unique())

### Sex

In [None]:
# Encode sex as a 0/1 indicator ('M' -> 1, 'F' -> 0; any other value becomes NaN).
sex_to_male = {'M':1, 'F':0}
admissions['male'] = admissions['gender'].map(sex_to_male)
# neg_ads / pos_ads are filtered slices of `admissions`; assigning a column on
# them directly triggers pandas' SettingWithCopy / chained-assignment handling.
# `assign` returns a new frame with the column added instead.
neg_ads = neg_ads.assign(male=neg_ads['gender'].map(sex_to_male))
pos_ads = pos_ads.assign(male=pos_ads['gender'].map(sex_to_male))

In [None]:
df["Male"] = get_stats('male',1)
df["Female Stats"] = get_stats('male',0)
df

In [None]:
get_pval('male')

In [None]:
get_pval_b('male')

In [None]:
get_pval_b2('male')

### Race

In [None]:
df["White Stats"] = get_stats('white_caucasian',1)
df["Non White Stats"] = get_stats('white_caucasian',0)
df

In [None]:
get_pval('white_caucasian')

In [None]:
get_pval_b('white_caucasian')

In [None]:
get_pval_b2('white_caucasian')

In [None]:
pd.crosstab(index=admissions['infection_present'], columns=admissions['white_caucasian'])

### Diagnosis

In [None]:
df["AML"] = get_stats('AML_diag','AML')
#df["Down Syd AML"] = get_stats('AML_diag','Down Syd AML')
df["2nd AML"] = get_stats('AML_diag','2nd AML')
#df["APML"] = get_stats('AML_diag','APML')
df

In [None]:
# One indicator column per AML_diag category. dtype=int forces 0/1 integers:
# recent pandas defaults to bool, and the exact-test helpers look up
# crosstab columns by the labels 0 and 1.
one_hot = pd.get_dummies(admissions['AML_diag'], dtype=int)
# Drop indicator columns left by a previous run so the cell can be re-executed
# (join raises on overlapping column names).
admissions = admissions.drop(columns=one_hot.columns, errors='ignore').join(one_hot)

In [None]:
pos_ads = admissions[admissions['infection_present'] == 1]
neg_ads = admissions[admissions['infection_present'] == 0]

In [None]:
get_pval_median('AML')

In [None]:
get_pval_median('2nd AML')

In [None]:
get_pval_b('AML')

In [None]:
get_pval_b2('AML')

### Age

In [None]:
neg_ads.age.describe()

In [None]:
pos_ads.age.describe()

In [None]:
admissions.age.describe()

In [None]:
get_pval_median('age')

### First BMI

In [None]:
neg_ads.first_bmi_kg_m2.describe()

In [None]:
pos_ads.first_bmi_kg_m2.describe()

In [None]:
admissions.first_bmi_kg_m2.describe()

In [None]:
get_pval_median('first_bmi_kg_m2')

### Neutropenia

In [None]:
neg_ads.neutropenia.sum()

In [None]:
neg_ads.neutropenia.sum()/len(neg_ads)

In [None]:
pos_ads.neutropenia.sum()

In [None]:
pos_ads.neutropenia.sum()/len(pos_ads)

In [None]:
admissions.neutropenia.sum()

In [None]:
admissions.neutropenia.sum()/len(admissions)

### Lowest ANC

In [None]:
neg_ads.lowest_neutrophil.describe()

In [None]:
pos_ads.lowest_neutrophil.describe()

In [None]:
admissions.lowest_neutrophil.describe()

In [None]:
get_pval_median('lowest_neutrophil')

### Lowest Platelet

In [None]:
neg_ads.lowest_platelet.describe()

In [None]:
pos_ads.lowest_platelet.describe()

In [None]:
admissions.lowest_platelet.describe()

In [None]:
get_pval_median('lowest_platelet')

### LOS

In [None]:
neg_ads.LOS.describe()

In [None]:
pos_ads.LOS.describe()

In [None]:
admissions.LOS.describe()

In [None]:
get_pval_median('LOS')

In [None]:
stats.ttest_ind(neg_ads['LOS'],pos_ads['LOS'],nan_policy='omit')

### Num PICU visits

In [None]:
print(len(neg_ads[neg_ads['icu_los']>0]))
#len(neg_ads[neg_ads['icu_los']>0])/len(neg_ads)
neg_ads.icu_visit.sum()/len(neg_ads)

In [None]:
print(len(pos_ads[pos_ads['icu_los']>0]))
len(pos_ads[pos_ads['icu_los']>0])/len(pos_ads)

In [None]:
print(len(admissions[admissions['icu_los']>0]))
len(admissions[admissions['icu_los']>0])/len(admissions)

### PICU LOS

In [None]:
neg_ads.icu_los.describe()

In [None]:
pos_ads.icu_los.describe()

In [None]:
admissions.icu_los.describe()

In [None]:
get_pval_median('icu_los')

## Medications

In [None]:
# Medication analysis uses only admissions with both cytarabine and levo recorded.
admissions = admissions.dropna(subset=['cytarabine', 'levo'])
#admissions.dropna(subset=['vanco'],inplace=True)
# Rebuild the group frames from the filtered data. Otherwise get_stats would
# take its group counts and denominators from the unfiltered subsets while the
# totals come from the filtered frame.
pos_ads = admissions[admissions['infection_present'] == 1]
neg_ads = admissions[admissions['infection_present'] == 0]
len(admissions)

In [None]:
print(len(admissions[admissions.infection_present == 0]))
len(admissions[admissions.infection_present == 0])/len(admissions)

In [None]:
print(len(admissions[admissions.infection_present == 1]))
len(admissions[admissions.infection_present == 1])/len(admissions)

### Cytarabine

In [None]:
df["Cyt Stats"] = get_stats('cytarabine',1)
df["No Cyt Stats"] = get_stats('cytarabine',0)
df

In [None]:
get_pval('cytarabine')

In [None]:
get_pval_b('cytarabine')

In [None]:
get_pval_b2('cytarabine')

#### Cytarabine dosage

In [None]:
neg_ads["Cytarabine mg/m2/day"].describe()

In [None]:
pos_ads["Cytarabine mg/m2/day"].describe()

In [None]:
admissions["Cytarabine mg/m2/day"].describe()

In [None]:
# Compare cytarabine dose between groups on complete cases, without mutating
# neg_ads / pos_ads. Dropping rows from those shared frames in place would also
# remove them from the Levofloxacin and Vancomycin counts computed later.
# The decision rule matches get_pval_median: Shapiro-Wilk on the pooled values,
# then Mann-Whitney U if p < alpha, otherwise a t-test. Missing doses are also
# removed from the pooled series so the normality check sees real values only.
dose_col = 'Cytarabine mg/m2/day'
dose_all = admissions[dose_col].dropna()
dose_neg = neg_ads[dose_col].dropna()
dose_pos = pos_ads[dose_col].dropna()
if stats.shapiro(dose_all).pvalue < alpha:
    dose_res = stats.mannwhitneyu(dose_neg, dose_pos)
else:
    dose_res = stats.ttest_ind(dose_neg, dose_pos)
dose_res

In [None]:
#neg_ads['Cytarabine mg/m2/day'] = neg_ads['Cytarabine mg/m2/day'].replace(np.nan,0)
#pos_ads['Cytarabine mg/m2/day'] = pos_ads['Cytarabine mg/m2/day'].replace(np.nan,0)
#get_pval_median('Cytarabine mg/m2/day')

### Levofloxacin

In [None]:
df["Levo Stats"] = get_stats('levo',1)
df["No Levo Stats"] = get_stats('levo',0)
df

In [None]:
get_pval('levo')

In [None]:
get_pval_b('levo')

In [None]:
get_pval_b2('levo')

### Vancomycin

In [None]:
df["Vanco Stats"] = get_stats('vanco',1)
df["No Vanco Stats"] = get_stats('vanco',0)
df

In [None]:
get_pval('vanco')

In [None]:
get_pval_b('vanco')

In [None]:
get_pval_b2('vanco')

# Table 1: Patient Demographics

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import math
import statistics
import numpy as np
import scipy.stats as st

### get_pd_pval()

In [None]:
def get_pd_pval(field):
    """Compare `infection_present` between patients with field == 0 and field == 1.

    A Shapiro-Wilk test on the `field` column of `patients` picks the
    two-sample test: Mann-Whitney U when its p-value is below the global
    `alpha`, otherwise an independent-samples t-test.

    Rows with a missing value in either column are dropped first. Without
    this, a single NaN in `field` makes the Shapiro-Wilk p-value unusable,
    the `< alpha` comparison is False, and the t-test branch is taken
    regardless of the data.

    NOTE(review): normality is assessed on `field` (presumably a 0/1
    indicator) rather than on `infection_present`, the variable actually
    compared -- confirm this is intended.

    Parameters
    ----------
    field : str
        Column of `patients` used to form the two groups (values 0 and 1).

    Returns
    -------
    The result object of `stats.mannwhitneyu` or `stats.ttest_ind`.
    """
    # Complete cases only, so NaNs cannot leak into the normality check or the tests.
    patients_field = patients.loc[:, ["infection_present", field]].dropna()
    bsi_neg = patients_field.loc[patients_field[field] == 0, 'infection_present']
    bsi_pos = patients_field.loc[patients_field[field] == 1, 'infection_present']

    tot_patients = stats.shapiro(patients_field[field])

    if tot_patients.pvalue < alpha:
        # Non-normal: rank-based Mann-Whitney U test.
        res = stats.mannwhitneyu(bsi_neg, bsi_pos)
    else:
        # No evidence against normality: independent-samples t-test.
        res = stats.ttest_ind(bsi_neg, bsi_pos)

    return res

In [None]:
def get_pd_pval_b(field):
    """Run Barnard's exact test on infection status versus `field`, per patient.

    Cross-tabulates `patients['infection_present']` (rows) against
    `patients[field]` (columns) and passes the count columns labelled 0 and 1
    to `stats.barnard_exact`.

    The table is built from `patients`, matching the other get_pd_* helpers.
    It was previously built from `admissions`, so the Table 1 p-values were
    computed per admission rather than per patient.
    """
    contingency_table = pd.crosstab(index=patients['infection_present'], columns=patients[field])
    res = stats.barnard_exact([contingency_table[0],contingency_table[1]])
    return res

In [None]:
def get_pd_pval_b2(field):
    """Run Boschloo's exact test on infection status versus `field`, per patient.

    Cross-tabulates `patients['infection_present']` (rows) against
    `patients[field]` (columns) and passes the count columns labelled 0 and 1
    to `stats.boschloo_exact`.

    The table is built from `patients`, matching the other get_pd_* helpers.
    It was previously built from `admissions`, so the Table 1 p-values were
    computed per admission rather than per patient.
    """
    contingency_table = pd.crosstab(index=patients['infection_present'], columns=patients[field])
    res = stats.boschloo_exact([contingency_table[0],contingency_table[1]])
    return res

### get_pd_pval_median() func

In [None]:
def get_pd_pval_median(field):
    """Compare `field` between infection-negative and infection-positive patients.

    A Shapiro-Wilk test on `patients[field]` picks the two-sample test:
    Mann-Whitney U when its p-value is below the global `alpha`, otherwise
    an independent-samples t-test. The groups come from the global
    `neg_patients` and `pos_patients` frames.

    Missing values are removed from all three series before any test runs.
    Previously NaNs reached `stats.shapiro`, which made its p-value
    unusable and silently forced the t-test branch. They also reached
    `stats.mannwhitneyu`, while only the t-test branch omitted them.

    Parameters
    ----------
    field : str
        Column present in `patients`, `neg_patients` and `pos_patients`.

    Returns
    -------
    The result object of `stats.mannwhitneyu` or `stats.ttest_ind`.
    """
    all_values = patients[field].dropna()
    neg_values = neg_patients[field].dropna()
    pos_values = pos_patients[field].dropna()

    result_tot_pat = stats.shapiro(all_values)
    if result_tot_pat.pvalue < alpha:
        # Non-normal: rank-based Mann-Whitney U test.
        tot_pat_res = stats.mannwhitneyu(neg_values, pos_values)
    else:
        # No evidence against normality: independent-samples t-test.
        tot_pat_res = stats.ttest_ind(neg_values, pos_values, nan_policy='omit')

    return tot_pat_res

### get_pd_stats()

In [None]:
def get_pd_stats(field, value):
    """Print the count and proportion of patients with `field == value`.

    Three blocks are printed in order -- 'Neg BSI', 'Pos BSI', 'Total' --
    using the global `neg_patients`, `pos_patients` and `patients` frames.
    Each block is the label, the matching row count, and that count divided
    by the number of rows in the frame.
    """
    groups = (
        ('Neg BSI', neg_patients),
        ('Pos BSI', pos_patients),
        ('Total', patients),
    )
    for label, frame in groups:
        print(label)
        hits = len(frame[frame[field] == value])
        print(hits)
        print(hits / len(frame))

In [None]:
patients = pd.read_excel('patient_demographics_data.xlsx')
patients.columns

In [None]:
neg_patients = patients[patients['infection_present']==0]
pos_patients = patients[patients['infection_present']==1]

In [None]:
len(neg_patients)

In [None]:
len(neg_patients)/len(patients)

In [None]:
len(pos_patients)

In [None]:
len(pos_patients)/len(patients)

### Num Admissions

In [None]:
patients.num_admissions.describe()

In [None]:
import scipy.stats as st
# 95% normal-approximation interval around the mean number of admissions (all patients).
# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence`, and later releases reject `alpha=`.
st.norm.interval(0.95, loc=np.mean(patients.num_admissions), scale=st.sem(patients.num_admissions))

In [None]:
neg_patients.num_admissions.describe()

In [None]:
import scipy.stats as st
# 95% normal-approximation interval around the mean number of admissions (infection-negative patients).
# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence`, and later releases reject `alpha=`.
st.norm.interval(0.95, loc=np.mean(neg_patients.num_admissions), scale=st.sem(neg_patients.num_admissions))

In [None]:
pos_patients.num_admissions.describe()

In [None]:
# 95% normal-approximation interval around the mean number of admissions (infection-positive patients).
# The confidence level is passed positionally: SciPy renamed this keyword from
# `alpha` to `confidence`, and later releases reject `alpha=`.
st.norm.interval(0.95, loc=np.mean(pos_patients.num_admissions), scale=st.sem(pos_patients.num_admissions))

### Deaths

In [None]:
len(patients[patients.alive_dead == 1])
#len(patients[patients.alive_dead == 1])/len(patients)

In [None]:
len(neg_patients[neg_patients.alive_dead == 1])
#len(neg_patients[neg_patients.alive_dead == 1])/len(neg_patients)

In [None]:
len(pos_patients[pos_patients.alive_dead == 1])
#len(pos_patients[pos_patients.alive_dead == 1])/len(pos_patients)

In [None]:
get_pd_pval('alive_dead')

In [None]:
get_pd_pval_b('alive_dead')

In [None]:
get_pd_pval_b2('alive_dead')

### Sex

In [None]:
# Encode sex as a 0/1 indicator ('M' -> 1, 'F' -> 0; any other value becomes NaN).
sex_to_male = {'M':1, 'F':0}
patients = patients.assign(male=patients['gender'].map(sex_to_male))
# neg_patients / pos_patients are filtered slices of `patients`; writing a new
# column into them through .loc triggers pandas' SettingWithCopy handling.
# `assign` returns a new frame with the column added instead.
neg_patients = neg_patients.assign(male=neg_patients['gender'].map(sex_to_male))
pos_patients = pos_patients.assign(male=pos_patients['gender'].map(sex_to_male))

In [None]:
get_pd_stats('male',1)

In [None]:
get_pd_stats('male',0)

In [None]:
get_pd_pval('male')

In [None]:
get_pd_pval_b('male')

In [None]:
get_pd_pval_b2('male')

### Race

In [None]:
get_pd_stats('white_caucasian',1)

In [None]:
get_pd_stats('white_caucasian',0)

In [None]:
get_pd_pval('white_caucasian')

In [None]:
get_pd_pval_b('white_caucasian')

In [None]:
get_pd_pval_b2('white_caucasian')

### Diagnosis Age

In [None]:
neg_patients['age_diagnosis'].describe()

In [None]:
pos_patients['age_diagnosis'].describe()

In [None]:
patients['age_diagnosis'].describe()

In [None]:
get_pd_pval_median('age_diagnosis')

### Age at 1st infection

In [None]:
pos_patients['age_first_infection'].describe()

### Diagnosis

In [None]:
get_pd_stats('AML_diag','AML')

In [None]:
get_pd_stats('AML_diag','2nd AML')

In [None]:
# One indicator column per AML_diag category. dtype=int forces 0/1 integers:
# recent pandas defaults to bool, and the exact-test helpers look up
# crosstab columns by the labels 0 and 1.
one_hot = pd.get_dummies(patients['AML_diag'], dtype=int)
# Drop indicator columns left by a previous run so the cell can be re-executed
# (join raises on overlapping column names).
patients = patients.drop(columns=one_hot.columns, errors='ignore').join(one_hot)

In [None]:
neg_patients = patients[patients['infection_present']==0]
pos_patients = patients[patients['infection_present']==1]

In [None]:
get_pd_pval_median('AML')

In [None]:
get_pd_pval_median('2nd AML')

In [None]:
get_pd_pval_b('AML')

In [None]:
get_pd_pval_b2('AML')

# Table 5: Cyt > 2000

In [2]:
# Reload the admissions spreadsheet for Table 5, keeping only rows with a
# recorded `cytarabine` value.
admissions = pd.read_excel('aml_data_8.16.23.xlsx').dropna(subset=['cytarabine'])
admissions.head()
#len(admissions)

Unnamed: 0,MRN,age,race_code,white_caucasian,race,language_code,gender,dod,survival_days,survival_months,...,max_temp,max_temp_38.5,CV_line_days,fday_CV_line,lday_CV_line,CV_line_name,CV_buckets,CV_line,AML Diagnosis,AML_diag
51,38,7.34,White/Caucasian,1,White/Caucasian,English,M,NaT,4250,141.666667,...,40.6,1,,2006-09-18,NaT,BROVIAC or HICKMAN,Hickman,Hickman,AML,AML
52,39,4.12,White/Caucasian,1,White/Caucasian,English,F,NaT,5534,184.466667,...,40.2,1,179.0,2006-09-28,2007-03-27,BROVIAC or HICKMAN,Hickman,Hickman,AML,AML
53,19,4.22,White/Caucasian,1,White/Caucasian,English,M,NaT,5576,185.866667,...,40.4,1,182.0,2006-08-09,2007-02-08,BROVIAC or HICKMAN,Hickman,Hickman,AML,AML
54,40,1.02,White/Caucasian,1,White/Caucasian,English,F,2007-08-11,296,9.866667,...,39.0,1,,2006-10-20,NaT,BROVIAC or HICKMAN,Hickman,Hickman,AML,AML
55,36,6.33,White/Caucasian,1,White/Caucasian,English,M,NaT,5389,179.633333,...,38.7,1,236.0,2006-10-27,2007-06-21,BROVIAC or HICKMAN,Hickman,Hickman,AML,AML


In [3]:
table5 = pd.DataFrame()

In [4]:
# Infection-positive admissions: count, share of all admissions, distinct patients.
pos_ads = admissions[admissions['infection_present'] == 1]
print('There are ', len(pos_ads), ' infection-positive admissions.', sep='')
print('Percentage pos ', len(pos_ads)/len(admissions), sep='')
print('There are ', len(pos_ads.MRN.unique()), ' patients that were infected.', sep='')
#pos_ads.head()

# Infection-negative admissions: count and share of all admissions.
neg_ads = admissions[admissions['infection_present'] == 0]
print('There are ', len(neg_ads), ' infection-negative admissions.', sep='')
print('Percentage negative ', len(neg_ads)/len(admissions), sep='')
#neg_ads.head()

There are 86 infection-positive admissions.
Percentage pos 0.20046620046620048
There are 47 patients that were infected.
There are 343 infection-negative admissions.
Percentage negative 0.7995337995337995


In [5]:
# NOTE(review): get_stats is defined in the "get_stats func" cell of the
# Table 2 section. The NameError recorded for this cell means that
# definition had not been run in the current kernel -- run it first.
table5["no_cyt"] = get_stats('cytarabine',0)

NameError: name 'get_stats' is not defined

In [None]:
get_pval_b('cytarabine')

In [None]:
table5["cyt_2000"] = get_stats('cyt_2000',1)
table5["cyt_less_2000"] = get_stats('cyt_2000',0)
table5["cyt1_1999"] = get_stats('cyt_1_1999',1)
table5

In [None]:
get_pval_b('cyt_2000')

In [9]:
# Hard-coded 2x3 table of admission counts. The row totals (343 and 86) match
# the infection-negative / infection-positive counts printed earlier in this
# section. Columns presumably are the three cytarabine dose groups -- TODO
# confirm their order.
contingency_table = [
    [60, 224, 59],
    [7, 47, 32],
]
# Chi-square test of independence between the two rows across the three columns.
res = stats.chi2_contingency(contingency_table)
res

(18.06399917706291,
 0.00011952325673110384,
 2,
 array([[ 53.56876457, 216.67365967,  72.75757576],
        [ 13.43123543,  54.32634033,  18.24242424]]))