In [1]:
# Authenticate this Colab session with Google, authorise gspread with the
# resulting default credentials, and mount Google Drive at /content/gdrive.
from google.colab import auth, drive
from google.auth import default
import gspread

auth.authenticate_user()
creds, _ = default()
gc = gspread.authorize(creds)
drive.mount('/content/gdrive')

# Uncomment the two lines below if you need to upload files into the session
##from google.colab import files
##uploaded = files.upload()


Mounted at /content/gdrive


In [2]:
#import relevant libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg

In [4]:
# Load the study data set.
# NOTE(review): this path is under /content, not under the Drive mount point
# /content/gdrive -- confirm the CSV is uploaded to the session before running.
supdf = pd.read_csv('/content/Data Comparison of Outcomes SUP CTS Study.csv')

# Build the two comparison cohorts from the 'Pre Feb 2020' flag column:
# "Y" rows form the pre-February-2020 group, "N" rows the post group.
supdf_prefeb20 = supdf.loc[supdf['Pre Feb 2020'].eq("Y")]
supdf_postfeb20 = supdf.loc[supdf['Pre Feb 2020'].eq("N")]

In [15]:
#######POST OPERATIVE STAY ANALYSIS#######

# Descriptive statistics of postoperative length of stay for each cohort
print("Pre Feb 2020\n",supdf_prefeb20['Postoperative stay in days'].describe())
print("\n")
print("Post Feb 2020\n",supdf_postfeb20['Postoperative stay in days'].describe())
print("\n")

# T-test for a difference in mean postoperative stay between the two cohorts
postop_stay_summary = pg.ttest(x=supdf_prefeb20['Postoperative stay in days'], y=supdf_postfeb20['Postoperative stay in days'])

# Derive the conclusion from the test result instead of hard-coding it, so the
# printed statement cannot contradict the table if the data changes.
# The recorded output names the p-value column 'p-val'; fall back to 'p_val'
# in case the installed pingouin release spells it differently.
postop_pval_col = 'p-val' if 'p-val' in postop_stay_summary.columns else 'p_val'
postop_stay_pvalue = postop_stay_summary[postop_pval_col].iloc[0]
if postop_stay_pvalue < 0.05:
  print("There is a statistically significant difference in the postop stays\n")
else:
  print("There is no statistically signficant difference in the postop stays\n")

# Last expression: rich display of the full t-test summary table
postop_stay_summary


Pre Feb 2020
 count    946.000000
mean       8.595137
std        5.760653
min        0.000000
25%        6.000000
50%        7.000000
75%        9.000000
max       48.000000
Name: Postoperative stay in days, dtype: float64


Post Feb 2020
 count    1014.000000
mean        8.360947
std         6.249493
min         1.000000
25%         6.000000
50%         7.000000
75%         8.000000
max        91.000000
Name: Postoperative stay in days, dtype: float64


There is no statistically signficant difference in the postop stays



Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,0.86326,1957.718762,two-sided,0.3881,"[-0.3, 0.77]",0.038912,0.074,0.138161


In [55]:
#######180 DAY MORTALITY ANALYSIS#######

# Contingency table of counts: 180-day mortality (rows) against the
# 'Pre Feb 2020' cohort flag (columns)
print("180 Day Mortality Breakdown")
mortality_crosstab = pd.crosstab(
    index=supdf['180D Mortality'],
    columns=supdf['Pre Feb 2020'],
)
mortality_crosstab

180 Day Mortality Breakdown


Pre Feb 2020,N,Y
180D Mortality,Unnamed: 1_level_1,Unnamed: 2_level_1
N,970,916
Y,44,30


In [85]:
# Fisher exact test (two-sided) on the mortality-by-cohort contingency table;
# the odds ratio and p-value are kept for the results summary cell
mortality_oddsratio, mortality_pvalue = stats.fisher_exact(
    mortality_crosstab,
    alternative='two-sided',
)

In [57]:
#######UGIB ANALYSIS#######

# Contingency table of counts: UGIB (rows) against the 'Pre Feb 2020'
# cohort flag (columns)
print("UGIB Breakdown")
ugib_crosstab = pd.crosstab(
    index=supdf['UGIB'],
    columns=supdf['Pre Feb 2020'],
)
ugib_crosstab

UGIB Breakdown


Pre Feb 2020,N,Y
UGIB,Unnamed: 1_level_1,Unnamed: 2_level_1
N,959,923
Y,55,23


In [86]:
# Fisher exact test (two-sided) on the UGIB-by-cohort contingency table;
# the odds ratio and p-value are kept for the results summary cell
ugib_oddsratio, ugib_pvalue = stats.fisher_exact(
    ugib_crosstab,
    alternative='two-sided',
)

In [69]:
#######OGD ANALYSIS#######

# Contingency table of counts: OGD (rows) against the 'Pre Feb 2020'
# cohort flag (columns)
print("OGD Breakdown")
ogd_crosstab = pd.crosstab(
    index=supdf['OGD'],
    columns=supdf['Pre Feb 2020'],
)
ogd_crosstab


OGD Breakdown


Pre Feb 2020,N,Y
OGD,Unnamed: 1_level_1,Unnamed: 2_level_1
N,973,928
Y,41,18


In [87]:
# Fisher exact test (two-sided) on the OGD-by-cohort contingency table;
# the odds ratio and p-value are kept for the results summary cell
ogd_oddsratio, ogd_pvalue = stats.fisher_exact(
    ogd_crosstab,
    alternative='two-sided',
)

In [72]:
#######DEEP STERNAL WOUND INFECTION ANALYSIS#######

# Contingency table of counts: deep sternal wound infection (rows) against
# the 'Pre Feb 2020' cohort flag (columns).
# NOTE(review): the recorded table for this outcome totals 1956 patients,
# while the other outcome tables total 1960 -- presumably four rows have no
# value in this column and are left out of the table; confirm.
print("Deep Sternal Wound Infection")
dswi_crosstab = pd.crosstab(
    index=supdf['Deep sternal wound infection'],
    columns=supdf['Pre Feb 2020'],
)
dswi_crosstab

Deep Sternal Wound Infection


Pre Feb 2020,N,Y
Deep sternal wound infection,Unnamed: 1_level_1,Unnamed: 2_level_1
No,1006,932
Yes,6,12


In [88]:
# Fisher exact test (two-sided) on the deep-sternal-wound-infection-by-cohort
# contingency table; the odds ratio and p-value are kept for the results
# summary cell
dswi_oddsratio, dswi_pvalue = stats.fisher_exact(
    dswi_crosstab,
    alternative='two-sided',
)

In [89]:
#Print All Results

ALPHA = 0.05        # significance threshold applied to every test in this report
N_COMPARISONS = 5   # Bonferroni multiplier: post-op stay, mortality, UGIB, OGD, DSWI


def _print_proportion_result(label, pvalue, oddsratio, alpha=ALPHA, n_comparisons=N_COMPARISONS):
  """Print the verdict of one Fisher exact test, then its Bonferroni follow-up.

  Parameters
  ----------
  label : str
      Outcome name inserted into the sentence (e.g. "UGIB").
  pvalue : float
      Unadjusted p-value of the test.
  oddsratio : float
      Odds ratio returned alongside the p-value.
  alpha : float
      Significance threshold for both the raw and the adjusted p-value.
  n_comparisons : int
      Number of analyses the Bonferroni correction accounts for.

  Prints one sentence stating whether the proportions differ at `alpha`.
  If the Bonferroni-adjusted p-value (pvalue * n_comparisons, capped at 1)
  is also below `alpha`, a second sentence reports it; otherwise an empty
  line is printed in its place.
  """
  if pvalue < alpha:
    verdict = "statistically significantly different"
  else:
    verdict = "NOT statistically significantly different"
  print("The", label, "proportions are", verdict, "with a pvalue of", "%.3f" % pvalue, "and an odds ratio of", "%.3f" % oddsratio)

  # An adjusted p-value below alpha implies the raw p-value is below alpha too,
  # so a single comparison is sufficient here.
  adjusted_pvalue = min(pvalue * n_comparisons, 1.0)
  if adjusted_pvalue < alpha:
    print("The results are still statistically significant after applying a Bonferonni Correction to adjust for the multiple analyses giving a pvalue of", "%.3f" % adjusted_pvalue)
  else:
    print()


# Post-operative stay: derive the conclusion from the t-test p-value instead of
# hard-coding it. The recorded output names the column 'p-val'; fall back to
# 'p_val' in case the installed pingouin release spells it differently.
postop_pval_col = 'p-val' if 'p-val' in postop_stay_summary.columns else 'p_val'
postop_stay_pvalue = postop_stay_summary[postop_pval_col].iloc[0]
if postop_stay_pvalue < ALPHA:
  print("There is a statistically significant difference in the postop stays\n")
else:
  print("There is no statistically signficant difference in the postop stays\n")
print(postop_stay_summary)
print('\n')

# Categorical outcomes, reported in the same order as the analysis cells.
# NOTE(review): with the tables laid out as in the recorded outputs (rows and
# columns both ordered negative-then-positive), an odds ratio below 1 appears
# to mean lower odds of the event in the 'Pre Feb 2020' == "Y" cohort -- confirm
# before quoting a direction of effect.
proportion_results = [
  ("mortality", mortality_pvalue, mortality_oddsratio),
  ("UGIB", ugib_pvalue, ugib_oddsratio),
  ("OGD", ogd_pvalue, ogd_oddsratio),
  ("Deep Sternal Wound Infection", dswi_pvalue, dswi_oddsratio),
]
for outcome_label, outcome_pvalue, outcome_oddsratio in proportion_results:
  _print_proportion_result(outcome_label, outcome_pvalue, outcome_oddsratio)
  print('\n')

There is no statistically signficant difference in the postop stays

              T          dof alternative   p-val         CI95%   cohen-d  \
T-test  0.86326  1957.718762   two-sided  0.3881  [-0.3, 0.77]  0.038912   

         BF10     power  
T-test  0.074  0.138161  


The mortality proportions are NOT statistically significantly different with a pvalue of 0.193 and an odds ratio of 0.722



The UGIB proportions are statistically significantly different with a pvalue of 0.001 and an odds ratio of 0.434
The results are still statistically significant after applying a Bonferonni Correction to adjust for the multiple analyses giving a pvalue of 0.004


The OGD proportions are statistically significantly different with a pvalue of 0.005 and an odds ratio of 0.460
The results are still statistically significant after applying a Bonferonni Correction to adjust for the multiple analyses giving a pvalue of 0.027


The Deep Sternal Wound Infection proportions are NOT statistically signifi