In [None]:
import os
import pandas as pd
import numpy as np
from scipy.stats import shapiro, ttest_ind, mannwhitneyu, chi2_contingency, fisher_exact
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import SimpleImputer, IterativeImputer
from rpy2.robjects.packages import importr
from rpy2.robjects import numpy2ri
import scipy.stats as stats

In [None]:
# Read 'sepsis_cleaned.csv' (path relative to the working directory) into `df`;
# the ED-time tables below are computed from this frame.
df = pd.read_csv(filepath_or_buffer='sepsis_cleaned.csv')

In [None]:
# Display the (rows, columns) dimensions of the frame just loaded.
df.shape

In [None]:
# Scale both duration columns by 24 (the original note describes this as a
# conversion to hours).
# NOTE(review): the columns are overwritten in place, so running this cell a
# second time without reloading `df` multiplies the values by 24 again.
df[['EDTime', 'HospTime']] = df[['EDTime', 'HospTime']].mul(24)


In [None]:
# Bin ED length of stay into four ordered categories.
# pd.cut builds right-closed intervals, so the bins are [0, 6], (6, 12],
# (12, 24] and (24, inf). include_lowest=True closes the first bin on the
# left: without it a stay of exactly 0 falls outside every bin, receives a
# NaN group and is silently excluded from every groupby on 'EDTimeGroup'.
df['EDTimeGroup'] = pd.cut(
    df['EDTime'],
    bins=[0, 6, 12, 24, np.inf],
    labels=['<6', '6-12', '12-24', '>24'],
    include_lowest=True,
)

In [None]:
# Tally the rows in each ED-time bin, most frequent bin first.
# Rows whose 'EDTimeGroup' is missing are not counted.
edtime_counts = df['EDTimeGroup'].value_counts(sort=True, dropna=True)

In [None]:
# Median and quartiles (25th / 75th percentile) of HospTime per ED-time bin.
# Series.quantile skips missing values exactly like the 'median' aggregation
# does; np.percentile would instead return NaN for any bin that contains a
# single missing HospTime, leaving the quartiles inconsistent with the median.
# Named aggregation sets the output column names directly.
# observed=False keeps a row for every bin, including empty ones.
los_stats = df.groupby('EDTimeGroup', observed=False)['HospTime'].agg(
    Median='median',
    IQR_25=lambda x: x.quantile(0.25),
    IQR_75=lambda x: x.quantile(0.75),
)

In [None]:
# Count survivors (0) and deaths (1) per ED-time bin and derive % mortality.
# Assumes 'death' is coded 0/1 -- TODO confirm against the data dictionary.
# reindex guarantees that both outcome columns exist even if one outcome never
# occurs in the data; indexing [0] / [1] directly would raise KeyError then.
# fill_value=0 keeps the counts as integers instead of NaN-filled floats.
death_counts = (
    df.groupby('EDTimeGroup', observed=False)['death']
    .value_counts()
    .unstack(fill_value=0)
    .reindex(columns=[0, 1], fill_value=0)
)
# A bin with no patients yields 0 / 0 = NaN rather than an error.
death_counts['% Mortality'] = death_counts[1] / (death_counts[0] + death_counts[1]) * 100

In [None]:
# Kruskal-Wallis test for differences in HospTime and in mortality across the
# ED-time bins.
# - observed=True iterates only over bins that actually contain rows, so no
#   empty sample is handed to the test.
# - dropna() removes missing values per bin; with scipy's default
#   nan_policy='propagate' a single NaN would make the p-value NaN.
# NOTE(review): 'death' looks like a binary outcome; a chi-square test of
# independence (chi2_contingency is already imported) is the more
# conventional choice for it -- confirm the intended method.
hosp_time_p = stats.kruskal(
    *[group['HospTime'].dropna() for _, group in df.groupby('EDTimeGroup', observed=True)]
).pvalue
mortality_p = stats.kruskal(
    *[group['death'].dropna() for _, group in df.groupby('EDTimeGroup', observed=True)]
).pvalue

In [None]:
# Display the column labels of `df` (now including the derived 'EDTimeGroup').
df.columns

In [None]:
# Emit the Table 2 summary: bin sizes, hospital LOS statistics and mortality.
# Each entry becomes one printed line; entries containing '\n' add a blank line.
table2_lines = [
    'Table 2 Outcomes of patients according to length of stay in ED',
    f'{edtime_counts["<6"]} patients had EDTime < 6 hours',
    f'{edtime_counts["6-12"]} patients had 6-12 hours of EDTime',
    f'{edtime_counts["12-24"]} patients had 12-24 hours of EDTime',
    f'{edtime_counts[">24"]} patients had EDTime > 24 hours\n',
    'Hospital LOS (median [IQR])',
    los_stats.to_string(),
    '\nMortality, n (%)',
    death_counts.to_string(),
]
print(*table2_lines, sep='\n')

In [None]:
# Display the Kruskal-Wallis p-value for HospTime across the ED-time bins.
hosp_time_p

In [None]:
# Display the Kruskal-Wallis p-value for 'death' across the ED-time bins.
mortality_p

In [None]:
# Read 'sepsis.csv' (path relative to the working directory) into `df_org`;
# it supplies the 'deathtime' / 'intime' columns used for 28-day mortality.
df_org = pd.read_csv(filepath_or_buffer='sepsis.csv')

In [None]:
# Display the column labels available in the original (unfiltered) file.
df_org.columns

In [None]:
from datetime import datetime, timedelta

# Flag rows whose 'deathtime' is at most 28 days after 'intime' (1), else 0.
# A missing 'deathtime' gives a NaT difference, which compares as False and
# therefore ends up as 0.
df_org['28_day_mortality'] = (
    pd.to_datetime(df_org['deathtime'])
    .sub(pd.to_datetime(df_org['intime']))
    .le(timedelta(days=28))
    .astype(int)
)

In [None]:
# Attach the 28-day mortality flag from df_org to every row of df.
# The left join on the three identifier columns keeps all df rows; rows with
# no match in df_org receive NaN in the new column. The flag is renamed to
# 'mortality_28d' in the same chain.
# NOTE(review): duplicate key combinations in df_org would duplicate df rows.
merged_df = df.merge(
    df_org[['subject_id', 'stay_id', 'hadm_id', '28_day_mortality']],
    how='left',
    on=['subject_id', 'stay_id', 'hadm_id'],
).rename(columns={'28_day_mortality': 'mortality_28d'})

In [None]:
# Display the merged frame's dimensions; a row count above df's would mean the
# join matched some rows more than once.
merged_df.shape

In [None]:
# Preview the first rows of the merged frame.
merged_df.head()

In [None]:
# Count 28-day survivors (0) and deaths (1) per ED-time bin and derive the
# percentage of deaths.
# reindex guarantees that both outcome columns exist even if one outcome never
# occurs; indexing [0] / [1] directly would raise KeyError in that case.
# fill_value=0 keeps the counts as integers instead of NaN-filled floats.
# Rows with a missing 'mortality_28d' (no match in the merge) are not counted.
death_counts_28d = (
    merged_df.groupby('EDTimeGroup', observed=False)['mortality_28d']
    .value_counts()
    .unstack(fill_value=0)
    .reindex(columns=[0, 1], fill_value=0)
)
# A bin with no patients yields 0 / 0 = NaN rather than an error.
death_counts_28d['% Mortality'] = death_counts_28d[1] / (death_counts_28d[0] + death_counts_28d[1]) * 100

In [None]:
# Kruskal-Wallis test for differences in 28-day mortality across ED-time bins.
# The left merge leaves NaN in 'mortality_28d' for unmatched rows; dropna()
# removes them because scipy's default nan_policy='propagate' would otherwise
# return a NaN p-value. observed=True skips bins that contain no rows.
# NOTE(review): for a binary outcome a chi-square test of independence is the
# more conventional choice -- confirm the intended method.
death_counts_28d_p = stats.kruskal(
    *[group['mortality_28d'].dropna() for _, group in merged_df.groupby('EDTimeGroup', observed=True)]
).pvalue

In [None]:
# Print a blank line, the heading and then the 28-day mortality table.
print('\n28_day_mortality, n (%)', death_counts_28d.to_string(), sep='\n')

In [None]:
# Display the Kruskal-Wallis p-value for 28-day mortality across ED-time bins.
death_counts_28d_p

In [None]:
# Count rows with IV == 0 and IV == 1 per ED-time bin and derive the
# percentage with IV == 1.
# Assumes 'IV' is coded 0/1 -- TODO confirm against the data dictionary.
# reindex guarantees that both columns exist even if one value never occurs;
# indexing [0] / [1] directly would raise KeyError in that case.
# fill_value=0 keeps the counts as integers instead of NaN-filled floats.
counts_iv = (
    df.groupby('EDTimeGroup', observed=False)['IV']
    .value_counts()
    .unstack(fill_value=0)
    .reindex(columns=[0, 1], fill_value=0)
)
# A bin with no patients yields 0 / 0 = NaN rather than an error.
counts_iv['% IV'] = counts_iv[1] / (counts_iv[0] + counts_iv[1]) * 100

In [None]:
# Kruskal-Wallis test for differences in 'IV' across the ED-time bins.
# dropna() removes missing values per bin, since scipy's default
# nan_policy='propagate' would otherwise return a NaN p-value.
# observed=True skips bins that contain no rows.
# NOTE(review): if 'IV' is binary, a chi-square test of independence is the
# more conventional choice -- confirm the intended method.
IV_p = stats.kruskal(
    *[group['IV'].dropna() for _, group in df.groupby('EDTimeGroup', observed=True)]
).pvalue

In [None]:
# Print a blank line, the heading, the IV table, a divider and the p-value,
# one item per line.
print(
    '\nIV n (%)',
    counts_iv.to_string(),
    "-----------------------",
    IV_p,
    sep='\n',
)