In [None]:
# Import necessary libraries
import pandas as pd
from count_ppis_by_pat_nights import count_ppis_by_pat_nights
import matplotlib.pyplot as plt
import pickle
from collections import Counter

In [None]:
# Load the pickled LUSO / episode-count dataframe.
# NOTE(review): pickle.load executes arbitrary code embedded in the file, so
# 'luso_epcount.pkl' must come from a trusted source (e.g. produced by this project).
with open('luso_epcount.pkl', 'rb') as f:
    luso_epcount = pickle.load(f)
    
# Load the CSV inputs used throughout the notebook (paths are relative to the
# notebook's working directory).
ppi_df = pd.read_csv('ppi_df.csv')
ppi_count_df = pd.read_csv('ppi_count_df.csv')
off_unit_df = pd.read_csv('off_unit_interruptions.csv')
pat_nights = pd.read_csv('patient_night_population.csv')
episode_durations = pd.read_csv('episode_durations.csv')
admission_details = pd.read_csv('admission_details.csv')

In [None]:
# Create a unique identifier for each patient night from its row position.
# The guard makes the cell safe to re-run: without it, a second execution adds a new
# 'index' column that the rename turns into a duplicate 'pat_night_index' column,
# which then breaks the merges keyed on it.
if 'pat_night_index' not in pat_nights.columns:
    pat_nights = pat_nights.reset_index().rename(columns={'index': 'pat_night_index'})

In [None]:
# Split the full PPI table by PPI type.
# Each subset is an explicit copy so the datetime conversion below writes to its own
# frame instead of to a slice of ppi_df (which raises SettingWithCopyWarning).
neuros_df = ppi_df[ppi_df['PPI_TYPE'] == 'Neuros'].copy()
meds_df = ppi_df[ppi_df['PPI_TYPE'] == 'Meds'].copy()
labs_df = ppi_df[ppi_df['PPI_TYPE'] == 'Labs & Imaging'].copy()
vitals_df = ppi_df[ppi_df['PPI_TYPE'] == 'Vitals'].copy()
on_unit_df = ppi_df[ppi_df['PPI_TYPE'] == 'On Unit'].copy()

# Off-unit interruptions come from their own file; keep only the columns shared
# with the other PPI frames and tag them with their type.
# NOTE: off_unit_df (like the other *_df names) is rebound to a count table in the
# next cell, so re-running this cell after that one requires reloading the CSVs first.
off_unit_df = off_unit_df[['PAT_ENC_CSN_ID', 'START_TIME', 'END_TIME']].copy()
off_unit_df['PPI_TYPE'] = 'Off Unit'

# Single source of truth for the name -> frame pairing (previously written out twice).
ppi_frames = {
    'vitals': vitals_df,
    'neuros': neuros_df,
    'meds': meds_df,
    'labs': labs_df,
    'off_units': off_unit_df,
    'on_units': on_unit_df,
}
dataframes = list(ppi_frames.values())

# Parse the interval bounds as datetimes
for frame in dataframes:
    frame['START_TIME'] = pd.to_datetime(frame['START_TIME'])
    frame['END_TIME'] = pd.to_datetime(frame['END_TIME'])

# Count PPIs by patient night for each PPI type
counts = {
    ppi_name: count_ppis_by_pat_nights(pat_nights, frame)
    for ppi_name, frame in ppi_frames.items()
}
counts

In [None]:
# Give every count table a type-specific column name, e.g. COUNT -> VITALS_COUNT
for ppi_name in counts:
    counts[ppi_name].rename(columns={'COUNT': ppi_name.upper() + '_COUNT'}, inplace=True)

# From here on the *_df names refer to the per-night count tables
vitals_df, neuros_df, meds_df, labs_df, off_unit_df, on_unit_df = (
    counts[key] for key in ('vitals', 'neuros', 'meds', 'labs', 'off_units', 'on_units')
)

In [None]:
# Join the six per-type count tables into one wide table, one row per patient night.
night_keys = ['pat_night_index', 'PAT_ENC_CSN_ID', 'NIGHT_START', 'NIGHT_END']

merge1 = vitals_df.merge(neuros_df, on=night_keys)
merge2 = merge1.merge(labs_df, on=night_keys)
merge3 = merge2.merge(meds_df, on=night_keys)
merge4 = merge3.merge(on_unit_df, on=night_keys)
all_ppis = merge4.merge(off_unit_df, on=night_keys)

# Keep the encounter / night identifiers followed by the six count columns
id_columns = ['PAT_ENC_CSN_ID', 'NIGHT_START', 'NIGHT_END']
count_columns = ['VITALS_COUNT', 'NEUROS_COUNT', 'LABS_COUNT',
                 'MEDS_COUNT', 'ON_UNITS_COUNT', 'OFF_UNITS_COUNT']
all_ppis = all_ppis[id_columns + count_columns]
all_ppis

In [None]:
# Summary statistics for the numeric columns of the combined per-night PPI table
all_ppis.describe()

### Table 1: Demographics of patients in the study

In [None]:
# Number of distinct encounter IDs in the admission details table
admission_details['PAT_ENC_CSN_ID'].nunique()

In [None]:
# Distinct encounter IDs that appear in the patient-night population, and how many there are
unique_pat_encs = pat_nights['PAT_ENC_CSN_ID'].unique().tolist()
len(unique_pat_encs)

In [None]:
# Restrict admission details to encounters in the study population, one row per encounter
in_population = admission_details['PAT_ENC_CSN_ID'].isin(unique_pat_encs)
demographics_df = admission_details.loc[in_population].drop_duplicates(subset=['PAT_ENC_CSN_ID'])

# Sanity check: every encounter ID should now appear exactly once
demographics_df['PAT_ENC_CSN_ID'].value_counts()

In [None]:
# Number of distinct patients among the encounters in the study population
demographics_df['PAT_ID'].nunique()

In [None]:
# Custom age ranges: left-closed bins, so e.g. 50 falls in '50-59'
bins = [0, 50, 60, 70, 80, float('inf')]
labels = ['<50', '50-59', '60-69', '70-79', '80+']

# Keep one row per patient, then attach that patient's age bracket
demographics_df = demographics_df.drop_duplicates(subset=['PAT_ID'])
demographics_df['AGE_RANGE'] = pd.cut(
    demographics_df['PATIENT_AGE_YEARS'],
    bins=bins,
    labels=labels,
    right=False,
)
demographics_df

In [None]:
# Gender breakdown: absolute counts alongside the share of the population
gender = demographics_df['GENDER']
values, percents = gender.value_counts(), gender.value_counts(normalize=True)
values, percents

In [None]:
# Race breakdown: absolute counts alongside the share of the population
race = demographics_df['PATIENT_RACE']
values, percents = race.value_counts(), race.value_counts(normalize=True)
values, percents

In [None]:
# Age-bracket breakdown: absolute counts alongside the share of the population
age_range = demographics_df['AGE_RANGE']
values, percents = age_range.value_counts(), age_range.value_counts(normalize=True)
values, percents

In [None]:
# Parse the admission / discharge timestamps and derive the length of stay
demographics_df['HOSP_DISCH_TIME'] = pd.to_datetime(demographics_df['HOSP_DISCH_TIME'])
demographics_df['HOSP_ADMSN_TIME'] = pd.to_datetime(demographics_df['HOSP_ADMSN_TIME'])
length_of_stay = demographics_df['HOSP_DISCH_TIME'] - demographics_df['HOSP_ADMSN_TIME']

# ADMISSION_LENGTH is kept as text (e.g. '3 days 04:12:00'), as before
demographics_df['ADMISSION_LENGTH'] = length_of_stay.astype(str)

# Whole days come straight from the timedelta. The previous approach parsed the
# first token of the string form, which raised ValueError whenever a timestamp was
# missing ('NaT' cannot be converted to int). Missing stays now yield NaN here and
# are simply left out of the binned counts below.
demographics_df['Days'] = length_of_stay.dt.days

# Creating day ranges: left-closed bins, so e.g. 5 falls in '5-9'
bins = [0, 5, 10, 15, 20, float('inf')]
labels = ['0-4', '5-9', '10-14', '15-19', '20+']
demographics_df['Day_Ranges'] = pd.cut(demographics_df['Days'], bins=bins, labels=labels, right=False)

# Get count and percentage of each admission length range
values = demographics_df['Day_Ranges'].value_counts()
percents = demographics_df['Day_Ranges'].value_counts(normalize=True)
values, percents

In [None]:
# Nights-on-unit breakdown: absolute counts alongside the share of the population
nights_on_unit = demographics_df['NIGHTS_ON_51600']
values, percents = nights_on_unit.value_counts(), nights_on_unit.value_counts(normalize=True)
values, percents

In [None]:
# Number of patient nights recorded for each encounter.
# Deliberately NOT named `counts`: that name holds the per-type PPI count tables
# built earlier in the notebook, and overwriting it breaks re-running those cells.
nights_per_encounter = pat_nights['PAT_ENC_CSN_ID'].value_counts()

# How many encounters have 1 night, 2 nights, ...
# The index and value names are set explicitly: on pandas >= 2.0 both default to
# 'count' after a double value_counts(), and a bare reset_index() then fails with
# "cannot insert count, already exists".
counts_summary = (
    nights_per_encounter.value_counts()
    .sort_index()
    .rename_axis('Count')
    .reset_index(name='Frequency')
)

# Calculate percentages relative to the number of distinct encounters
total_values = pat_nights['PAT_ENC_CSN_ID'].nunique()
percentages = counts_summary['Frequency'] / total_values * 100

# Get count and percentage of nights on unit
summary = pd.DataFrame({
    'Nights on 51600': counts_summary['Count'],
    'Frequency': counts_summary['Frequency'],
    'Percentage': percentages,
})
summary

In [None]:
# Admission-diagnosis breakdown: absolute counts alongside the share of the population
admission_dx = demographics_df['ADMISSION_DX_NAME']
values, percents = admission_dx.value_counts(), admission_dx.value_counts(normalize=True)
values, percents

### Table 2: Statistics from the interruption-count algorithm

In [None]:
# Display summary statistics for the PPI count table (loaded from ppi_count_df.csv)
ppi_count_df.describe()

In [None]:
# Display summary statistics for the numeric columns of the unpickled luso_epcount
# frame (LUSO and interruptive episode count)
luso_epcount.describe()

In [None]:
# Frequency and percentage of the number of PPIs in an interruptive episode.
# Each GROUPED_TIMES entry is iterated as a collection of groups; record each group's size.
luso_epcount['ppis_in_episode'] = luso_epcount['GROUPED_TIMES'].apply(
    lambda groups: [len(group) for group in groups]
)

# Flatten the per-row size lists into a single list and tally it
combined_values = []
for episode_sizes in luso_epcount['ppis_in_episode']:
    combined_values.extend(episode_sizes)
value_counts = Counter(combined_values)
value_counts_dict = dict(value_counts)

# Create dataframe, ordered by episode size
value_counts_df = (
    pd.DataFrame(value_counts_dict.items(), columns=['PPIs in an Episode', 'Frequency'])
    .sort_values(by='PPIs in an Episode', ascending=True)
    .reset_index(drop=True)
)

# Add percentage column, rendered as text with two decimals
total_count = value_counts_df['Frequency'].sum()
share_of_total = (value_counts_df['Frequency'] / total_count) * 100
value_counts_df['Percentage'] = share_of_total.apply(lambda pct: f'{pct:.2f}%')

value_counts_df

### Figure 1: Number of PPIs by type

In [None]:
# Collapse both unit-transfer labels into one category.
# NOTE: this rewrites ppi_df['PPI_TYPE'] in place; the next cell relies on the relabelled column.
transfer_relabel = dict.fromkeys(['Off Unit', 'On Unit'], 'Intra-Unit Transfer')
ppi_df['PPI_TYPE'] = ppi_df['PPI_TYPE'].replace(transfer_relabel)

# Share of each PPI type, as a percentage string with one decimal
value_counts_normalized = ppi_df['PPI_TYPE'].value_counts(normalize=True)
formatted_percentages = value_counts_normalized.map(lambda share: f'{share * 100:.1f}')
formatted_percentages

In [None]:
# Share of each PPI type in the full PPI table
value_counts_normalized = ppi_df['PPI_TYPE'].value_counts(normalize=True)

# Render each share as a percentage string with one decimal
formatted_percentages = value_counts_normalized.map(lambda share: format(share * 100, '.1f'))

# Two-column table (type, percentage) saved as the data source for Figure 1
df_formatted_percentages = pd.DataFrame({
    'PPI_TYPE': formatted_percentages.index,
    'Percentage': formatted_percentages.values,
})
df_formatted_percentages.to_csv('figure1data.csv')

### Figure 2: Interruptive Episode Count & LUSO Distributions

In [None]:
# Translate each LUSO minute range into an h:mm label for Figure 2.

# LUSO_RANGE is created by the "Splitting LUSO into bins" cell further down the
# notebook, so on a fresh Restart & Run All this cell failed with KeyError unless
# the pickled frame already carried that column. Derive it here when it is missing,
# using the same bin edges and labels as that cell.
if 'LUSO_RANGE' not in luso_epcount.columns:
    luso_edges = [0, 60, 120, 180, 240, 300, 360, 420, 421]
    luso_labels = [f'{lower}-{upper - 1}' for lower, upper in zip(luso_edges, luso_edges[1:])]
    luso_epcount['LUSO_RANGE'] = pd.cut(
        luso_epcount['LUSO'], bins=luso_edges, labels=luso_labels, right=False
    )

# Minute range -> clock-style label
luso_range_to_hours = {
    '0-59': '0:00-0:59',
    '60-119': '1:00-1:59',
    '120-179': '2:00-2:59',
    '180-239': '3:00-3:59',
    '240-299': '4:00-4:59',
    '300-359': '5:00-5:59',
    '360-419': '6:00-6:59',
    '420-420': '7:00',
}

# Rows whose range matches none of the keys keep the 'Unknown' default
luso_epcount['LUSO_hours'] = 'Unknown'
for minute_range, hour_label in luso_range_to_hours.items():
    luso_epcount.loc[luso_epcount['LUSO_RANGE'] == minute_range, 'LUSO_hours'] = hour_label
luso_epcount

In [None]:
# Count of patient nights per LUSO hour label
luso_hour_counts = luso_epcount['LUSO_hours'].value_counts()
luso_hours_df = pd.DataFrame({'Range': luso_hour_counts.index, 'Count': luso_hour_counts.values})

# Leading hour of each label ('3:00-3:59' -> 3, '7:00' -> 7).
# to_numeric(errors='coerce') turns the non-numeric 'Unknown' label into NaN; the
# previous astype(int) raised ValueError whenever an 'Unknown' row was present.
start_hour = pd.to_numeric(luso_hours_df['Range'].str.split(':').str[0], errors='coerce')

# Sort ascending by hour (any 'Unknown' row goes last) and save the Figure 2 data
# without the helper column
df_sorted = (
    luso_hours_df.assign(start_hour=start_hour)
    .sort_values(by='start_hour', na_position='last')
    .drop(columns=['start_hour'])
)
df_sorted.to_csv('figure2luso.csv')

In [None]:
# Frequency of each interruptive-episode count, as a two-column table
episode_count_freq = luso_epcount['NUM_EPISODES'].value_counts()

epcount = pd.DataFrame({
    'Range': episode_count_freq.index,
    'Count': episode_count_freq.values,
})
epcount

### Interruptive Episode and LUSO Duration Analysis

In [None]:
# Summary statistics for the numeric columns of the episode_durations table
episode_durations.describe()

In [None]:
# Frequency of each distinct row in the episode_durations table
episode_durations.value_counts()

In [None]:
# Create the empirical CDF of interruptive episode duration.
# For each duration d the curve shows the fraction of episodes lasting <= d.
# The previous cumsum()/sum() computed the cumulative share of *total duration*
# instead, which is not a CDF and over-weights long episodes.
sorted_data = episode_durations['Interruptive Episode Duration'].dropna().sort_values()
# method='max' gives tied durations the same (highest) rank, i.e. P(X <= d)
cumulative = sorted_data.rank(method='max', pct=True)

plt.plot(sorted_data, cumulative, marker='o', linestyle='-', color='blue')
plt.xlabel('Duration')
plt.ylabel('Cumulative Proportion of Episodes')
plt.title('Distribution of Interruptive Episode Duration')
plt.show()

In [None]:
# Bucket LUSO into left-closed bins labelled by their inclusive bounds; the last
# bin, [420, 421), is labelled '420-420'
bin_edges = [0, 60, 120, 180, 240, 300, 360, 420, 421]
bin_labels = [
    f'{lower}-{upper - 1}'
    for lower, upper in zip(bin_edges[:-1], bin_edges[1:])
]
luso_epcount['LUSO_RANGE'] = pd.cut(
    luso_epcount['LUSO'],
    bins=bin_edges,
    labels=bin_labels,
    right=False,
)
luso_epcount

In [None]:
# Number of rows falling in each LUSO range
luso_epcount['LUSO_RANGE'].value_counts()