In [30]:
import pandas as pd
import numpy as np
from datetime import  time, datetime, timedelta
import matplotlib.pyplot as plt
import pickle
import warnings
warnings.filterwarnings('ignore')

In [31]:
# Read every input table used by this notebook from CSV.

# Patient nights: keep one row per (encounter, night start, night end).
pat_nights = (
    pd.read_csv('Created_Data/patient_night_population.csv')
    .drop_duplicates(subset=['PAT_ENC_CSN_ID', 'NIGHT_START', 'NIGHT_END'])
)
bed_changes = pd.read_csv('Created_Data/bed_changes.csv')

# Expose the transfer start as 'TIME' so it matches the column name used by the other event tables.
off_unit_transfers = (
    pd.read_csv('Created_Data/off_unit_transfers.csv')
    .rename(columns={'START_TIME': 'TIME'})
)

flowsheets = pd.read_csv('Queried_Final/flowsheets.csv')
orders = pd.read_csv('Queried_Final/orders.csv')
meds = pd.read_csv('Queried_Final/medication_administration.csv')
location_df = pd.read_csv('Queried_Final/location_history.csv')

In [32]:
# Restore objects saved by an earlier run (presumably DataFrames, judging by the `_df` file names).
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only load files that
# were produced by a trusted run of this project.
with open('pickled_dataframes/off_unit_interruptions_df', 'rb') as f:
    off_unit_interruptions = pickle.load(f)
    
with open('pickled_dataframes/on_unit_interruptions_df', 'rb') as f:
    on_unit_interruptions = pickle.load(f)
    
# This rebinds `pat_nights`: the de-duplicated frame read from
# 'Created_Data/patient_night_population.csv' in the CSV-loading cell is discarded from here on.
with open('pickled_dataframes/patient_night_population_df', 'rb') as f:
    pat_nights = pickle.load(f)

In [None]:
# Attach every location-history row of the same encounter to each order and parse the
# three timestamp columns used below.
merged_df = (
    orders.merge(location_df, on='PAT_ENC_CSN_ID', how='inner')
    .assign(
        TIME=lambda frame: pd.to_datetime(frame['TIME']),
        START_TIME=lambda frame: pd.to_datetime(frame['START_TIME']),
        END_TIME=lambda frame: pd.to_datetime(frame['END_TIME']),
    )
)

# Keep only the pairing where the order time lies inside the location interval (bounds inclusive),
# and blank out missing room names so the prefix test below never sees NaN.
filtered_df = (
    merged_df
    .loc[lambda frame: (frame['TIME'] >= frame['START_TIME']) & (frame['TIME'] <= frame['END_TIME'])]
    .assign(ROOM_NAME=lambda frame: frame['ROOM_NAME'].fillna(''))
)

# Orders placed while the room name starts with '516' are treated as bedside; all others as off unit.
bedside_testing = filtered_df[filtered_df['ROOM_NAME'].str.startswith('516')]
off_unit_testing = filtered_df[~filtered_df['ROOM_NAME'].str.startswith('516')]

In [34]:
# %%writefile count_ppis_by_pat_nights.py

def drop_duplicates(df, time_column):
    """Return the unique (PAT_ENC_CSN_ID, time) pairs of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'PAT_ENC_CSN_ID' and ``time_column``. It is not modified.
    time_column : str
        Name of the timestamp column to de-duplicate on.

    Returns
    -------
    pandas.DataFrame
        Two columns, 'PAT_ENC_CSN_ID' and 'TIME' (``time_column`` renamed), with a fresh
        0..n-1 index. Where a pair repeats, the position of its last occurrence is kept.
    """
    key_columns = ['PAT_ENC_CSN_ID', time_column]
    return (
        df[key_columns]
        .drop_duplicates(keep='last')
        .reset_index(drop=True)
        .rename(columns={time_column: 'TIME'})
    )

def extract_vitals_neuros(flowsheets_df):
    """Split flowsheet rows into vital-sign rows and neuro-check rows.

    Parameters
    ----------
    flowsheets_df : pandas.DataFrame
        Must contain 'ROW_NAME' and 'GROUP_NAME' columns.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(vitals, neuros)``: rows whose 'ROW_NAME' is one of BP, Pulse, Temp, SpO2 or
        BG (glucometer), and rows whose 'GROUP_NAME' is 'GCS and Neuro Checks' or
        'Neuro Checks'. Both keep the original columns and index labels.
    """
    vital_row_names = ['BP', 'Pulse', 'Temp', 'SpO2', 'BG (glucometer)']
    neuro_group_names = ['GCS and Neuro Checks', 'Neuro Checks']

    # Filter the frame that was passed in, not the notebook-level `flowsheets` global.
    vitals = flowsheets_df[flowsheets_df['ROW_NAME'].isin(vital_row_names)]
    neuros = flowsheets_df[flowsheets_df['GROUP_NAME'].isin(neuro_group_names)]
    return vitals, neuros

def extract_meds(meds):
    """Drop medication rows whose 'MAR_ACTION' is an excluded action.

    Rows with 'MAR_ACTION' equal to 'Canceled Entry', 'Held' or 'Rate Verify' are removed;
    every other row (including rows with a missing action) is returned unchanged.
    """
    excluded_actions = ['Canceled Entry', 'Held', 'Rate Verify']
    is_excluded = meds['MAR_ACTION'].isin(excluded_actions)
    return meds[~is_excluded]

def _psi_event_columns(events_df, psi_type, fixed_duration=None):
    """Return (ids, start strings, end strings, type labels) for one event table.

    ``events_df`` is only read. When ``fixed_duration`` is given the end time is
    ``TIME + fixed_duration``; otherwise the table's own 'END_TIME' column is used.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    starts = pd.to_datetime(events_df['TIME'])
    if fixed_duration is None:
        ends = pd.to_datetime(events_df['END_TIME'])
    else:
        ends = starts + fixed_duration

    ids = events_df['PAT_ENC_CSN_ID'].values.tolist()
    start_strings = [stamp.strftime(time_format) for stamp in starts]
    end_strings = [stamp.strftime(time_format) for stamp in ends]
    return ids, start_strings, end_strings, [psi_type] * len(ids)


def prepare_psi_df(vitals_df, neuros_df, meds_df, bedside_testing_df, off_unit_testing_df, off_unit_transfers_df, bed_changes_df):
    """Stack the seven event tables into one long table of labelled time intervals.

    Every input needs 'PAT_ENC_CSN_ID' and 'TIME' columns; ``off_unit_transfers_df`` also
    needs 'END_TIME'. All other event kinds are given a fixed five-minute duration.
    The inputs are only read: no columns are added to or converted on the caller's frames.

    Returns
    -------
    pandas.DataFrame
        Columns 'PAT_ENC_CSN_ID', 'START_TIME', 'END_TIME' (both formatted as
        'YYYY-MM-DD HH:MM:SS' strings) and 'PSI_TYPE', sorted ascending by those four
        columns. The index holds the pre-sort row positions.
    """
    five_minutes = timedelta(minutes=5)

    # (table, label, duration); duration None means "use the table's own END_TIME".
    # The order fixes the pre-sort row positions that end up in the result's index.
    sources = [
        (vitals_df, 'Vitals', five_minutes),
        (meds_df, 'Meds', five_minutes),
        (neuros_df, 'Neuros', five_minutes),
        (bedside_testing_df, 'Bedside Testing', five_minutes),
        (off_unit_testing_df, 'Off Unit Testing', five_minutes),
        (off_unit_transfers_df, 'Off Unit Transfer', None),
        (bed_changes_df, 'Bed Changes', five_minutes),
    ]

    id_list, start_time_list, end_time_list, psi_list = [], [], [], []
    for events_df, psi_type, duration in sources:
        ids, starts, ends, labels = _psi_event_columns(events_df, psi_type, duration)
        id_list.extend(ids)
        start_time_list.extend(starts)
        end_time_list.extend(ends)
        psi_list.extend(labels)

    combined_df = pd.DataFrame({
        'PAT_ENC_CSN_ID': id_list,
        'START_TIME': start_time_list,
        'END_TIME': end_time_list,
        'PSI_TYPE': psi_list,
    })
    return combined_df.sort_values(by=['PAT_ENC_CSN_ID', 'START_TIME', 'END_TIME', 'PSI_TYPE'], ascending=True)


def filter_psi_by_pat_nights(pat_nights, filtered_df, reset_index=True):
    """Keep only the events that lie entirely inside one of the patient nights.

    Parameters
    ----------
    pat_nights : pandas.DataFrame
        One row per patient night with 'PAT_ENC_CSN_ID', 'NIGHT_START' and 'NIGHT_END'.
    filtered_df : pandas.DataFrame
        Events with 'PAT_ENC_CSN_ID', 'START_TIME' and 'END_TIME'. The time columns may be
        strings or datetimes; the caller's frame is left untouched.
    reset_index : bool, default True
        Replace the index of the result with 0..n-1.

    Returns
    -------
    pandas.DataFrame
        Rows of ``filtered_df`` (time columns parsed to datetime) whose encounter matches a
        night and for which ``NIGHT_START <= START_TIME`` and ``END_TIME <= NIGHT_END``,
        ordered night by night. An event matching several nights appears once per night.
        With no nights at all an empty frame with the same columns is returned.
    """
    # Parse the timestamps once, on a copy, instead of once per night on the caller's frame.
    events = filtered_df.assign(
        START_TIME=pd.to_datetime(filtered_df['START_TIME']),
        END_TIME=pd.to_datetime(filtered_df['END_TIME']),
    )

    result = []
    nights = zip(pat_nights['PAT_ENC_CSN_ID'], pat_nights['NIGHT_START'], pat_nights['NIGHT_END'])
    for patient_id, night_start, night_end in nights:
        in_night = (
            (events['PAT_ENC_CSN_ID'] == patient_id)
            & (events['START_TIME'] >= night_start)
            & (events['END_TIME'] <= night_end)
        )
        result.append(events[in_night])

    # pd.concat refuses an empty list, so fall back to an empty slice with the right columns.
    psi_df = pd.concat(result) if result else events.iloc[0:0].copy()

    if reset_index:
        psi_df = psi_df.reset_index(drop=True)

    return psi_df

def count_psis_by_pat_nights(pat_nights, psi_df):
    """Count, for every patient night, the events that lie entirely inside it.

    Parameters
    ----------
    pat_nights : pandas.DataFrame
        One row per patient night with 'PAT_ENC_CSN_ID', 'NIGHT_START' and 'NIGHT_END'.
    psi_df : pandas.DataFrame
        Events with 'PAT_ENC_CSN_ID', 'START_TIME' and 'END_TIME'; the time columns must
        already be comparable with the night bounds (no parsing is done here).

    Returns
    -------
    pandas.DataFrame
        A copy of ``pat_nights`` with an added integer 'COUNT' column.
    """
    # Collect counts by row position rather than by index label, so repeated index labels
    # in pat_nights cannot collapse two nights into one entry.
    counts = []
    nights = zip(pat_nights['PAT_ENC_CSN_ID'], pat_nights['NIGHT_START'], pat_nights['NIGHT_END'])
    for patient_id, night_start, night_end in nights:
        in_night = (
            (psi_df['PAT_ENC_CSN_ID'] == patient_id)
            & (psi_df['START_TIME'] >= night_start)
            & (psi_df['END_TIME'] <= night_end)
        )
        counts.append(int(in_night.sum()))

    result_df = pat_nights.copy()
    result_df['COUNT'] = counts

    return result_df

In [35]:
def get_episode_duration(psi_df, pat_nights, max_gap_minutes=20):
    """Return the duration, in whole minutes, of every interruption episode.

    For each patient night, the events of that encounter whose start or end lies inside
    the night (bounds inclusive) are sorted by start time and chained into episodes:
    an event joins the current episode when it starts no more than ``max_gap_minutes``
    after the furthest end time seen so far in that episode.

    Parameters
    ----------
    psi_df : pandas.DataFrame
        Events with 'PAT_ENC_CSN_ID', 'START_TIME' and 'END_TIME' (strings or datetimes).
        The frame is only read.
    pat_nights : pandas.DataFrame
        One row per patient night with 'PAT_ENC_CSN_ID', 'NIGHT_START' and 'NIGHT_END'.
    max_gap_minutes : int or float, default 20
        Largest pause between events that still counts as the same episode.

    Returns
    -------
    list of int
        One entry per episode (first start to furthest end), in night order.
    """
    starts = pd.to_datetime(psi_df['START_TIME'])
    ends = pd.to_datetime(psi_df['END_TIME'])
    patient_ids = psi_df['PAT_ENC_CSN_ID']
    max_gap = pd.Timedelta(minutes=max_gap_minutes)

    durations_list = []

    def close_episode(first_start, furthest_end):
        # total_seconds() keeps whole days; the `.seconds` attribute would silently drop them.
        durations_list.append(int((furthest_end - first_start).total_seconds() / 60))

    nights = zip(pat_nights['PAT_ENC_CSN_ID'], pat_nights['NIGHT_START'], pat_nights['NIGHT_END'])
    for patient_id, night_start, night_end in nights:
        touches_night = (patient_ids == patient_id) & (
            ((starts >= night_start) & (starts <= night_end))
            | ((ends >= night_start) & (ends <= night_end))
        )
        intervals = sorted(zip(starts[touches_night], ends[touches_night]))

        episode_start = episode_end = None
        for start, end in intervals:
            if episode_start is None:
                episode_start, episode_end = start, end
            elif start - episode_end <= max_gap:
                # Overlapping or close enough: extend, never shrink, the episode's end so a
                # short event nested inside a long one cannot cut the episode short.
                episode_end = max(episode_end, end)
            else:
                close_episode(episode_start, episode_end)
                episode_start, episode_end = start, end

        if episode_start is not None:
            close_episode(episode_start, episode_end)

    return durations_list




In [36]:
def group_ppis(psi_df, pat_nights, max_gap_minutes=20):
    """Group each night's events into episodes and count the episodes.

    For each patient night, the events of that encounter whose start or end lies inside
    the night (bounds inclusive) are sorted by start time and chained into episodes:
    an event joins the current episode when it starts no more than ``max_gap_minutes``
    after the furthest end time seen so far in that episode.

    Parameters
    ----------
    psi_df : pandas.DataFrame
        Events with 'PAT_ENC_CSN_ID', 'START_TIME' and 'END_TIME' (strings or datetimes).
        The frame is only read.
    pat_nights : pandas.DataFrame
        One row per patient night with 'PAT_ENC_CSN_ID', 'NIGHT_START' and 'NIGHT_END'.
    max_gap_minutes : int or float, default 20
        Largest pause between events that still counts as the same episode.

    Returns
    -------
    pandas.DataFrame
        A copy of ``pat_nights`` with 'GROUPED_TIMES' (a list of episodes, each a list of
        ``[start, end]`` pairs) and 'NUM_EPISODES' (the number of episodes).
    """
    starts = pd.to_datetime(psi_df['START_TIME'])
    ends = pd.to_datetime(psi_df['END_TIME'])
    patient_ids = psi_df['PAT_ENC_CSN_ID']
    max_gap = pd.Timedelta(minutes=max_gap_minutes)

    # One entry per pat_nights row, in row order. Collecting by position instead of by
    # index label keeps the result aligned even when index labels repeat.
    grouped_per_night = []

    nights = zip(pat_nights['PAT_ENC_CSN_ID'], pat_nights['NIGHT_START'], pat_nights['NIGHT_END'])
    for patient_id, night_start, night_end in nights:
        touches_night = (patient_ids == patient_id) & (
            ((starts >= night_start) & (starts <= night_end))
            | ((ends >= night_start) & (ends <= night_end))
        )
        intervals = sorted([start, end] for start, end in zip(starts[touches_night], ends[touches_night]))

        grouped_times = []
        episode_end = None
        for interval in intervals:
            # Comparing Timedeltas directly avoids `.seconds`, which drops whole days.
            if grouped_times and interval[0] - episode_end <= max_gap:
                grouped_times[-1].append(interval)
                # Track the furthest end so a short event nested in a long one cannot
                # make the episode look finished early.
                episode_end = max(episode_end, interval[1])
            else:
                grouped_times.append([interval])
                episode_end = interval[1]

        grouped_per_night.append(grouped_times)

    result_df = pat_nights.copy()
    result_df['GROUPED_TIMES'] = grouped_per_night
    result_df['NUM_EPISODES'] = result_df['GROUPED_TIMES'].apply(len)

    return result_df



In [37]:
# Function to calculate the biggest gap with a default of 420 minutes
def calculate_luso(row, default_minutes=420):
    """Find the longest event-free gap of one patient night.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Needs 'NIGHT_START', 'NIGHT_END' and 'GROUPED_TIMES', the latter being a list of
        episodes that each hold ``[start, end]`` timestamp pairs.
    default_minutes : int or float, default 420
        Value stored in 'LUSO' when the night has no events at all.

    Returns
    -------
    The same ``row`` with two entries set: 'LUSO' (gap length in minutes) and
    'LUSO_START' (timestamp at which that gap begins, or None when there are no events).

    Notes
    -----
    Candidate gaps are: night start to first event, between consecutive busy spans, and
    last busy span to night end. Events that overlap are merged into one busy span first,
    so an event nested inside a longer one cannot create a gap that is not really there.
    When several gaps share the maximum the earliest wins, except that with a single busy
    span a tie goes to the trailing gap.
    """
    night_start = pd.to_datetime(row['NIGHT_START'])
    night_end = pd.to_datetime(row['NIGHT_END'])

    # Flatten the episodes and merge truly overlapping intervals into busy spans.
    # Intervals that merely touch stay separate (yielding a zero-length gap).
    busy_spans = []
    for start, end in sorted(interval for group in row['GROUPED_TIMES'] for interval in group):
        if busy_spans and start < busy_spans[-1][1]:
            busy_spans[-1][1] = max(busy_spans[-1][1], end)
        else:
            busy_spans.append([start, end])

    if not busy_spans:
        row['LUSO'] = default_minutes
        row['LUSO_START'] = None
        return row

    # gap i runs from gap_starts[i] to gap_ends[i]; there is one more gap than busy spans.
    gap_starts = [night_start] + [end for _, end in busy_spans]
    gap_ends = [start for start, _ in busy_spans] + [night_end]
    gaps = [
        (gap_end - gap_start).total_seconds() / 60
        for gap_start, gap_end in zip(gap_starts, gap_ends)
    ]

    if len(busy_spans) == 1 and gaps[1] >= gaps[0]:
        biggest_gap_index = 1
    else:
        # np.argmax returns the first position of the maximum.
        biggest_gap_index = int(np.argmax(gaps))

    # Set the values in the new columns
    row['LUSO'] = gaps[biggest_gap_index]
    row['LUSO_START'] = gap_starts[biggest_gap_index]

    return row

In [38]:
# Split the flowsheet rows into the vital-sign subset and the neuro-check subset.
vitals, neuros = extract_vitals_neuros(flowsheets)
# Rebinds `meds` to the rows that remain after extract_meds removes the excluded MAR_ACTION values.
meds = extract_meds(meds)

In [39]:
# Reduce every event table to unique (PAT_ENC_CSN_ID, time) pairs. drop_duplicates also renames
# the time column to 'TIME', so each table leaves this cell with just those two columns.
# Each name is rebound to its reduced form, so the original columns are gone afterwards.
neuros = drop_duplicates(neuros, 'TIME')
vitals = drop_duplicates(vitals, 'TIME')
orders = drop_duplicates(orders, 'TIME')
bedside_testing = drop_duplicates(bedside_testing, 'TIME')
off_unit_testing = drop_duplicates(off_unit_testing, 'TIME')
# Medication rows are keyed on 'TAKEN_TIME'. The result no longer has that column, so running
# this cell a second time without reloading `meds` fails on this line.
meds = drop_duplicates(meds, 'TAKEN_TIME')
bed_changes = drop_duplicates(bed_changes, 'TIME')

In [40]:
# Stack all event tables into one interval table, then keep only events inside a patient night.
psi_df = prepare_psi_df(vitals, neuros, meds, bedside_testing, off_unit_testing, off_unit_transfers, bed_changes)
psi_df = filter_psi_by_pat_nights(pat_nights, psi_df)
# Tally events by type. The label column created by prepare_psi_df is 'PSI_TYPE'
# (looking up 'PPI_TYPE' raises a KeyError).
psi_df['PSI_TYPE'].value_counts()

In [None]:
# Keep only the six listed event types; 'Off Unit Transfer' is not among them.
psi_df_without_transfers = psi_df[psi_df['PSI_TYPE'].isin([
    'Neuros', 'Vitals', 'Meds', 'Bedside Testing', 'Off Unit Testing', 'Bed Changes',
])]
psi_df_without_transfers

In [41]:
# Per-night event counts, computed from the table that leaves out 'Off Unit Transfer' rows.
psi_count_df = count_psis_by_pat_nights(pat_nights, psi_df_without_transfers)
psi_count_df

In [42]:
# Group each night's events into episodes (adds GROUPED_TIMES and NUM_EPISODES), then add the
# largest event-free gap per night (LUSO, LUSO_START) row by row.
# Note: this uses the full psi_df, i.e. 'Off Unit Transfer' rows are included here.
luso_epcount = group_ppis(psi_df, pat_nights)
luso_epcount = luso_epcount.apply(calculate_luso, axis=1)
luso_epcount

In [43]:
# Episode durations (minutes) across all nights, again from the full psi_df, wrapped in a
# single-column DataFrame so they can be written out with the other results.
durations = get_episode_duration(psi_df, pat_nights)
columns = ['Interruptive Episode Duration']
duration_df = pd.DataFrame(durations, columns=columns)

In [44]:
# Left-closed bins of LUSO minutes: [0, 60), [60, 120), ..., [360, 420), [420, 421).
bin_edges = [0, 60, 120, 180, 240, 300, 360, 420, 421]
# Each label shows the bin's lower edge and its upper edge minus one, e.g. '0-59'.
bin_labels = [f'{lower}-{upper - 1}' for lower, upper in zip(bin_edges, bin_edges[1:])]

# Assign every night to its bin; LUSO values outside the edges become NaN.
luso_epcount['LUSO_RANGE'] = pd.cut(
    luso_epcount['LUSO'],
    bins=bin_edges,
    labels=bin_labels,
    right=False,
)
luso_epcount

In [45]:
# Nights for which no episode was found.
zero_eps = luso_epcount.loc[luso_epcount['NUM_EPISODES'].eq(0)]

In [46]:
# Save dataframes to csv
# to_csv is called without index=False, so each file also contains the DataFrame index
# as its first column.
# NOTE(review): the file names mix 'psi' and 'ppi' and singular/plural 'transfer'; they are
# left as-is because other code may read these exact paths.
psi_df_without_transfers.to_csv('Created_Data/psi_df_without_transfer.csv')
psi_count_df.to_csv('Queried_Final/ppi_count_df.csv')
luso_epcount.to_csv('Queried_Final/luso_episodecount.csv')
duration_df.to_csv('Queried_Final/episode_durations.csv')
zero_eps.to_csv('Queried_Final/no_interruption_nights.csv')

In [47]:
# Save DataFrame to a pickle file
# The path is relative, so the file is written to the current working directory rather than
# to the 'pickled_dataframes/' folder the inputs were read from.
with open('luso_epcount.pkl', 'wb') as f:
    pickle.dump(luso_epcount, f)
