In [1]:
###Pandas Libraries
import pandas as pd
import datetime
import time
import os

#Dash Libraries
#import dash  # USE THIS IF RUNNING ON SERVER
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
#import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from jupyter_dash import JupyterDash # USE THIS IF RUNNING ON JUPYTER
import numpy as np

##  TABLE MANIPULATION CODE

In [2]:
#*********                     FUNCTION RUNS ALL OF THE DATA MANIPULATION CODE
#start = time.time()  ## TIMER FUNCTION

#Convert Hour of Day to time period
timeconv = {'Admission_Period':['Planned','Unplanned','Unplanned','Unplanned','Unplanned','Planned',
                                'Planned','Planned','Planned','Unplanned','Unplanned','Unplanned',
                                'Unplanned','Unplanned','Unplanned','Unplanned','Unplanned','Planned',
                                'Planned','Planned','Planned','Planned','Planned','Planned'],
            'Hour_of_Day': [0,1,2,3,4,5,
                            6,7,8,9,10,11,
                            12,13,14,15,16,17,
                            18,19,20,21,22,23]}
timeconv_df = pd.DataFrame.from_dict(timeconv)

dayconv = {'Day_Type':['Weekday','Weekday','Weekday','Weekday','Weekday','Weekend','Weekend'],
           'Day': [0,1,2,3,4,5,6]}
dayconv_df = pd.DataFrame.from_dict(dayconv)

event_activity = {'Event_Activity':['High','High','High','Low','Normal','Normal','Low','Low'],
                  'Event':['AFTERXMAS','B4EASTER','BEFOREXMAS','EASTER','NONE','SCHOOL','XMAS','PUBLIC']}
event_activity_df = pd.DataFrame.from_dict(event_activity)


##### CLEAN TRANSFERS DATA
df = pd.read_csv('transfers.csv')
df = df.rename(columns = {'Start_Bed_Dttm':'Start', 'End_Bed_Dttm':'End'})
df = df.drop(columns=['Transfers', 'Bed_Code', 'Full Flow Code', 'Current Flow Code', '1st Level','PASID'])
date_cols = ['Start', 'End']
df[date_cols] = df[date_cols].apply(pd.to_datetime, format='%d/%m/%y %H:%M', errors='raise')
base_df = df.copy(deep=True)
ideal_df = df.copy(deep=True)
orig_df = df.copy(deep=True)

###### CLEAN EVENTS AND PUBLIC HOLIDAY DATA
events_df = pd.read_csv('exception_dates.csv')
events_df = events_df.rename(columns = {'DATE':'Date', 'EVENT':'Event'})
events_df['Date'] = events_df['Date'].apply(pd.to_datetime, format='%d/%m/%Y', errors='coerce')
events_df['Date'] = events_df['Date'].dt.date


def add_next_ward(df):
    df = df.sort_values(['Link', 'Start'])
    df['Next_Ward'] = np.where(df.Link == df.Link.shift(-1), df.Ward_Code.shift(-1),
                              np.where(df.Ward_Code != 'OUT', 'OUT','NONE'))
    return df

ideal_df = add_next_ward(ideal_df)
orig_df = add_next_ward(orig_df)

def add_out_transfer(df):
    ## ADDS RECORD IN ROW IN RECORD TO SHOW WHENT THE PATIENT LEAVES THE HOSPITAL
    df  = df.loc[df.Next_Ward == 'OUT',:].copy()
    df['Start'] = df['End']
    #df['Prev_Flow'] = df.Prev_Flow + '-' + df.Ward_Code
    df['Ward_Code'] = 'OUT'
    df['Next_Ward'] = 'NONE'
    return df

ideal_df = pd.concat([ideal_df, add_out_transfer(ideal_df)])
orig_df = pd.concat([orig_df, add_out_transfer(orig_df)])

def add_hierarical_data(df):
    ####### ADD HIERARICAL DATA
    #DAY OF THE WEEK
    df = df.dropna(subset=['Start'])
    df = df.copy()
    df['Day'] = df.Start.dt.weekday
    df = pd.merge(df, dayconv_df, how='left', on='Day')
    #MONTH NAME
    df['Month'] = df.Start.dt.month_name()
    #YEAR
    df['Year'] = df.Start.dt.year
    #HOUR OF DAY
    df['Hour_of_Day'] = df.Start.dt.hour
    df = pd.merge(df, timeconv_df, how='left', on='Hour_of_Day')
    df.drop(columns='Hour_of_Day')
    #DATE WITHOUT TIME
    df['Date'] = df.Start.dt.date
    return df

ideal_df = add_hierarical_data(ideal_df)
orig_df = add_hierarical_data(orig_df)

def add_events_data(df):
    ### ADD EVENTS TO DATAFRAME
    df = pd.merge(df, events_df, how='left', on='Date')
    df['Event'] = df['Event'].fillna('NONE')
    df = pd.merge(df, event_activity_df, how='left', on='Event')
    #df.drop(columns='Event')
    return df

ideal_df = add_events_data(ideal_df)
orig_df = add_events_data(orig_df)


####### RENAME WARD_CODES THAT ARE NOT PART OF THE IDEAL PATH AND DELETE DUPLICATES
df = ideal_df.copy()
#CA, CL & HE Codes are converted to MT
df.loc[(df['Ward_Code'] == 'CA') | (df['Ward_Code'] == 'CL') | (df['Ward_Code'] == 'HE') , 'Ward_Code'] = 'MT' #Ward Code CA, CL, HE = MT
df.loc[(df['Next_Ward'] == 'CA') | (df['Next_Ward'] == 'CL') | (df['Next_Ward'] == 'HE') , 'Next_Ward'] = 'MT' #Ward Code CA, CL, HE = MT
# Drop DPS from the new dataset
index_names = df.loc[(df['Ward_Code'] == 'DPS') | ((df['Ward_Code'] == 'ON'))].index
df = df.drop(index_names)
# PATIENT PREVIOUS FLOW PATH
df = df.sort_values(['Link', 'Start'])
def prev_flow(df, n):
    for i in range(n, 0, -1):
        df.Prev_Flow = np.where(df.Link == df.Link.shift(i), 
                 df.Prev_Flow + '-' + df.Ward_Code.shift(i), df.Prev_Flow)
    return df.Prev_Flow
df['Prev_Flow'] = 'IN'
df['Prev_Flow'] = prev_flow(df, 7)
# REMOVE DUPLICATE ENTRIES CAUSED BY REPLACING WARD CODES WITH IDEAL WARD_CODES
duplicate_df = df.loc[(df.Ward_Code == df.Next_Ward)|(df.Ward_Code == df.Prev_Flow.str[-2:])|(df.Ward_Code == df.Prev_Flow.str[-3:])]
duplicate_grp_df = duplicate_df.groupby(['Link', 'Ward_Code'])
#get the min start date and max end date from the grouped results
df['min_start'] = duplicate_grp_df['Start'].transform('min')
df['max_end'] = duplicate_grp_df['End'].transform('max')
#Drop the Original start & end dates & rename the new calculated start & end
df['Start'] = np.where(df.min_start.notnull(), df.min_start, df.Start)
df['End'] = np.where(df.max_end.notnull(), df.max_end, df.End)
df = df.sort_values(['Link', 'Start'], ascending = True)
df= df.drop_duplicates(keep='first', subset=['Link', 'Ward_Code','Start'])
#dropped_df = df.loc[(df.Ward_Code == df.Next_Ward)|(df.Ward_Code == df.Prev_Flow.str[-2:])].drop_duplicates(keep='first', subset=['Link', 'Ward_Code','Start'])
#df = df[~df.index.isin(dropped_df.index)]
df = df.drop(columns=['min_start', 'max_end'])
df= df.dropna(subset=['Start'])
ideal_df = df.copy(deep=True)

ideal_df = add_next_ward(ideal_df)


######## CALCULATE THE CRITICAL VARIABLES FOR COMPARISON ON THE SIMULATION
def add_analysis_info(df):
    duration = df['End'] - df['Start']
    df['LOS'] = (duration.dt.days * 24) + (duration.dt.seconds / (60*60)) # converts day & seconds fields to hours
    df['LOS'] = df['LOS'].round(2)
    # DETERMINE PREVIOUS PATIENT FLOW PATH
    #if ('Prev_Flow' in df.columns) == False:
    df = df.sort_values(['Sim_Name','Sim_No','Link', 'Start'])
    def prev_flow(df, n):
        for i in range(n, 0, -1):
            df.Prev_Flow = np.where(df.Link == df.Link.shift(i), 
                                    df.Prev_Flow + '-' + df.Ward_Code.shift(i), df.Prev_Flow)
        return df.Prev_Flow
    df['Prev_Flow'] = 'IN'
    df['Prev_Flow'] = prev_flow(df, 7)
    # SHOW ALL OF THE FLOW CODES FOR PATIENT
    df['Full_Flow_Code'] = 1
    df['Full_Flow_Code'] = df.groupby(['Sim_Name','Sim_No','Link'])['Ward_Code'].transform(lambda x: '-'.join(x))
    df['Full_Flow_Code'] ='IN' + '-' + df.Full_Flow_Code
    #INTERARRIVAL TIME
    df = df.sort_values(['Sim_Name','Sim_No','Start'])
    temp_df = df.loc[df.Prev_Flow == 'IN', 'Start'] - df.loc[df.Prev_Flow == 'IN', 'Start'].shift(1)
    df['Int_Arrival'] = pd.to_numeric(temp_df.dt.days*24 + temp_df.dt.seconds / (60*60))
    df['Int_Arrival'] = df.Int_Arrival.round(2)
    return df

#update ideal dataset and original dataset with LOS, INT_Arrival and Patient flow information
ideal_df['Sim_Name'] = 'ideal_hist'
ideal_df['Sim_No'] = 1
ideal_df = add_analysis_info(ideal_df)
orig_df['Sim_Name'] = 'orig_hist'
orig_df['Sim_No'] = 1
orig_df = add_analysis_info(orig_df)



######## CREATE SUMMARY TABLE FOR INTERARRIVAL TIME
df = ideal_df.copy(deep=True)
df = df.loc[df.Int_Arrival.notnull(),:]
df = df.drop(columns=['Link','Ward_Code','Day', 'Month','Year','Hour_of_Day', 'Date','Event','LOS', 'Prev_Flow', 'Next_Ward','Full_Flow_Code',  'Start', 'End','Sim_Name','Sim_No'])
#create bins for all of counts
df['Int_Arrival_Bins'] = pd.cut(x=df['Int_Arrival'], bins=[0,9,19,29,49,59,69,79,89,99,109])
#temp_df.Int_Arrival.max()  #used to find the biggest number
df = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period','Int_Arrival_Bins']).count()
df = df.reset_index()
df['Total_Counts'] = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period']).transform('sum')
df['Arrival_Probability'] = df.Int_Arrival / df.Total_Counts
df['Arrival_Probability'] = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period']).cumsum()['Arrival_Probability']
df = df.dropna()
df = df.drop(columns = ['Int_Arrival', 'Total_Counts'])
df = df.set_index(['Event_Activity', 'Day_Type', 'Admission_Period','Int_Arrival_Bins'])
arrivalsum_df = df.copy(deep=True)

######## CREATE MODIFIER FOR ARRIVAL INTERVAL YEAR TO YEAR & MONTH TO MONTH
df = ideal_df.copy(deep=True)
overall_mean = df.groupby(['Prev_Flow']).Int_Arrival.mean().loc['IN']
df = df.groupby(['Prev_Flow','Year']).Int_Arrival.mean().loc['IN'].reset_index()
df['Int_Arrival'] = df.Int_Arrival / overall_mean
annual_plot_df = df.copy(deep=True)
min_date = df.Year[0]
max_date = df.Year[7]
min_date_modifier = df.Int_Arrival[0]
max_date_modifier = df.Int_Arrival[7]
# Determine Linear equation for year modifier based on 2013 to 2020
x_yr = np.array([2021, 2022, 2023, 2024, 2025, 2026, 2027, 2028, 2029, 2030, 2031])
A = np.array([[min_date, 1],[max_date, 1]])
B = np.array([min_date_modifier, max_date_modifier])
m, b = np.linalg.solve(A,B)
y_mod = m * x_yr + b
x_yr = np.append(df['Year'], x_yr)
y_mod = np.append(df['Int_Arrival'], y_mod)
# Look up table for year modifier
year_modifier={'year': x_yr,
              'modifier': y_mod}
year_mod_df = pd.DataFrame.from_dict(year_modifier)
# Determine Month by Month modifier
df = ideal_df
df = df.groupby(['Prev_Flow', 'Month']).Int_Arrival.mean().loc['IN'].reset_index()
df['Int_Arrival'] = df.Int_Arrival / overall_mean
month_modifier = {'month': df['Month'].values.tolist(),
                 'modifier': df['Int_Arrival'].values.tolist()} 
month_mod_df = pd.DataFrame.from_dict(month_modifier)

####### CREATE SUMMARY TABLE FOR PATIENT FLOW CODE
df = ideal_df.copy(deep=True)
df = df.drop(columns=['Link',  'Start', 'End','Day', 'Month','Year','Hour_of_Day', 'Date','Event','LOS', 'Full_Flow_Code', 'Int_Arrival','Sim_Name','Sim_No'])
df = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period', 'Prev_Flow','Ward_Code']).count()
df = df.reset_index()
df = df.rename(columns = {'Next_Ward':'Next_Ward_Count'})
df['Total_Counts'] = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period', 'Prev_Flow']).transform('sum')['Next_Ward_Count']
df['Cummulative_Count'] = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period', 'Prev_Flow']).cumsum()['Next_Ward_Count']
df['Next_Ward_Cum_Prob'] = df.Cummulative_Count / df.Total_Counts
df = df.drop(columns=['Next_Ward_Count', 'Total_Counts','Cummulative_Count'])
df = df.set_index(['Event_Activity', 'Day_Type', 'Admission_Period', 'Prev_Flow','Ward_Code'])
patientflowsum_df = df.copy(deep=True)

####### CREATE SUMMARY TABLE FOR LENGTH OF STAY
df = ideal_df.copy(deep=True)
df = df.drop(columns=['Link',  'Start', 'End','Day', 'Month','Year','Hour_of_Day', 'Date','Event', 'Full_Flow_Code', 'Int_Arrival','Next_Ward','Sim_Name','Sim_No'])
#df.LOS.describe()  #used to find the biggest number
df['LOS_Bins'] = pd.cut(x=df['LOS'], bins=[0,19,39,59,79,99,119,139,159,179,199,219,239,259,279,299,319,339,
                                           359,379,399,419,439,459,479,499,519,539,559,579,599,619,639,659,679,
                                           699,719,739,750,779,799,819,839])
df = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period','Prev_Flow','Ward_Code','LOS_Bins']).count()
df.head(50)
df = df.reset_index()
df['Total_Counts'] = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period','Prev_Flow','Ward_Code']).transform('sum')
df['LOS_Probability'] = df.LOS / df.Total_Counts
df['LOS_Probability'] = df.groupby(['Event_Activity', 'Day_Type', 'Admission_Period']).cumsum()['LOS_Probability']
df = df.dropna()
df = df.drop(columns = ['Total_Counts', 'LOS'])
df = df.set_index(['Event_Activity', 'Day_Type', 'Admission_Period','Prev_Flow','Ward_Code','LOS_Bins'])
LOSsum_df = df.copy(deep=True)


def add_patient_occupancy(df):
    ### ADDS PATIENT OCCUPANCY TO THE DATA
    #### add for loop that runs through all sims and all sim numbers
    sim_names = df.Sim_Name.unique()
    for sim_name in sim_names:
        sim_nos = df.Sim_No.unique()
        for sim_no in sim_nos:
            df = df.loc[(df.Sim_Name == sim_name) & (df.Sim_No == sim_no),:]
            df = df.sort_values(['Start'])
            df['MT_Occupancy'] = 0
            df['DS_Occupancy'] = 0
            df['MT_Occupancy'] = np.where((df.Prev_Flow.str[-2:] == 'MT') & (df.Ward_Code != 'MT'), -1,
                                              np.where((df.Ward_Code == 'MT') & (df.Prev_Flow.str[-2:] != 'MT'), 1, 0)).cumsum()
            df['DS_Occupancy'] = np.where((df.Prev_Flow.str[-2:] == 'DS') & (df.Ward_Code != 'DS'), -1,
                                              np.where((df.Ward_Code == 'DS') & (df.Prev_Flow.str[-2:] != 'DS'), 1, 0)).cumsum()
    return df

ideal_df = add_patient_occupancy(ideal_df)
orig_df = add_patient_occupancy(orig_df)

In [3]:
patientflowsum_df.loc['Low']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Next_Ward_Cum_Prob
Day_Type,Admission_Period,Prev_Flow,Ward_Code,Unnamed: 4_level_1
Weekday,Planned,IN,DS,0.402062
Weekday,Planned,IN,MT,1.0
Weekday,Planned,IN-DS,MT,0.692308
Weekday,Planned,IN-DS,OUT,1.0
Weekday,Planned,IN-DS-MT,OUT,1.0
Weekday,Planned,IN-MT,DS,0.166667
Weekday,Planned,IN-MT,OUT,1.0
Weekday,Unplanned,IN,DS,0.309524
Weekday,Unplanned,IN,ICU,0.333333
Weekday,Unplanned,IN,MT,1.0


## SIMULATION CODE

In [7]:
import math

###### SEED DATA FOR SIMULATION
n_sims = 2
start_string = '01/01/2020'
finish_string = '12/11/2020'

arrival_df = arrivalsum_df
LOS_df = LOSsum_df
patientflow_df = patientflowsum_df
start = pd.Timestamp(start_string, unit='%d/%m/%Y %H:%M:%S')
finish = pd.Timestamp(finish_string, unit='%d/%m/%Y %H:%M:%S')


def next_arrival(df, event_activity, day_type, admission_period, from_year, from_month): 
    xp = pd.Series([0]).append(df.loc[event_activity, day_type, admission_period].iloc[:,0])
    fp = pd.Index([0]).append(df.loc[event_activity, day_type, admission_period].index.remove_unused_categories().categories.right)
    random_no = np.random.rand()
    result = np.interp(random_no, xp, fp) # Straight result based on Mean
    year_mod = year_mod_df.loc[year_mod_df.year == from_year,'modifier'].iloc[0]
    month_mod = month_mod_df.loc[month_mod_df.month == from_month,'modifier'].iloc[0]
    result_modified = result * year_mod * month_mod
    return result_modified
#print(next_arrival(arrivalsum_df, 'High', 'Weekday', 'Planned', 2020, 'January'))

def date_to_hierarcy(date_stamp):
    #date_stamp = pd.Timestamp(date_string, unit='%d/%m/%Y %H:%M:%S')
    #Extract time period
    hour_of_day = date_stamp.hour
    admission_period = timeconv['Admission_Period'][hour_of_day]
    #Extract type of day (i.e Weekday or Weekend)
    day_of_week = date_stamp.dayofweek
    day_type = dayconv['Day_Type'][day_of_week]
    #Extract Event based activity
    date_only = date_stamp.date()
    try: event = events_df.groupby('Date').Event.first().loc[date_only]
    except: event = 'NONE'
    event_index_no = event_activity['Event'].index(event)
    event_activity_level = event_activity['Event_Activity'][event_index_no]
    #Extract Month
    month = date_stamp.month_name()
    #Extract Year
    year = date_stamp.year
    return {'Admission_Period':admission_period, 'Day_Type':day_type, 'Event_Activity':event_activity_level,'Month':month, 'Year':year}
#date_to_hierarcy('25/12/2020 23:01:00')

def simulate_arrivals(df, n_sims, start, finish):
    results = {'Sim_No':[],'Arrival_Time':[]}
    current_timestamp = pd.Timestamp(start, unit='%d/%m/%Y')
    finish_timestamp = pd.Timestamp(finish, unit='%d/%m/%Y')
    for i in range(0, n_sims):
        while current_timestamp < finish_timestamp:
            hierarcy = date_to_hierarcy(current_timestamp)
            arrival_interval = round(next_arrival(df, hierarcy['Event_Activity'], hierarcy['Day_Type'], hierarcy['Admission_Period'], hierarcy['Year'], hierarcy['Month'])*60*60,0)
            try: current_timestamp += datetime.timedelta(seconds=arrival_interval) 
            except: print(arrival_interval)
            results['Sim_No'].append(i)
            results['Arrival_Time'].append(current_timestamp)
        current_timestamp = pd.Timestamp(start, unit='%d/%m/%Y')
        i += 1
    return results
#data = simulate_arrivals(arrivalsum_df, 10, '01/01/2017','02/01/2017')

def simulate_patient_flow(df, group_cat, prev_flow):
    ward_prob = df.loc[(group_cat['Event_Activity'], group_cat['Day_Type'], group_cat['Admission_Period'], prev_flow)]
    n = ward_prob.count().iloc[0]
    random_no = np.random.rand()
    for i in range(0, n):
        if random_no <= ward_prob.iloc[i,0]: 
            ward_code = ward_prob.index[i]
            if (prev_flow == 'IN-MT') & (ward_code == 'MT'):
                    print(random_no)
            break 
    return ward_code
#simulate_patient_flow(patientflow_df, date_to_hierarcy(start), 'IN')

def simulate_patient_end(df, start, group_cat, prev_flow, ward_code):
    xp = pd.Series([0]).append(df.loc[group_cat['Event_Activity'], group_cat['Day_Type'], group_cat['Admission_Period'], prev_flow, ward_code].iloc[:,0])
    fp = pd.Index([0]).append(df.loc[group_cat['Event_Activity'], group_cat['Day_Type'], group_cat['Admission_Period'], prev_flow, ward_code].index.remove_unused_categories().categories.right)
    #print (xp, fp)
    random_no = np.random.rand()
    dec_result = np.interp(random_no, xp, fp)
    end = start + datetime.timedelta(seconds = int(dec_result*60*60))
    return end
#simulate_patient_end(LOS_df, date_to_hierarcy(start), 'IN', 'DS')

def run_simulation(n_sims, arrival_df, LOS_df, patientflow_df, start, finish):
    arrivals_data = simulate_arrivals(arrival_df, n_sims, start, finish)
    no_of_admissions = len(arrivals_data['Sim_No'])
    transfer_info = {'Sim_No':[],'Link':[],'Ward_Code':[],'Start':[],'End':[],'Prev_Flow':[]}
    sim_no = 1
    print(sim_no)
    for i in range(0, no_of_admissions):
        prev_flow, prev_ward = 'IN','IN'
        if sim_no != arrivals_data['Sim_No'][i]:
            sim_no = arrivals_data['Sim_No'][i]
            print(sim_no)
        start = arrivals_data['Arrival_Time'][i]
        link = i
        patient_discharged = False
        group_categories = date_to_hierarcy(start)
        while patient_discharged == False:
            ward = simulate_patient_flow(patientflow_df, group_categories, prev_flow)  
            try: end = simulate_patient_end(LOS_df, start, group_categories, prev_flow, ward)
            except: end = start
            if ward == 'OUT':
                patient_discharged = True
#            try:
#                ward = simulate_patient_flow(patientflow_df, group_categories, prev_flow)
#            except: 
#                patient_discharged = True
#                break
#             try: 
#                 end = simulate_patient_end(LOS_df, start, group_categories, prev_flow, ward)
#             except: 
#                 patient_discharged = True
#                 break

            transfer_info['Sim_No'].append(sim_no)
            transfer_info['Start'].append(start)
            transfer_info['Link'].append(link)
            transfer_info['Prev_Flow'].append(prev_flow)
            transfer_info['Ward_Code'].append(ward)
            transfer_info['End'].append(end)

            #create new starting information
            start = end
            prev_flow = '-'.join((prev_flow, ward))
            prev_ward = ward
            #patient_discharged = True
                                 
    results_df = pd.DataFrame.from_dict(transfer_info)
    return results_df

#sim2020_df = run_simulation(n_sims, arrival_df, LOS_df, patientflow_df, start, finish)

#%time run_simulation(n_sims, arrival_df, LOS_df, patientflow_df, start, finish)


In [8]:
sim_source = {'no_sims':1,
             'arrival_df':arrival_df,
             'LOS_df':LOS_df,
             'patientflow_df':patientflow_df,
             'sims': {'title':['sim2020','sim2025','sim2030'],
                      'start':['01/01/2020','01/01/2025','01/01/2030'],
                      'finish':['01/01/2021','02/01/2026','02/01/2031'],
                      'plot_info': {'name':['Maternity Occupancy Probability Distribution','Delivery Suite Occupancy Probability Distribution',
                                                 'Maternity Occupancy Cummulative Probability','Delivery Suite Occupancy Cummulative Probability'],
                                      'x_axis':['MT_Occupancy','DS_Occupancy',
                                                'MT_Occupancy','DS_Occupancy'],
                                    'plot_type':['Histogram','Histogram',
                                                 'Cummulative Histogram','Cummulative Histogram'],
                                       }
                         }
             }

def update_occupancy_results(df, sim_name):
    df = df.sort_values('Start')
    df['MT_Occupancy'] = 0
    df['MT_Occupancy'] = df.Patient_to_MT.cumsum()
    df['DS_Occupancy'] = 0
    df['DS_Occupancy'] = df.Patient_to_DS.cumsum()
    df = df.sort_values('Link')
    df['Sim_Name'] = sim_name 
    return df

def plot_histogram(df, x_axis, plot_type, folder_loc, name, sim_name):
    filename = folder_loc + '/' + sim_name + '_' + x_axis + '_' + plot_type + '.png'
    if plot_type == 'Histogram':
        fig = px.histogram(df, x=x_axis, nbins=20)
        #fig.show()
        fig.write_image(filename)
    if plot_type == 'Cummulative Histogram':
        x_axis_values = df.loc[:,x_axis]
        fig = go.Figure(data=[go.Histogram(x=x_axis_values, cumulative_enabled=True)])
    fig.update_layout(
        title = name,
        xaxis_title = x_axis,
        yaxis_title = "Count"
    )
    fig.write_image(filename)
        

def new_dir(folder_name):
    today = datetime.date.today()  
    todaystr = today.isoformat()  
    full_path = folder_name + todaystr
    if not(os.path.isdir(full_path)):
        os.mkdir(full_path)
    return full_path

def all_sims_and_plots(sim_source):
    folder_loc = new_dir('plots/')
    filename = folder_loc + '/Simulation_Data_' + datetime.date.today().isoformat() + '.csv'
    sims = sim_source['sims']
    prev_df = pd.DataFrame()
    no_scenarios = len(sims['title'])
    no_plots = len(sims['plot_info']['name'])
    for i in range(no_scenarios):
        sim_name = sims['title'][i]
        start = pd.Timestamp(sims['start'][i], unit='%d/%m/%Y %H:%M:%S')
        finish =  pd.Timestamp(sims['finish'][i], unit='%d/%m/%Y %H:%M:%S')
        print(sim_name)
        df = run_simulation(sim_source['no_sims'], sim_source['arrival_df'], sim_source['LOS_df'], sim_source['patientflow_df'], start, finish)
        df['Sim_Name'] = sim_name
        df = add_patient_occupancy(df)
        for n in range(no_plots):
            plot_histogram(df, sims['plot_info']['x_axis'][n], sims['plot_info']['plot_type'][n], folder_loc, sims['plot_info']['name'][n], sim_name)
        if prev_df.empty == False:
            df = pd.concat([df, prev_df])
        prev_df = df.copy()
    ##### ADD ALL DESCRIPTIVE COLUMNS TO SIMULATED DATA
    df = add_next_ward(df)
    df = add_hierarical_data(df)
    df = add_events_data(df)
    df = add_analysis_info(df)
    df.to_csv(filename, index=False)
    return df

%time simrun_df = all_sims_and_plots(sim_source)

sim2020
1
0


KeyError: ('Normal', 'Weekday', 'Planned', 'IN-MT-ICU')

## COMBINE ALL DATASETS

In [6]:
df = pd.concat([simrun_df, ideal_df, orig_df])
#df.loc[df.Sim_Name == 'sim2020',:]['Full_Flow_Code'].unique()
#patientflowsum_df.loc[('Low','Weekday','Unplanned','IN-MT'),:].index[0]
#simrun_df.loc[simrun_df.Full_Flow_Code == 'IN-MT-MT', ['Day_Type', 'Event_Activity', 'Admission_Period', 'Prev_Flow', 'Link', 'Sim_Name', 'Sim_No','Ward_Code']]

NameError: name 'simrun_df' is not defined

In [None]:
start_string = '01/01/2020'
start = pd.Timestamp(start_string, unit='%d/%m/%Y %H:%M:%S')
group_categories = date_to_hierarcy(start)
print(group_categories['Event_Activity'],group_categories['Day_Type'],group_categories['Admission_Period'])
ward = simulate_patient_flow(patientflow_df, group_categories, 'IN-MT')
print(ward)

In [11]:
patientflow_df.loc['Normal','Weekday','Planned',]

Unnamed: 0_level_0,Unnamed: 1_level_0,Next_Ward_Cum_Prob
Prev_Flow,Ward_Code,Unnamed: 2_level_1
IN,DS,0.427433
IN,ICU,0.4354
IN,MT,0.99134
IN,OUT,1.0
IN-DS,ICU,0.0107527
IN-DS,MT,0.885305
IN-DS,OUT,1.0
IN-DS-MT,DS,0.0930233
IN-DS-MT,ICU,0.139535
IN-DS-MT,OUT,1.0


In [None]:
######## REVIEW DATA FROM OBSTETRICS CASES
##### CLEAN TRANSFERS DATA
df = pd.read_csv('Obs_ObstetricCases.csv')
df = df.loc[:,('Link', 'Ward','Admit','SeparationDRGGroupCode')]
obs_df = df
#df = df.loc[df.Ward == 'Maternity',]

df = pd.merge(ideal_df, obs_df, how='left', on='Link')
df.loc[(df.Ward.notnull())].groupby(['Ward_Code','Ward']).count()['Link']



In [None]:
obs_df.groupby('SeparationDRGGroupCode').count()['Link']

In [None]:
import plotly.express as px
fig = px.bar(annual_plot_df, x='Year', y='Int_Arrival', title='Interarrival Time by Overall Interarrival mean')
fig.show()

## ARRIVALS PER YEAR

In [None]:
# PREPARE DATA FOR ARRIVALS PLOT
df = ideal_df
df = df.loc[df.Year == 2013,'Start'].min()
daysin2013 = df.dayofyear
percentdaysin2013 = (365 - daysin2013) / 365
df = ideal_df
df = df.loc[df.Year == 2020,'End'].max()
daysin2020 = df.dayofyear
percentdaysin2020 = daysin2020 / 365
df = ideal_df
df = df.loc[df.Int_Arrival.notnull(),('Year','Int_Arrival')]
df = df.groupby('Year').count()
df = df.reset_index()
df = df.rename(columns={'Int_Arrival':'No_of_Patients'})
nopatients2013 = df.loc[df.Year == 2013, 'No_of_Patients']
df.loc[df.Year == 2013, 'No_of_Patients'] = int(nopatients2013 / percentdaysin2013)
nopatients2020 = df.loc[df.Year == 2020, 'No_of_Patients']
df.loc[df.Year == 2020, 'No_of_Patients'] = int(nopatients2020 / percentdaysin2020)
#Export Plot to external sheet
df.to_csv("Arrivals_per_Year.csv", index=False)  
plot_arrivals_year = df
#Show plot of datat
fig = px.line(plot_arrivals_year, x='Year', y='No_of_Patients', title='Number of Arrivals')
fig.show()

##  IDEAL FLOW PROBABILITY

In [None]:
df = ideal_df
total_patients = df.count().iloc[0]
df = df.loc[df.Prev_Flow == 'IN'].groupby(['Full_Flow_Code']).count()['Link']
df = df.reset_index()
df = df.rename(columns={'Link':'No_of_Patients'})
df['Total_Patients'] = total_patients
df['Full_Flow_Code'] = df['Full_Flow_Code'].str[3:-4]
df = df.sort_values('No_of_Patients', ascending = False)
df['Percentage_Patients'] = df.No_of_Patients / df.Total_Patients
df['Cumulative_Probability'] = df.Percentage_Patients.cumsum()
df = df.drop(columns=['No_of_Patients','Total_Patients'])
ideal_patient_flow_prob_plot_df = df
df.to_csv("Ideal_Flow_Probability.csv", index=False)  
#Show plot of data
fig = px.bar(ideal_patient_flow_prob_plot_df, x=df['Full_Flow_Code'], y=df['Percentage_Patients'], title='Patient Flow Probabilities')
fig.add_scatter(x=df['Full_Flow_Code'], y=df['Cumulative_Probability'], name='Cumulative Plot')
fig.show()

In [None]:
ideal_patient_flow_prob_plot_df

## BRANCHING PROBABILITIES

In [None]:
####### Data Table for Pie Charts
# Split Full flow code data up
df = ideal_df.copy(deep=True)
df = df.loc[df.Prev_Flow == 'IN'].groupby('Full_Flow_Code').count()['Link'].reset_index()
df1 = df.Full_Flow_Code.str.split(pat="-", expand=True)
df = pd.merge(df, df1, left_index=True, right_index=True)
df = df.rename(columns={'Link':'Values'})
# format each level correctly for plats
lvl1_df = df.iloc[:]
lvl1_df['Prev_Flow'] = df[0]
lvl1_df['Ward_Code'] = df[1]
lvl2_df = df.iloc[:]
lvl2_df['Prev_Flow'] = df[0] + '-' + df[1]
lvl2_df['Ward_Code'] = df[2]
lvl3_df = df.iloc[:]
lvl3_df['Prev_Flow'] = df[0]+'-'+df[1]+'-'+df[2]
lvl3_df['Ward_Code'] = df[3]
lvl4_df = df.iloc[:]
lvl4_df['Prev_Flow'] = df[0]+'-'+df[1]+'-'+df[2]+'-'+df[3]
lvl4_df['Ward_Code'] = df[4]
lvl5_df = df.iloc[:]
lvl5_df['Prev_Flow'] = df[0]+'-'+df[1]+'-'+df[2]+'-'+df[3]+'-'+df[4]
lvl5_df['Ward_Code'] = df[5]
lvl6_df = df.iloc[:]
lvl6_df['Prev_Flow'] = df[0]+'-'+df[1]+'-'+df[2]+'-'+df[3]+'-'+df[4]+'-'+df[5]
lvl6_df['Ward_Code'] = df[6]
#bring information back into single dataframe
df = pd.concat([lvl1_df,lvl2_df,lvl3_df,lvl4_df,lvl5_df,lvl6_df])
df = df.dropna(subset=['Ward_Code'])
df = df.drop(columns=['Full_Flow_Code',0,1,2,3,4,5,6])
pie_df = df.copy(deep = True)
pie_df.head()

In [None]:
def flow_code_pie_chart(df, prev_flow_code):
    title = prev_flow_code
    filename = 'plots/Patient_Flow/' + title + '.png'
    df = df.loc[df.Prev_Flow == title].groupby('Ward_Code').sum()['Values'].reset_index()
    fig = px.pie(
        data_frame=df,
        values='Values',
        names='Ward_Code',
        color='Ward_Code',
        color_discrete_map = {'MT':'royalblue',
                             'DS':'green',
                             'ICU':'goldenrod',
                             'OUT':'salmon'},
        labels={'Ward_Code':'Next Ward'},
        title=title,
        template='presentation',
        width=800,
        height=600,
        hole=0
    )
    fig.update_traces(textinfo='percent+label')
    #fig.show()
    fig.write_image(filename)
#flow_code_pie_chart(pie_df, 'IN-DS') 
    
def all_flow_code_pie_charts(df):
    temp_df = df.groupby('Prev_Flow').count()['Values'].reset_index()
    for entry in temp_df['Prev_Flow']:
        flow_code_pie_chart(df, entry)
        
%time all_flow_code_pie_charts(pie_df)

## VERIFICATION OF DATA

In [None]:
df = orig_df
#df = ideal_df

In [None]:
df.loc[df.Prev_Flow=='IN'].groupby('Ward_Code').count()['Link']
#df.groupby('Full_Flow_Code').count()['Link']
#df.count()['Link']
#df.loc[df.Prev_Flow=='IN'].groupby('Full_Flow_Code').count()['Link']
#df.groupby('Full_Flow_Code').count()['Link']

In [None]:
#IN-DS-MT-OUT
#df.loc[(df.Full_Flow_Code=='IN-DS-MT-OUT')&(df.Prev_Flow=='IN'),('Link',"Ward_Code",'Start','End','Prev_Flow','Next_Ward')].sort_values(['Link','Start'])
#IN-MT-OUT
df.loc[(df.Full_Flow_Code=='IN-MT-OUT')&(df.Prev_Flow=='IN'),('Link',"Ward_Code",'Start','End','Prev_Flow','Next_Ward')].sort_values(['Link','Start'])

In [None]:
arrivalsum_df.head(30)

In [None]:
patientflowsum_df.head(50)

In [None]:
LOSsum_df.head(50)

## INTERPRETING RESULTS

In [None]:
sim_run.head()

In [None]:
ideal_df.groupby('Year').describe()['Int_Arrival']

## REPORTS

In [None]:
timeconv_df

In [None]:
event_activity_df

In [None]:
dayconv_df