# MEH Pharmacy Department

## Step 1: Import necessary packages

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Show every column when a DataFrame is displayed (no column truncation)
pd.options.display.max_columns = None

## Initial Data Cleaning

### Step 2: Read and combine all excel tabs into one

In [None]:
# Load every worksheet of the workbook and stack them into one DataFrame.
# Assumes the sheets share the same headers; the result gets a fresh 0..n-1 index.

f = '../../Data/Pharmacy Dept/MEH TTO Data/MEH TTO Data.xlsx'
# sheet_name=None returns a dict mapping each sheet name to its DataFrame
df = pd.read_excel(f, sheet_name=None)
df2 = pd.concat(list(df.values()), ignore_index=True)

df2.head()

In [None]:
# Inspect column names, dtypes and non-null counts of the combined sheets
df2.info()

### Step 3: Drop unnecessary columns

In [None]:
# Drop columns that are not needed downstream (Weekday, TAT and Office Hours
# are recomputed later in this notebook).
df2.drop(
    columns=[
        'DurationOverdue', 'Item Type', 'Path', 'Weekday', 'TAT',
        'Column1', 'Column2', 'Column3', 'Column4',
        'TTODay', 'TTOWeek',
        'Office Hours?', 'Office Hours', 'OfficeHours',
        'TTOTAT',
    ],
    inplace=True,
)

### Step 4: Re-format of Columns and Data

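The same fields appear under different column headers across the sheets. The steps below split the data into three sets, rename each set's headers to a common standard, and then stack the sets as rows.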

In [None]:
# Re-inspect the remaining columns and their non-null counts after the drop
df2.info()

### Step 5: Create Set 1 df with Apr, Nov, Dec 2021, Jan, Feb 2022 Data

In [None]:
#April, Nov, Dec 2021, Jan, Feb 2022
# Set 1 = rows where the 'Date_x0028_Verified_x0029_' column is populated.
# .copy() makes df_set1 an independent frame, so the in-place drop/rename and
# the column assignments applied to it later do not act on a slice of df2
# (which triggers SettingWithCopyWarning).

df_set1 = df2[df2['Date_x0028_Verified_x0029_'].notnull()].copy()
df_set1

#### Step 6: Drop unnecessary columns that do not belong to Set 1 df

In [None]:
# Remove the columns that do not belong to Set 1
# (Set 1 keeps the '_x0028_..._x0029_' encoded date headers instead).

df_set1.drop(
    columns=[
        'Date(Verified)', 'Date(Checked)', 'Date(Dispensing)',
        'Date(PassedNurse)', 'Date(Pigeon)', 'Date(Pharm)',
        'Date(Dispensed)', 'Time Verified', 'Case', 'TTOTimeIn',
    ],
    inplace=True,
)

In [None]:
# Check which columns remain in Set 1 and how many values each holds
df_set1.info()

#### Step 7: Rename Columns for future standardization

In [None]:
# Map the encoded Set 1 headers onto the standard column names shared by all sets
set1_new_cols_name = {
    'Date_x0028_Checked_x0029_': 'DateTime TTO Checked',
    'Date_x0028_Dispensed_x0029_': 'DateTime TTO Dispensed',
    'Date_x0028_Dispensing_x0029_': 'DateTime (Dispensing)',
    'Date_x0028_Verified_x0029_': 'DateTime TTO Received',
    'TTOWardIn': 'Ward',
    'Title': 'Case Number',
    'Date_x0028_Pharm_x0029_': 'DateTime (in Pharmacy)',
    'Date_x0028_Pigeon_x0029_': 'DateTime (in Pigeon)',
    'Date_x0028_NurseDispensed_x0029_': 'DateTime (Dispensed by Nurses)',
    'Date_x0028_PassedNurse_x0029_': 'DateTime (Passed to Nurse)',
}

df_set1.rename(columns=set1_new_cols_name, inplace=True)

df_set1.head()

#### Step 8: Change DateTime ISO format to +08:00 for all relevant columns

In [None]:
# Convert every Set 1 timestamp column to Singapore local time (+08:00) and
# store it as a 'YYYY-MM-DD HH:MM:SS' string (missing values stay missing).
set1_datetime_cols = [
    'DateTime TTO Checked',
    'DateTime TTO Dispensed',
    'DateTime (Dispensing)',
    'DateTime TTO Received',
    'DateTime (in Pharmacy)',
    'DateTime (in Pigeon)',
    'DateTime (Dispensed by Nurses)',
    'DateTime (Passed to Nurse)',
]

for col in set1_datetime_cols:
    # utc=True guarantees a tz-aware result, so tz_convert cannot raise
    # TypeError on a column that parses as tz-naive (e.g. one that is empty).
    # NOTE(review): naive values, if any, are interpreted as UTC — confirm
    # the source export is in UTC.
    local_time = pd.to_datetime(df_set1[col], utc=True).dt.tz_convert('Asia/Singapore')
    df_set1[col] = local_time.dt.strftime('%Y-%m-%d %H:%M:%S')

df_set1.head()

### Step 9: Create Set 2 df with May, June, Aug 2021 Data

In [None]:
#May, June, Aug 2021
# Set 2 = rows with 'Date(Verified)' populated and no 'Case' value.
# .copy() makes df_set2 an independent frame, so the in-place drop/rename and
# the column assignments applied to it later do not act on a slice of df2
# (which triggers SettingWithCopyWarning).

df_set2 = df2[df2['Date(Verified)'].notnull() & df2['Case'].isnull()].copy()
df_set2

#### Step 10: Remove unnecessary columns that do not belong to Set2

In [None]:
# Remove the encoded-header date columns and the other columns that do not
# belong to Set 2
df_set2.drop(
    columns=[
        'Date_x0028_Checked_x0029_', 'Date_x0028_Dispensed_x0029_',
        'Date_x0028_Dispensing_x0029_', 'Date_x0028_Verified_x0029_',
        'TTOTimeIn', 'Date_x0028_Pharm_x0029_', 'Date_x0028_Pigeon_x0029_',
        'Date_x0028_NurseDispensed_x0029_', 'Date_x0028_PassedNurse_x0029_',
        'Case', 'Time Verified',
    ],
    inplace=True,
)

In [None]:
# Check which columns remain in Set 2 and how many values each holds
df_set2.info()

#### Step 11: Rename Columns for future standardization

In [None]:
# Map the Set 2 headers onto the standard column names shared by all sets
set2_new_cols_name = {
    'TTOWardIn': 'Ward',
    'Title': 'Case Number',
    'Date(Verified)': 'DateTime TTO Received',
    'Date(Checked)': 'DateTime TTO Checked',
    'Date(Dispensing)': 'DateTime (Dispensing)',
    'Date(PassedNurse)': 'DateTime (Passed to Nurse)',
    'Date(Pigeon)': 'DateTime (in Pigeon)',
    'Date(Pharm)': 'DateTime (in Pharmacy)',
    'Date(Dispensed)': 'DateTime TTO Dispensed',
}

df_set2.rename(columns=set2_new_cols_name, inplace=True)

df_set2.head()

#### Step 12: Change datetime64[ns] to object

In [None]:
# Cast each Set 2 timestamp column to object dtype
# (presumably so it lines up with the string timestamps of Set 1 — TODO confirm)
for _col in (
    'DateTime TTO Received',
    'DateTime TTO Checked',
    'DateTime (Dispensing)',
    'DateTime (Passed to Nurse)',
    'DateTime (in Pigeon)',
    'DateTime (in Pharmacy)',
    'DateTime TTO Dispensed',
):
    df_set2[_col] = df_set2[_col].astype(object)

df_set2.info()

### Step 13: Create Set 3 df with Sept 2021 Data

In [None]:
# Set 3 = rows with both 'Date(Verified)' and 'Case' populated.
# .copy() makes df_set3 an independent frame, so the in-place drop/rename
# applied to it later does not act on a slice of df2
# (which triggers SettingWithCopyWarning).
df_set3 = df2[df2['Date(Verified)'].notnull() & df2['Case'].notnull()].copy()
df_set3

#### Step 14: Remove unnecessary columns that do not belong to Set3

In [None]:
# Remove the encoded-header date columns and the other columns that do not
# belong to Set 3 (its case identifier is 'Case', so 'Title' is dropped)
df_set3.drop(
    columns=[
        'Date_x0028_Checked_x0029_', 'Date_x0028_Dispensed_x0029_',
        'Date_x0028_Dispensing_x0029_', 'Date_x0028_Verified_x0029_',
        'TTOTimeIn', 'Date_x0028_Pharm_x0029_', 'Date_x0028_Pigeon_x0029_',
        'Date_x0028_NurseDispensed_x0029_', 'Date_x0028_PassedNurse_x0029_',
        'Title', 'Time Verified',
    ],
    inplace=True,
)

In [None]:
# Check which columns remain in Set 3 and how many values each holds
df_set3.info()

#### Step 15: Rename Columns for future standardization

In [None]:
# Map the Set 3 headers onto the standard column names shared by all sets
# (here 'Case', not 'Title', becomes 'Case Number')
set3_new_cols_name = {
    'TTOWardIn': 'Ward',
    'Case': 'Case Number',
    'Date(Verified)': 'DateTime TTO Received',
    'Date(Checked)': 'DateTime TTO Checked',
    'Date(Dispensing)': 'DateTime (Dispensing)',
    'Date(PassedNurse)': 'DateTime (Passed to Nurse)',
    'Date(Pigeon)': 'DateTime (in Pigeon)',
    'Date(Pharm)': 'DateTime (in Pharmacy)',
    'Date(Dispensed)': 'DateTime TTO Dispensed',
}

df_set3.rename(columns=set3_new_cols_name, inplace=True)

df_set3.head()

### Step 16: Merge 3 Sets into dfinal

In [None]:
# Stack the three standardized sets row-wise and renumber the rows 0..n-1
dfinal = pd.concat([df_set1, df_set2, df_set3], ignore_index=True)

dfinal

#### Step 17: Export Re-format to CSV

In [None]:
# Write the merged frame to CSV (with header row, without the index column)
dfinal.to_csv(
    '../../Data/Pharmacy Dept/MEH TTO Data/MEH_TTO_Data_Merged.csv',
    header=True,
    index=False,
)

## Data Cleaning

### Step 18: Read Merged Data

In [None]:
# Reload the merged export from disk as the starting point for further cleaning
dfinal_csv = pd.read_csv(
    '../../Data/Pharmacy Dept/MEH TTO Data/MEH_TTO_Data_Merged.csv',
)
dfinal_csv

In [None]:
# Inspect dtypes and non-null counts of the reloaded data
dfinal_csv.info()

### Step 19: Fix Data Type(s) for Time

In [None]:
#Change all Time Column to datetime
datetime_cols = [
    'DateTime TTO Checked',
    'DateTime TTO Dispensed',
    'DateTime (Dispensing)',
    'DateTime TTO Received',
    'DateTime (in Pharmacy)',
    'DateTime (in Pigeon)',
    'DateTime (Dispensed by Nurses)',
    'DateTime (Passed to Nurse)',
]

for col in datetime_cols:
    # errors='coerce' turns unparseable values into NaT.  The deprecated
    # infer_datetime_format flag is dropped: it only selected a faster parsing
    # path and emits a warning on pandas >= 2.0.
    dfinal_csv[col] = pd.to_datetime(dfinal_csv[col], errors='coerce')

In [None]:
#Append 'Date' Column
# Calendar date (time component removed) taken from the received timestamp
received_at = dfinal_csv['DateTime TTO Received']
dfinal_csv['Date'] = received_at.dt.date

In [None]:
# Preview the first rows with the parsed timestamps and the new 'Date' column
dfinal_csv.head()

### Step 20: Rename Columns

In [None]:
#dfinal_cols_name = {'DateTime TTO Checked': 'Time TTO Checked',
#                    'DateTime TTO Dispensed': 'Time (Dispensed)',
#                    'DateTime (Dispensing)': 'Time (Dispensing)',
#                    'DateTime TTO Received': 'Time TTO Received',
#                    'DateTime (in Pharmacy)': 'Time (in Pharmacy)',
#                    'DateTime (in Pigeon)': 'Time (in Pigeon)',
#                    'DateTime (Dispensed by Nurses)': 'Time (Dispensed by Nurse)',
#                    'DateTime (Passed to Nurse)': 'Time (Passed to Nurse)'}
#
#dfinal_csv.rename(columns=dfinal_cols_name, inplace=True)
#
#dfinal_csv.head()

## EDA

### Step 21: Get Day of Week

In [None]:
# Weekday name derived from the 'Date' column
received_date = pd.to_datetime(dfinal_csv['Date'])
dfinal_csv['Weekday'] = received_date.dt.day_name()

#Assuming no NULL values in 'Date'
dfinal_csv.head()

### Time TTO Dispensed

Disclaimer: Time TTO Dispensed Recalculation is not needed due to change in workflow (Based on Clarification call on 15/03)

Original note: because of the various workflow conditions at MEH, Time TTO Dispensed previously had to be recalculated. The recalculation logic is kept below, commented out, for reference.

In [None]:
#ASSUMPTIONS
#if dispensed by nurse -> time tto dispensed = time tto checked
#if passed to nurse -> time tto dispensed = time tto checked
#if in pigeon -> time tto dispensed = time tto checked
#else time tto dispensed = time (dispensed)

#dfinal_csv['Time TTO Dispensed'] = np.where(((dfinal_csv['Time (Dispensed by Nurse)'].notnull()) | 
#                                        (dfinal_csv['Time (Passed to Nurse)'].notnull()) | 
#                                        (dfinal_csv['Time (in Pigeon)'].notnull())), 
#                                        dfinal_csv['Time TTO Checked'], dfinal_csv['Time (Dispensed)'])
#
#dfinal_csv

In [None]:
#dfinal_csv[dfinal_csv['Time TTO Dispensed'].isnull()]

### Step 22: Get Overall Time Taken (TAT)

In [None]:
#ASSUMPTIONS: Time TTO Dispensed is not null
#if in Pharmacy -> total time taken = duration (Checked - Received) + duration (dispensed - dispensing)
#else total time taken = duration (dispensed - received) --> INVALID after new workflow

def duration(end, start):
    """Return the elapsed time from ``start`` to ``end`` in minutes.

    Parameters
    ----------
    end, start : pandas.Series
        Timestamps, either datetime values or strings that
        ``pd.to_datetime`` can parse.  The two Series are subtracted
        element-wise (aligned on their index).

    Returns
    -------
    pandas.Series
        ``end - start`` expressed as float minutes; NaN wherever either
        timestamp is missing.
    """
    # Parse directly instead of round-tripping every value through str
    elapsed = pd.to_datetime(end) - pd.to_datetime(start)
    return elapsed.dt.total_seconds() / 60

# TAT in minutes = (Checked - Received) + (Dispensed - Dispensing), computed
# only for rows that have a dispensed timestamp; every other row gets NaN.
has_dispensed = dfinal_csv['DateTime TTO Dispensed'].notnull()
received_to_checked = duration(dfinal_csv['DateTime TTO Checked'], dfinal_csv['DateTime TTO Received'])
dispensing_to_dispensed = duration(dfinal_csv['DateTime TTO Dispensed'], dfinal_csv['DateTime (Dispensing)'])

# Former alternative (no longer valid under the new workflow):
#(duration(dfinal_csv['DateTime TTO Dispensed'], dfinal_csv['DateTime TTO Received']))
# np.nan replaces the np.NaN alias, which was removed in NumPy 2.0.
dfinal_csv['TAT'] = np.where(has_dispensed,
                             received_to_checked + dispensing_to_dispensed,
                             np.nan)

dfinal_csv

In [None]:
# List the rows for which no TAT could be computed
dfinal_csv[dfinal_csv['TAT'].isnull()]

### Step 23: Create Meet KPI column with yes/no value

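MEH: Total Time Taken < 45mins = Yes, else: No. (Note: the code below compares with `<=`, so a TAT of exactly 45 mins also counts as Yes; cases without a TAT are marked NA.)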

In [None]:
# KPI threshold for TAT, in minutes (previously obtained indirectly via
# pd.to_datetime('00:45:00').minute, which evaluates to the same value).
KPI_TARGET_MINUTES = 45

# 'NA' when TAT is missing; otherwise 'Yes' if TAT is at most the threshold
# (the comparison is inclusive), else 'No'.
dfinal_csv['Meet KPI'] = np.where(
    dfinal_csv['TAT'].isnull(),
    'NA',
    np.where(dfinal_csv['TAT'] <= KPI_TARGET_MINUTES, 'Yes', 'No'),
)

dfinal_csv

In [None]:
# List the cases that did not meet the KPI
dfinal_csv[dfinal_csv['Meet KPI']=="No"]

### Step 24: Create Office Hours column to check if case handled during office hours

Standard Office Hours: 8:30AM to 5:00PM

Assumption: a case counts as handled during office hours as long as DateTime TTO Dispensed is at or before 5:00PM; the 8:30AM start is not checked.

In [None]:
# 'NA' when there is no dispensed timestamp; otherwise 'Yes' if the dispensed
# time of day is at or before 17:00, else 'No'.  The 08:30 lower bound is not
# checked.
dispensed_at = dfinal_csv['DateTime TTO Dispensed']
office_close = pd.to_datetime('17:00:00').time()

dfinal_csv['Office Hours'] = np.where(
    dispensed_at.isnull(),
    'NA',
    np.where(dispensed_at.dt.time <= office_close, 'Yes', 'No'),
)

dfinal_csv

In [None]:
# List the cases dispensed after office hours
dfinal_csv[dfinal_csv['Office Hours']=="No"]

### Step 25: Calculate Time Taken for Nurses to Dispense

Calculation for Staff Productivity Purposes

Duration Difference of Passed to Nurse & Nurse Dispensed

In [None]:
#Taking duration formula from Overall Time Taken (TAT) as ref to calculate
# Minutes between 'Passed to Nurse' and 'Dispensed by Nurses'; NaN unless both
# timestamps are present.
passed_to_nurse = dfinal_csv['DateTime (Passed to Nurse)']
dispensed_by_nurse = dfinal_csv['DateTime (Dispensed by Nurses)']
both_present = passed_to_nurse.notnull() & dispensed_by_nurse.notnull()

# np.nan replaces the np.NaN alias, which was removed in NumPy 2.0.
dfinal_csv['Time Taken (Nurse to Dispense)'] = np.where(
    both_present,
    duration(dispensed_by_nurse, passed_to_nurse),
    np.nan,
)

dfinal_csv

In [None]:
#Convert to Seconds
# NOTE: not idempotent — re-running this cell multiplies the values by 60 again
nurse_col = 'Time Taken (Nurse to Dispense)'
dfinal_csv[nurse_col] = dfinal_csv[nurse_col] * 60
dfinal_csv[dfinal_csv[nurse_col].notnull()].head()

In [None]:
#Convert Time Taken (Nurse to Dispense) to timedelta format to show  x days x hr x min x sec
# The column holds seconds at this point, hence unit='s'
nurse_col = 'Time Taken (Nurse to Dispense)'
dfinal_csv[nurse_col] = pd.to_timedelta(dfinal_csv[nurse_col], unit='s')
dfinal_csv[dfinal_csv[nurse_col].notnull()].head()

### Step 26: Create Average Time Taken from (Received to Checked) & (Checked to Dispensed) columns

In [None]:
# Per-case step durations as timedeltas (NaT when a timestamp is missing)
checked_at = dfinal_csv['DateTime TTO Checked']

#Get Duration Difference of Received to Checked
dfinal_csv['Time Taken (Received to Checked)'] = checked_at - dfinal_csv['DateTime TTO Received']
#Get Duration Difference of Checked to Dispensed
dfinal_csv['Time Taken (Checked to Dispensed)'] = dfinal_csv['DateTime TTO Dispensed'] - checked_at

In [None]:
# Preview the rows with the two new step-duration columns
dfinal_csv.head()

#### Average time taken per day

In [None]:
#Create new dataframe to get results of Average Time Taken for each (Received to Checked) & (Checked to Dispensed)
# .copy() gives an independent frame so that values assigned into it later
# (via .loc) do not raise SettingWithCopyWarning.
dfinal_avg_time = dfinal_csv[['Date', 'Time Taken (Received to Checked)', 'Time Taken (Checked to Dispensed)']].copy()
dfinal_avg_time.head()

In [None]:
#Convert NaT to 00:00:00
# NOTE(review): zero-filled rows take part in the per-day mean computed from
# this frame and therefore lower it — confirm this is intended.
zero_duration = pd.to_timedelta(0)
for _col in ('Time Taken (Received to Checked)', 'Time Taken (Checked to Dispensed)'):
    dfinal_avg_time[_col] = dfinal_avg_time[_col].fillna(zero_duration)

In [None]:
#Get average time taken per day
# Average only the two duration columns.  The object-dtype 'Date' column is
# still part of the frame (the grouping key is a separate Series), and pandas
# >= 2.0 no longer silently drops such columns, so mean(numeric_only=False)
# would fail on it.  The result is indexed by 'Date'.
day_key = pd.to_datetime(dfinal_avg_time['Date']).dt.date
duration_cols = ['Time Taken (Received to Checked)', 'Time Taken (Checked to Dispensed)']

dfinal_avg_time = dfinal_avg_time.groupby(day_key)[duration_cols].mean(numeric_only=False)
dfinal_avg_time

In [None]:
#Remove milliseconds
# Round each daily average down to whole seconds
for _col in ('Time Taken (Received to Checked)', 'Time Taken (Checked to Dispensed)'):
    dfinal_avg_time[_col] = dfinal_avg_time[_col].dt.floor('s')

dfinal_avg_time

In [None]:
#Rename Columns
# Give the daily averages their own names so they do not clash with the
# per-case duration columns when merged back
dfinal_avg_time = dfinal_avg_time.rename(
    columns={
        'Time Taken (Received to Checked)': 'Avg Time Taken / Day (Received to Checked)',
        'Time Taken (Checked to Dispensed)': 'Avg Time Taken / Day (Checked to Dispensed)',
    }
)

In [None]:
#Merge the results from groupby for Avg Time Taken
# Left join on 'Date' so every case row receives its day's averages
dfinal_csv = pd.merge(dfinal_csv, dfinal_avg_time, on='Date', how='left')
dfinal_csv

In [None]:
#Replace 00:00:00 in time columns with np.nan
# np.nan replaces the np.NaN alias, which was removed in NumPy 2.0.
zero_duration = pd.Timedelta(0)
for _col in (
    'Time Taken (Received to Checked)',
    'Time Taken (Checked to Dispensed)',
    'Avg Time Taken / Day (Received to Checked)',
    'Avg Time Taken / Day (Checked to Dispensed)',
):
    dfinal_csv[_col] = dfinal_csv[_col].replace(zero_duration, np.nan)

In [None]:
# Display the frame after zero durations were replaced with missing values
dfinal_csv

#### Average time taken per month

In [None]:
# Month name derived from the 'Date' column.
# NOTE: the name carries no year, so equal months of different years share one label.
case_dates = pd.to_datetime(dfinal_csv['Date'])
dfinal_csv['Month'] = case_dates.dt.month_name()

In [None]:
# .copy() so that adding the 'Month' column below does not act on a slice of
# dfinal_csv (SettingWithCopyWarning).
dfinal_avg_time = dfinal_csv[['Date', 'Time Taken (Received to Checked)', 'Time Taken (Checked to Dispensed)']].copy()

dfinal_avg_time['Month'] = pd.to_datetime(dfinal_avg_time['Date']).dt.month_name()

#Get average time taken per month
# Monthly totals of both durations.  The string 'sum' is used instead of
# np.sum, which emits a FutureWarning when passed to agg on pandas >= 2.1.
dfinal_avg_time = dfinal_avg_time.groupby('Month').agg({'Time Taken (Received to Checked)': 'sum', 'Time Taken (Checked to Dispensed)': 'sum'})

# x = monthly total in minutes, y = number of non-null durations in the month
x = dfinal_avg_time['Time Taken (Received to Checked)'] / np.timedelta64(1, 'm')
y = dfinal_csv.groupby('Month')['Time Taken (Received to Checked)'].count()

dfinal_avg_time['Avg Time Taken / Month (Received to Checked)'] = (x/y)

# Format the average minutes as an 'HH:MM:SS' string.
# NOTE: only the time-of-day part is kept, so an average of 24 hours or more
# would lose its day component.
dfinal_avg_time['Avg Time Taken / Month (Received to Checked)'] = pd.to_datetime(dfinal_avg_time['Avg Time Taken / Month (Received to Checked)'], unit='m').apply(lambda x: x.strftime("%H:%M:%S"))

In [None]:
# Same monthly average for the Checked -> Dispensed step:
# a = monthly total in minutes, b = number of non-null durations in the month
one_minute = np.timedelta64(1, 'm')
a = dfinal_avg_time['Time Taken (Checked to Dispensed)'] / one_minute
b = dfinal_csv.groupby('Month')['Time Taken (Checked to Dispensed)'].count()

avg_minutes = a / b
dfinal_avg_time['Avg Time Taken / Month (Checked to Dispensed)'] = avg_minutes

# Format the average minutes as an 'HH:MM:SS' string
avg_as_timestamp = pd.to_datetime(dfinal_avg_time['Avg Time Taken / Month (Checked to Dispensed)'], unit='m')
dfinal_avg_time['Avg Time Taken / Month (Checked to Dispensed)'] = avg_as_timestamp.apply(lambda ts: ts.strftime("%H:%M:%S"))

In [None]:
# Drop the first two columns (the monthly totals), keeping only the formatted averages
dfinal_avg_time = dfinal_avg_time.drop(columns=dfinal_avg_time.columns[[0, 1]])

# Attach each month's averages to every case row of that month
dfinal_csv = pd.merge(dfinal_csv, dfinal_avg_time, on='Month', how='left')

# Convert the 'HH:MM:SS' strings into timedeltas
for _col in (
    'Avg Time Taken / Month (Received to Checked)',
    'Avg Time Taken / Month (Checked to Dispensed)',
):
    dfinal_csv[_col] = pd.to_timedelta(dfinal_csv[_col])
dfinal_csv

### Step 27: Calculate DateTime (in Pigeon) 

This calculation is for Power BI visualization of the No. of Cases that are needed to be placed in Pigeon. 

Set count = 1 if [DateTime (in Pigeon)] is not null AND both [DateTime TTO Dispensed] and [DateTime (Dispensed by Nurses)] are null; otherwise count = 0.

In [None]:
# 1 when the case has a pigeon timestamp but neither a dispensed nor a
# nurse-dispensed timestamp; 0 otherwise
in_pigeon = dfinal_csv['DateTime (in Pigeon)'].notnull()
not_dispensed = dfinal_csv['DateTime TTO Dispensed'].isnull()
not_nurse_dispensed = dfinal_csv['DateTime (Dispensed by Nurses)'].isnull()

dfinal_csv['Case in Pigeon?'] = np.where(in_pigeon & not_dispensed & not_nurse_dispensed, 1, 0)

dfinal_csv

In [None]:
# List the cases flagged as being in the pigeon hole
dfinal_csv[dfinal_csv['Case in Pigeon?']==1]

## Step 28: Export CLEANED to csv

In [None]:
# Write the cleaned frame to CSV (with header row, without the index column)
dfinal_csv.to_csv(
    '../../Data/Pharmacy Dept/Data Cleaning/MEH_Data_Cleaned.csv',
    header=True,
    index=False,
)