In [None]:
# GOAL: predict the patient triage outcomes in emergency settings using supervised machine learning approaches

## **GOAL**: predict the patient triage outcomes in emergency settings using supervised machine learning approaches

## 1: IMPORT THE NECESSARY LIBRARIES

In [None]:
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

warnings.filterwarnings('ignore')


## 2. EXPLORE THE DATA

In [None]:
# Pull the sample patient-triage table directly from its GitHub-hosted CSV.
dataTriage = pd.read_csv(
    filepath_or_buffer='https://github.com/stashing/testing/raw/main/sampTriData.csv'
)
# Show which columns were loaded.
dataTriage.columns

Index(['sex', 'arrivalMode', 'injury', 'pain', 'nrsPain', 'sbp', 'dbp',
       'triageScore', 'ageGroup', 'medCondi'],
      dtype='object')

In [None]:
# Preview the first ten records.
dataTriage.iloc[:10]

Unnamed: 0,sex,arrivalMode,injury,pain,nrsPain,sbp,dbp,triageScore,ageGroup,medCondi
0,1,3,1,1,2.0,160,100,2,3,0
1,0,3,1,1,2.0,137,75,4,2,1
2,0,2,1,1,2.0,130,80,4,3,1
3,1,1,0,1,3.0,139,94,4,3,0
4,1,3,0,1,3.0,91,67,4,2,1
5,0,4,0,1,3.0,140,90,3,2,0
6,0,3,0,1,3.0,110,70,2,2,0
7,0,3,0,1,3.0,169,86,2,3,0
8,1,3,0,1,3.0,140,75,4,1,0
9,1,3,0,1,3.0,130,80,4,1,1


| VARIABLE    | DESCRIPTION                                    | TYPE    | GUIDELINE                                                                                                                                  |
| ----------- | ---------------------------------------------- | ------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
| sex         | sex                                            | numeric | 0: female, 1: male                                                                                                                         |
| ageGroup    | age group                                      | numeric | 0: <20 y/o; 1: 20 to 39; 2: 40 to 59; 3: 60 y/o and above                                                                                  |
| arrivalMode | arrival mode                                   | numeric | 0: others, 1: walk-in; 2: emergency hotline, 3: private car; 4: ambulance; 5: public transportation; 6: wheelchair                         |
| pain        | pain                                           | numeric | 0: non-pain, 1: pain                                                                                                                       |
| injury      | injury                                         | numeric | 0: non-injury, 1: injury                                                                                                                   |
| nrsPain     | numeric rating scale                           | numeric | 0: N/A, 1-10                                                                                                                               |
| sbp         | systolic blood pressure (mmHg)                 | numeric |                                                                                                                                            |
| dbp         | diastolic blood pressure (mmHg)                | numeric |                                                                                                                                            |
| triageScore | score based on Australasian Triage Scale (ATS) | numeric | 1: red; seen immediately, 2: orange; seen within 10 min, 3: green; seen within 30 min; 4: blue; seen within 1 h, 5: white; seen within 2 h |
| medCondi    | with medical condition?                        | numeric | 0: No, 1: Yes                                                                                                                              |


In [None]:
# glasgow coma scale (3-15 scale; 3- nonresponsive)



### Australasian Triage Scale (ATS)
![Australasian Triage Scale Figure](https://uc4478506a64356fe21b2abae492.previews.dropboxusercontent.com/p/thumb/ACM0jmF31fkPVdiGFW1SegZFiX_NTISPa2angwlIhtaFB_gOFIvWFyFLlsepruiYWUsPb0kLoKf_j-MdLicZXXlI_kE4SODGoSRQV8bBGPxO_7Ysb7VcrGCqniHMnpPrmk4g4EoNtlqSR80B8hH5vXP9M-9Yr8w8iliMZRjrf9Cb6_Fr6qvwii5FOgBPDg1E9xvYeD6NTM2wdN06gU9SJ_QJctBv1xXYXFnnWQOWMO4YOQqwqWN4wlRCs-uZHvKv0dNBDfjbaoxlck1zQoRLhnijT1SeMjs2PsmdemqzLfwuLvH8GqGvp1uG8OE_lnNGavvmn6lO8nR0ey02yLbZ8H_UxuT9MTg498tldn7gn7QGdWi-hllioLkWjDPKotpSfXNe8zmo4QUh_pY25M1FnraL/p.jpeg)

In [None]:
# Summary statistics, transposed so that each variable is a row.
dataTriage.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
sex,1000.0,0.525,0.499624,0.0,0.0,1.0,1.0,1.0
arrivalMode,1000.0,2.819,0.803047,0.0,2.0,3.0,3.0,6.0
injury,1000.0,0.182,0.386038,0.0,0.0,0.0,0.0,1.0
pain,1000.0,0.571,0.495181,0.0,0.0,1.0,1.0,1.0
nrsPain,569.0,4.047452,1.427668,1.0,3.0,4.0,5.0,10.0
sbp,1000.0,133.551,26.880066,50.0,114.75,130.0,150.0,275.0
dbp,1000.0,79.666,15.0667,31.0,70.0,80.0,90.0,160.0
triageScore,1000.0,3.338,0.903644,1.0,3.0,3.0,4.0,5.0
ageGroup,1000.0,2.127,0.882978,0.0,1.0,2.0,3.0,3.0
medCondi,1000.0,0.506,0.500214,0.0,0.0,1.0,1.0,1.0


Datasets rarely arrive ***clean***, so it is good practice to inspect them for missing or inconsistent values before running any analysis.

In [None]:
# One row per column of dataTriage: its dtype, how many distinct values it
# holds, and how many entries are missing.
dataTriageInfo = pd.DataFrame({
    'Dtype': dataTriage.dtypes,
    'Unique': dataTriage.nunique(),
    'Null': dataTriage.isna().sum(),
})
dataTriageInfo

Unnamed: 0,Dtype,Unique,Null
sex,int64,2,0
arrivalMode,int64,7,0
injury,int64,2,0
pain,int64,2,0
nrsPain,float64,10,431
sbp,int64,122,0
dbp,int64,78,0
triageScore,int64,5,0
ageGroup,int64,4,0
medCondi,int64,2,0


In [None]:
# Count the missing entries in every column.
dataTriage.isnull().sum()

sex              0
arrivalMode      0
injury           0
pain             0
nrsPain        431
sbp              0
dbp              0
triageScore      0
ageGroup         0
medCondi         0
dtype: int64

In [None]:
# Show the records whose pain rating (nrsPain) is missing.
dataTriage[dataTriage['nrsPain'].isnull()]

Unnamed: 0,sex,arrivalMode,injury,pain,nrsPain,sbp,dbp,triageScore,ageGroup,medCondi
28,1,2,0,0,,180,82,2,2,1
29,1,3,0,0,,124,81,2,1,1
30,0,3,0,1,,91,74,2,3,1
31,0,2,0,0,,162,75,2,1,0
32,1,2,0,0,,116,77,2,1,0
...,...,...,...,...,...,...,...,...,...,...
995,1,2,0,0,,130,80,2,3,0
996,1,4,0,0,,80,50,2,3,0
997,1,2,0,0,,120,80,2,3,1
998,1,3,0,0,,120,80,4,3,0


In [None]:
# List the dtype pandas assigned to every column.
dataTriage.dtypes

sex              int64
arrivalMode      int64
injury           int64
pain             int64
nrsPain        float64
sbp              int64
dbp              int64
triageScore      int64
ageGroup         int64
medCondi         int64
dtype: object

In [None]:
# Distinct pain ratings present in the data (NaN marks a missing rating).
dataTriage['nrsPain'].unique()

array([ 2.,  3.,  4.,  5.,  6.,  8.,  9., nan,  1.,  7., 10.])

### Let's replace these pesky null values!

In [None]:
# Fill the missing 'nrsPain' ratings with 0, the code the data dictionary uses for "N/A".
# The result is assigned back to the column rather than calling
# replace(..., inplace=True) on the selected column: that in-place call acts on
# an intermediate Series, which pandas flags as chained assignment and which
# leaves dataTriage unchanged when Copy-on-Write is enabled.
dataTriage['nrsPain'] = dataTriage['nrsPain'].fillna(0)

In [None]:
# Re-check the distinct pain ratings after the missing values were handled.
dataTriage['nrsPain'].unique()

array([ 2.,  3.,  4.,  5.,  6.,  8.,  9.,  0.,  1.,  7., 10.])

In [None]:
# Store the pain ratings as 64-bit integers instead of floats.
dataTriage['nrsPain'] = dataTriage['nrsPain'].astype(np.int64)

In [None]:
# Print the frequency table of every column, one after another.
for columnName in dataTriage:
    print(dataTriage[columnName].value_counts())

sex
1    525
0    475
Name: count, dtype: int64
arrivalMode
3    598
2    202
4    125
1     63
6      8
5      2
0      2
Name: count, dtype: int64
injury
0    818
1    182
Name: count, dtype: int64
pain
1    571
0    429
Name: count, dtype: int64
nrsPain
0     431
3     232
4     111
5     105
6      51
2      33
7      23
8       8
10      3
1       2
9       1
Name: count, dtype: int64
sbp
120    98
110    97
140    73
130    69
100    66
       ..
174     1
195     1
189     1
214     1
213     1
Name: count, Length: 122, dtype: int64
dbp
80     150
70     109
60      96
90      80
100     76
      ... 
52       1
117      1
53       1
114      1
36       1
Name: count, Length: 78, dtype: int64
triageScore
4    380
3    340
2    190
5     82
1      8
Name: count, dtype: int64
ageGroup
3    434
2    286
1    253
0     27
Name: count, dtype: int64
medCondi
1    506
0    494
Name: count, dtype: int64


In [None]:
# Frequency of each arrival mode. Codes for the 'arrivalMode' column:
# 0: Others / 1: Walk-in / 2: emergency hotline / 3: Private car / 4: Private ambulance / 5: Public transportation / 6: Wheelchair

dataTriage['arrivalMode'].value_counts()

arrivalMode
3    598
2    202
4    125
1     63
6      8
5      2
0      2
Name: count, dtype: int64

In [None]:
# [PRESENTATION]
# Work on an independent copy so later transforms do not modify dataTriage.
dataTriage_cleaned = dataTriage.copy()

# Save the copy as CSV (header row kept, index column omitted).
# '/content' is used when it exists (the original target directory); when it
# does not exist, fall back to the current working directory instead of
# failing with OSError on the missing folder.
cleanedCsvDir = Path('/content') if Path('/content').is_dir() else Path.cwd()
dataTriage_cleaned.to_csv(cleanedCsvDir / 'sampleTriageData_cleaned.csv', header=True, index=False)

## 3. MODELING


![normalization](https://uce2b65cf459c312c0f8f570704c.previews.dropboxusercontent.com/p/thumb/ACMMOUsCGx1rFaFOl6Cphw5QaMVqnoh4HkEkvUhIwGiZLwyVzJg6UO4aP2E9HXzFsfIG7gK1yCtrwdqRMjzaSIrsMk_inNKqKa08fZMXs-In6BR0HjPLZX1lM19Uf1O-GD_W6vG0KDsYZ4hNGPWtYbilz9YTuWggJmVuyVjownS0nNe_pQjWD-He8rM0vYlTT8ry6JDYAkPPo0wW6c6TeqSM0gN_M_2Aa7U4pT-qnICcIn09NM0BjBAmUIS5_yruTa7_LK7tJTFX1GIOmhkwUGURtD2CMmXi9VohOe_fhu7GGXgqLJho1TEZgfYl0vAVDAbO_fzoRqVOls66uUidRHrxMJnlx3cV4zS6egs-yB5zETsuCFN0mXRPGkFPJPiHkOVVRToMIilssManIjHRsBdn/p.jpeg)



In [None]:
# [PRESENTATION]
# Transform sbp and dbp using the natural logarithm, ln(z).
import math  # no longer used in this cell; kept in case other cells rely on it

# The log is always computed from the untransformed readings in dataTriage
# (dataTriage_cleaned started as a copy of it), so re-running this cell gives
# the same result instead of applying the logarithm a second time.
# np.log is vectorised and is the same function the sample-patient cells use
# to prepare their blood-pressure inputs.
for bp in ['sbp', 'dbp']:
    dataTriage_cleaned[bp] = np.log(dataTriage[bp])

In [None]:
# Summary statistics of the transformed frame, one variable per row.
dataTriage_cleaned.describe().transpose()
# dataTriage.describe([0.10,0.25,0.50,0.75,0.90,0.95,0.99]).T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
sex,1000.0,0.525,0.499624,0.0,0.0,1.0,1.0,1.0
arrivalMode,1000.0,2.819,0.803047,0.0,2.0,3.0,3.0,6.0
injury,1000.0,0.182,0.386038,0.0,0.0,0.0,0.0,1.0
pain,1000.0,0.571,0.495181,0.0,0.0,1.0,1.0,1.0
nrsPain,1000.0,2.303,2.276043,0.0,0.0,3.0,4.0,10.0
sbp,1000.0,4.874615,0.19996,3.912023,4.742749,4.867534,5.010635,5.616771
dbp,1000.0,4.359558,0.193887,3.433987,4.248495,4.382027,4.49981,5.075174
triageScore,1000.0,3.338,0.903644,1.0,3.0,3.0,4.0,5.0
ageGroup,1000.0,2.127,0.882978,0.0,1.0,2.0,3.0,3.0
medCondi,1000.0,0.506,0.500214,0.0,0.0,1.0,1.0,1.0


In [None]:
# 1-5

![trainTest](https://uc8ea39b24171cd7ef66fed6f178.previews.dropboxusercontent.com/p/thumb/ACO_vV45UJIifWNLjHkekPYqZqu3hIxxBEX5cktlNTZJ1J5jYEEtvjEeA5O4PxbaWuMlKPNbbdndqhBJRRAJlTGhIMuKVr0x0UG07SpA6I5b_WdmHlU6Sz01yNAOen8d59Kvq_JtMJ2HgFXJUR-3T9bZsmm3klK-LYX8S_npxXFPFUDF50HgeaXIRZEPIJmodGQCo5Ad3uvw1fkWVzQFT_mI7nhAV97xfhAYgmTzpcWzYYcswWTxHkFYf9DMphx9nO2DeKPDKsONQ8-4ZkYRnbOmogVL8cPBpukc7iaArdkWf6zemQQQqTXViDjD0Q9VsOJbwMUq_Udiqs3PqFj2C737iUy9669VFG4aemljtAJ3D7LJh7OwcIIEhq_RucqHRgw/p.jpeg)

In [None]:
# Baseline model: a multilayer perceptron (MLP) classifier that predicts 'triageScore'.


from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# Features are every column except the target; the target is the triage score.
X = dataTriage_cleaned.drop('triageScore', axis=1)
y = dataTriage_cleaned['triageScore']

seedNum = 1001

# todo: include a diagram/quick explainer of splitting the data into training and tests
# Split X and y into training and testing sets (train-test ratio 75:25).
# stratify=y keeps the class proportions the same in both parts; triageScore 1
# has only 8 records, so a purely random split could leave too few of them in
# either part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=seedNum, stratify=y
)

# todo: include a diagram that strips down technical info but provides the importance of this in the discussion
# Construct the MLP model using MLPClassifier:
#   - one hidden layer with 25 units (hidden_layer_sizes=(25,))
#   - relu activation
#   - adam (optimization algorithm / "solver")
#   - alpha=0.05
#   - learning_rate='adaptive' (per the scikit-learn docs this setting is only
#     used when solver='sgd', so it has no effect with adam)
# NOTE(review): random_state=42 here differs from seedNum; left unchanged.
mlpTriage = MLPClassifier(hidden_layer_sizes=(25,), activation='relu', solver='adam', alpha=0.05, learning_rate='adaptive', random_state=42)

# Fit the MLP model with the training data -> perform predictions on the test set
mlpTriage.fit(X_train, y_train)
y_pred = mlpTriage.predict(X_test)

# Since this is a multiclass classification, we use
# accuracy score = (number of correct predictions)/(total predictions)
accuracyMLP = accuracy_score(y_test, y_pred)



In [None]:
# Show the accuracy currently stored in accuracyMLP.
print(accuracyMLP)

0.544


In [None]:
# todo: include image of grid search with k-fold cross validation

In [None]:
# IMPROVING THE MLP MODEL
# Grid search with 5-fold cross-validation over a parameter grid, ranking the
# candidates by accuracy score.

# Candidate values per hyperparameter (2 x 2 x 2 x 2 x 2 = 32 combinations).
parameterGridMLP = dict(
    hidden_layer_sizes=[(25,), (50,)],
    activation=['relu', 'tanh'],
    solver=['adam', 'sgd'],
    alpha=[0.0001, 0.05],
    learning_rate=['constant', 'adaptive'],
)

# Base estimator; the search supplies the hyperparameters listed in the grid.
mlpTriageG = MLPClassifier(random_state=seedNum)

# 5-fold cross-validation (stratified CV) scored on accuracy; verbose=1 prints
# the number of fits.
gridSearchMLP = GridSearchCV(
    estimator=mlpTriageG,
    param_grid=parameterGridMLP,
    scoring='accuracy',
    cv=5,
    verbose=1,
)

# Search on the training split, predict the test split with the best
# parameters found, then compute the accuracy of those predictions.
gridSearchMLP.fit(X_train, y_train)
predMLP_G = gridSearchMLP.predict(X_test)
# NOTE: this rebinds accuracyMLP to the accuracy of the grid-searched model.
accuracyMLP = accuracy_score(y_test, predMLP_G)

# Report the accuracy together with the winning parameter combination.
print(accuracyMLP, gridSearchMLP.best_params_)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
0.528 {'activation': 'tanh', 'alpha': 0.05, 'hidden_layer_sizes': (50,), 'learning_rate': 'constant', 'solver': 'adam'}


In [None]:
# Display the hyperparameter combination selected by the grid search.
gridSearchMLP.best_params_

{'activation': 'tanh',
 'alpha': 0.05,
 'hidden_layer_sizes': (50,),
 'learning_rate': 'constant',
 'solver': 'adam'}

In [None]:
# 0: Others / 1: Walk-in / 2: emergency hotline / 3: Private car / 4: Private ambulance / 5: Public transportation / 6: Wheelchair
# 0: <20, 1: 20-39, 2: 40-59, 3: 60 and above

In [None]:
# CONSIDER SOME CASES!

In [None]:
# Case 0: female, 20-39 y/o, walk-in, no injury, no pain (NRS 0),
# blood pressure 122/82, no medical condition.
# The blood-pressure values are passed as natural logs, like the sbp/dbp
# columns of dataTriage_cleaned.
samplePatient0 = dict(
    sex=0,
    arrivalMode=1,
    injury=0,
    pain=0,
    nrsPain=0,
    sbp=np.log(122),
    dbp=np.log(82),
    ageGroup=1,
    medCondi=0,
)

# Wrap the record in a one-row frame and read off its predicted triage score.
gridSearchMLP.predict(pd.DataFrame([samplePatient0]))[0]

3

In [None]:
# Case 1: male, 40-59 y/o, private car, injury, in pain (NRS 8),
# blood pressure 130/95, with medical condition.
# NOTE(review): this case was previously described as "nrs pain of 6" while the
# record uses nrsPain=8 - confirm which value was intended.

samplePatient1 = dict(
    sex=1,
    arrivalMode=3,
    injury=1,
    pain=1,
    nrsPain=8,
    sbp=np.log(130),
    dbp=np.log(95),
    ageGroup=2,
    medCondi=1,
)

# Wrap the record in a one-row frame and read off its predicted triage score.
gridSearchMLP.predict(pd.DataFrame([samplePatient1]))[0]

3

In [None]:
# Case 2: male, 40-59 y/o, private car, no injury, no pain (NRS 0),
# blood pressure 128/90, with medical condition.

samplePatient2 = dict(
    sex=1,
    arrivalMode=3,
    injury=0,
    pain=0,
    nrsPain=0,
    sbp=np.log(128),
    dbp=np.log(90),
    ageGroup=2,
    medCondi=1,
)

# Wrap the record in a one-row frame and read off its predicted triage score.
gridSearchMLP.predict(pd.DataFrame([samplePatient2]))[0]

3

In [None]:
# Case 3: female, 60 y/o and above, private car, no injury, no pain (NRS 0),
# blood pressure 135/100, with medical condition.

samplePatient3 = dict(
    sex=0,
    arrivalMode=3,
    injury=0,
    pain=0,
    nrsPain=0,
    sbp=np.log(135),
    dbp=np.log(100),
    ageGroup=3,
    medCondi=1,
)

# Wrap the record in a one-row frame and read off its predicted triage score.
gridSearchMLP.predict(pd.DataFrame([samplePatient3]))[0]

3

In [None]:
# Show every record whose triage score is 1.
dataTriage_cleaned[dataTriage_cleaned['triageScore'].eq(1)]

Unnamed: 0,sex,arrivalMode,injury,pain,nrsPain,sbp,dbp,triageScore,ageGroup,medCondi
26,1,4,1,1,8,4.382027,3.912023,1,3,1
132,0,4,0,0,0,4.60517,4.094345,1,3,1
639,0,2,0,0,0,4.382027,3.806662,1,3,1
750,1,2,0,0,0,5.075174,4.49981,1,2,1
800,1,3,0,0,0,4.867534,4.49981,1,2,0
870,0,3,0,0,0,5.393628,4.812184,1,3,0
893,1,2,1,0,0,4.59512,4.317488,1,3,1
959,0,3,0,1,3,5.010635,4.49981,1,3,0


In [None]:
# Number of records in the cleaned frame.
dataTriage_cleaned.shape[0]

1000

In [None]:
# How many records fall into each triage score.
dataTriage['triageScore'].value_counts()

triageScore
4    380
3    340
2    190
5     82
1      8
Name: count, dtype: int64

In [None]:
# accuracy score = correct predictions / all predictions
# https://www.evidentlyai.com/classification-metrics/multi-class-metrics

In [None]:
# NOTE(review): leftover scratch cell - two bare literals that assign nothing.
# Candidate for deletion.
2
3

In [None]:
# Histogram (plotly) of the absolute difference between the true test-set
# triage scores (y_test) and the grid-search predictions (predMLP_G).

import pandas as pd
import plotly.express as px
import numpy as np


# y_test and predMLP_G must already be defined by the modeling cells.
df_diff = pd.DataFrame({'Absolute Error': (y_test - predMLP_G).abs()})

# Smallest and largest observed error, used for the axis range and ticks.
smallestError = df_diff['Absolute Error'].min()
largestError = df_diff['Absolute Error'].max()

# Create the histogram.
fig = px.histogram(
    df_diff,
    x='Absolute Error',
    nbins=50,
    title='Histogram of Absolute Errors for MLP Predictions',
)

# Start the x-axis at -0.2 and end it 0.2 past the largest error.
fig.update_layout(xaxis_range=[-0.2, largestError + 0.2])

# Put tick marks at whole numbers only.
fig.update_xaxes(tickmode='array', tickvals=np.arange(int(smallestError), int(largestError + 1)))



In [None]:
# Mean absolute difference between the true and predicted triage scores.
print((y_test - predMLP_G).abs().mean())

0.616


In [None]:
# packaging: deck -----> live coding session (usefulness of AI in triage settings)
# end-to-end treatment: