In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, Masking
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Load Data
file_path = "Sepsis_Merged_Selected_Features_Activity.csv"
df = pd.read_csv(file_path)

df.fillna("None", inplace=True)

# Extract activity sequence columns
activity_columns = [col for col in df.columns if "Activity" in col]
df["Activity_Sequence"] = df[activity_columns].apply(lambda row: " -> ".join(row.values), axis=1)

# Encode final activity
y = df["Final Activity"]
label_encoder = LabelEncoder()
df["Final_Activity_Encoded"] = label_encoder.fit_transform(y)

# Tokenize activity sequences
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(df["Activity_Sequence"])
sequences = tokenizer.texts_to_sequences(df["Activity_Sequence"])

# Pad sequences to uniform length
max_sequence_length = max(map(len, sequences))
X_seq = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

y_seq = tf.keras.utils.to_categorical(df["Final_Activity_Encoded"], num_classes=len(label_encoder.classes_))

# Split data
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42, stratify=df["Final_Activity_Encoded"])

# Define LSTM Model
model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=max_sequence_length),
    Masking(mask_value=0.0),
    LSTM(64, return_sequences=False),
    Dense(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

# Compile Model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train Model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Evaluate Model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Save Model and Tokenizer
model.save("sepsis_lstm_model.h5")
pd.to_pickle(tokenizer, "sepsis_tokenizer.pkl")
pd.to_pickle(label_encoder, "sepsis_label_encoder.pkl")

# Function to Predict Next Activity
def predict_next_activity(activity_sequence):
    sequence = tokenizer.texts_to_sequences([activity_sequence])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')
    prediction = model.predict(padded_sequence)
    predicted_class = np.argmax(prediction, axis=1)
    return label_encoder.inverse_transform(predicted_class)[0]

# Example Usage
example_sequence = "ER Registration -> ER Triage -> Leucocytes"
predicted_next_activity = predict_next_activity(example_sequence)
print(f"Predicted Next Activity: {predicted_next_activity}")


  df.fillna("None", inplace=True)


Epoch 1/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 66ms/step - accuracy: 0.3411 - loss: 2.5236 - val_accuracy: 0.3895 - val_loss: 1.9246
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 0.3960 - loss: 1.8634 - val_accuracy: 0.4158 - val_loss: 1.6565
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step - accuracy: 0.5790 - loss: 1.6379 - val_accuracy: 0.6737 - val_loss: 1.2077
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 0.7046 - loss: 1.1678 - val_accuracy: 0.7632 - val_loss: 0.9267
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 0.7841 - loss: 0.8278 - val_accuracy: 0.7947 - val_loss: 0.7378
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 54ms/step - accuracy: 0.8035 - loss: 0.6815 - val_accuracy: 0.8000 - val_loss: 0.6120
Epoch 7/10
[1m24/24[0m [32m━━━━



Test Accuracy: 0.88
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 305ms/step
Predicted Next Activity: Leucocytes


In [3]:
print(predict_next_activity("ER Registration -> ER Triage -> Leucocytes"))
print(predict_next_activity("ER Registration -> ER Triage -> Leucocytes -> LacticAcid"))
print(predict_next_activity("ER Registration -> ER Triage -> Leucocytes -> CRP -> IV Antibiotics"))
print(predict_next_activity("ER Registration -> ER Triage -> LacticAcid -> DisfuncOrg"))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 52ms/step
Leucocytes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Leucocytes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
Leucocytes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
Leucocytes


In [5]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Embedding, Masking, Input, Concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load Data
file_path = "Sepsis_Merged_Selected_Features_Activity.csv"
df = pd.read_csv(file_path)

df.fillna("None", inplace=True)

# Extract activity sequence columns
activity_columns = [col for col in df.columns if "Activity" in col]
df["Activity_Sequence"] = df[activity_columns].apply(lambda row: " -> ".join(row.values), axis=1)

# Encode final activity
y = df["Final Activity"]
label_encoder = LabelEncoder()
df["Final_Activity_Encoded"] = label_encoder.fit_transform(y)

# Tokenize activity sequences
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(df["Activity_Sequence"])
sequences = tokenizer.texts_to_sequences(df["Activity_Sequence"])

# Pad sequences to uniform length
max_sequence_length = max(map(len, sequences))
X_seq = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Select numerical features
feature_columns = [
    "DiagnosticArtAstrup", "DiagnosticUrinarySediment", "SIRSCritHeartRate", "SIRSCritTachypnea",
    "SIRSCritTemperature", "Hypotensie", "SIRSCritLeucos", "DiagnosticLacticAcid", "Oligurie",
    "Hypoxie", "DisfuncOrg", "Infusion", "Age", "InfectionSuspected"
]
X_features = df[feature_columns]

# Normalize all numerical features
scaler = StandardScaler()
X_features = scaler.fit_transform(X_features)

y_seq = tf.keras.utils.to_categorical(df["Final_Activity_Encoded"], num_classes=len(label_encoder.classes_))

# Split data
X_train_seq, X_test_seq, X_train_features, X_test_features, y_train, y_test = train_test_split(
    X_seq, X_features, y_seq, test_size=0.2, random_state=42, stratify=df["Final_Activity_Encoded"]
)

# Define LSTM Model
sequence_input = Input(shape=(max_sequence_length,))
embedding_layer = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=max_sequence_length)(sequence_input)
masking_layer = Masking(mask_value=0.0)(embedding_layer)
lstm_layer = LSTM(64, return_sequences=False)(masking_layer)

# Define Feature Input Model
feature_input = Input(shape=(len(feature_columns),))
feature_dense = Dense(32, activation='relu')(feature_input)

# Merge Sequence and Feature Inputs
merged = Concatenate()([lstm_layer, feature_dense])
dense_layer = Dense(32, activation='relu')(merged)
output_layer = Dense(len(label_encoder.classes_), activation='softmax')(dense_layer)

# Compile Model
model = Model(inputs=[sequence_input, feature_input], outputs=output_layer)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train Model
history = model.fit([X_train_seq, X_train_features], y_train, validation_data=([X_test_seq, X_test_features], y_test), epochs=10, batch_size=32)

# Evaluate Model
test_loss, test_accuracy = model.evaluate([X_test_seq, X_test_features], y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Save Model and Tokenizer
model.save("sepsis_lstm_model.h5")
pd.to_pickle(tokenizer, "sepsis_tokenizer.pkl")
pd.to_pickle(label_encoder, "sepsis_label_encoder.pkl")
pd.to_pickle(scaler, "sepsis_scaler.pkl")

# Function to Predict Next Activity
def predict_next_activity(activity_sequence, feature_values):
    sequence = tokenizer.texts_to_sequences([activity_sequence])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')
    
    feature_array = np.array(feature_values).reshape(1, -1)
    feature_array = scaler.transform(feature_array)
    
    prediction = model.predict([padded_sequence, feature_array])
    predicted_class = np.argmax(prediction, axis=1)
    return label_encoder.inverse_transform(predicted_class)[0]

# Example Usage
example_sequence = "ER Registration -> ER Triage -> Leucocytes"
example_features = [1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 50, 1]  # Example feature values
predicted_next_activity = predict_next_activity(example_sequence, example_features)
print(f"Predicted Next Activity: {predicted_next_activity}")

  df.fillna("None", inplace=True)


Epoch 1/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 61ms/step - accuracy: 0.2517 - loss: 2.4046 - val_accuracy: 0.4421 - val_loss: 1.8261
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - accuracy: 0.4362 - loss: 1.7558 - val_accuracy: 0.5316 - val_loss: 1.6171
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.5686 - loss: 1.5285 - val_accuracy: 0.6579 - val_loss: 1.1732
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.6931 - loss: 1.0383 - val_accuracy: 0.7684 - val_loss: 0.8464
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.7986 - loss: 0.7509 - val_accuracy: 0.7737 - val_loss: 0.7147
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.8104 - loss: 0.6291 - val_accuracy: 0.8105 - val_loss: 0.6103
Epoch 7/10
[1m24/24[0m [32m━━━━



Test Accuracy: 0.91




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 286ms/step
Predicted Next Activity: IV Antibiotics


## BioMakers


In [34]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LSTM, Dense, Embedding, Masking, Input, Concatenate
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

# Load Data
file_path = "Sepsis_Merged_Selected_Features_Activity.csv"
df = pd.read_csv(file_path)
df_biomarkers = pd.read_csv("Sepsis_Biomarkers_Next_Activity.csv")

df.fillna("None", inplace=True)

# Extract activity sequence columns
activity_columns = [col for col in df.columns if "Activity" in col]
df["Activity_Sequence"] = df[activity_columns].apply(lambda row: " -> ".join(row.values), axis=1)

# Encode final activity
y = df["Final Activity"]
label_encoder = LabelEncoder()
df["Final_Activity_Encoded"] = label_encoder.fit_transform(y)

# Tokenize activity sequences
tokenizer = Tokenizer(filters='')
tokenizer.fit_on_texts(df["Activity_Sequence"])
sequences = tokenizer.texts_to_sequences(df["Activity_Sequence"])

# Pad sequences to uniform length
max_sequence_length = max(map(len, sequences))
X_seq = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Select numerical features
feature_columns = [
    "DiagnosticArtAstrup", "DiagnosticUrinarySediment", "SIRSCritHeartRate", "SIRSCritTachypnea",
    "SIRSCritTemperature", "Hypotensie", "SIRSCritLeucos", "DiagnosticLacticAcid", "Oligurie",
    "Hypoxie", "DisfuncOrg", "Infusion", "Age", "InfectionSuspected"
]
X_features = df[feature_columns]

# Normalize all numerical features
scaler = StandardScaler()
X_features = pd.DataFrame(scaler.fit_transform(X_features), columns=feature_columns)

y_seq = tf.keras.utils.to_categorical(df["Final_Activity_Encoded"], num_classes=len(label_encoder.classes_))

# Split data
X_train_seq, X_test_seq, X_train_features, X_test_features, y_train, y_test = train_test_split(
    X_seq, X_features, y_seq, test_size=0.2, random_state=42, stratify=df["Final_Activity_Encoded"]
)

# Define LSTM Model
sequence_input = Input(shape=(max_sequence_length,))
embedding_layer = Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=64, input_length=max_sequence_length)(sequence_input)
masking_layer = Masking(mask_value=0.0)(embedding_layer)
lstm_layer = LSTM(64, return_sequences=False)(masking_layer)

# Define Feature Input Model
feature_input = Input(shape=(len(feature_columns),))
feature_dense = Dense(32, activation='relu')(feature_input)

# Merge Sequence and Feature Inputs
merged = Concatenate()([lstm_layer, feature_dense])
dense_layer = Dense(32, activation='relu')(merged)
output_layer = Dense(len(label_encoder.classes_), activation='softmax')(dense_layer)

# Compile Model
model = Model(inputs=[sequence_input, feature_input], outputs=output_layer)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train Model
history = model.fit([X_train_seq, X_train_features], y_train, validation_data=([X_test_seq, X_test_features], y_test), epochs=10, batch_size=32)

# Evaluate Model
test_loss, test_accuracy = model.evaluate([X_test_seq, X_test_features], y_test)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Save Model and Tokenizer
model.save("sepsis_lstm_model.h5")
pd.to_pickle(tokenizer, "sepsis_tokenizer.pkl")
pd.to_pickle(label_encoder, "sepsis_label_encoder.pkl")
pd.to_pickle(scaler, "sepsis_scaler.pkl")

# Biomarker-Based Activity Mapping (Prioritized Decision Making)
biomarker_priority = ["LacticAcid", "CRP", "Leucocytes"]
biomarker_next_activity_mapping = {
    "Leucocytes": {"High": "LacticAcid", "Elevated": "CRP", "Normal": "ER Triage"},
    "CRP": {"Severe": "IV Antibiotics", "Moderate": "LacticAcid", "Low": "ER Triage"},
    "LacticAcid": {"Critical": "ICU Admission", "High": "IV Fluid", "Normal": "ER Triage"}
}

# Function to Predict Next Activity with Priority-Based Biomarker Handling
def predict_next_activity(activity_sequence, feature_values, biomarker_values):
    sequence = tokenizer.texts_to_sequences([activity_sequence])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')
    
    feature_array = np.array(feature_values).reshape(1, -1)
    feature_array = scaler.transform(pd.DataFrame(feature_array, columns=feature_columns))
    
    for biomarker in biomarker_priority:
        if biomarker in biomarker_values:
            biomarker_value = biomarker_values[biomarker]
            if biomarker_value in biomarker_next_activity_mapping[biomarker]:
                return biomarker_next_activity_mapping[biomarker][biomarker_value]
    
    model_prediction = model.predict([padded_sequence, feature_array])
    predicted_class = np.argmax(model_prediction, axis=1)
    return label_encoder.inverse_transform(predicted_class)[0]

# Example Usage
example_sequence = "ER Registration -> ER Triage -> Leucocytes"
example_features = [1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 50, 1]
biomarker_values = {"Leucocytes": "High", "CRP": "Severe", "LacticAcid": "Normal"}
predicted_next_activity = predict_next_activity(example_sequence, example_features, biomarker_values)
print(f"Predicted Next Activity: {predicted_next_activity}")


Epoch 1/10


  df.fillna("None", inplace=True)


[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 59ms/step - accuracy: 0.2594 - loss: 2.3741 - val_accuracy: 0.3947 - val_loss: 1.8743
Epoch 2/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.4075 - loss: 1.8071 - val_accuracy: 0.5474 - val_loss: 1.5461
Epoch 3/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.6455 - loss: 1.3727 - val_accuracy: 0.7789 - val_loss: 0.9927
Epoch 4/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.7764 - loss: 0.9117 - val_accuracy: 0.7842 - val_loss: 0.8457
Epoch 5/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.8136 - loss: 0.6791 - val_accuracy: 0.8158 - val_loss: 0.6817
Epoch 6/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.8303 - loss: 0.6218 - val_accuracy: 0.8158 - val_loss: 0.6054
Epoch 7/10
[1m24/24[0m [32m━━━━━━━━━━━━━━━




Predicted Next Activity: ER Triage


In [35]:
example_sequence = "ER Registration -> ER Triage -> Leucocytes -> CRP -> LacticAcid"
example_features = [1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 50, 1]
biomarker_values = {"Leucocytes": "High", "CRP": "Moderate", "LacticAcid": "Critical"}

predicted_next_activity = predict_next_activity(example_sequence, example_features, biomarker_values)
print(f"Predicted Next Activity: {predicted_next_activity}")


Predicted Next Activity: ICU Admission


In [36]:
example_sequence = "ER Registration -> ER Triage -> Leucocytes -> CRP -> LacticAcid"
example_features = [1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 45, 1]
biomarker_values = {"Leucocytes": "Normal", "CRP": "Low", "LacticAcid": "Normal"}

predicted_next_activity = predict_next_activity(example_sequence, example_features, biomarker_values)
print(f"Test 1 - Predicted Next Activity: {predicted_next_activity}")


Test 1 - Predicted Next Activity: ER Triage


In [37]:
example_sequence = "ER Registration -> ER Triage -> Leucocytes -> CRP"
example_features = [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 55, 1]
biomarker_values = {"Leucocytes": "Elevated", "CRP": "Moderate"}

predicted_next_activity = predict_next_activity(example_sequence, example_features, biomarker_values)
print(f"Test 2 - Predicted Next Activity: {predicted_next_activity}")


Test 2 - Predicted Next Activity: LacticAcid


In [38]:
example_sequence = "ER Registration -> ER Triage -> LacticAcid"
example_features = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60, 1]
biomarker_values = {"LacticAcid": "Critical"}

predicted_next_activity = predict_next_activity(example_sequence, example_features, biomarker_values)
print(f"Test 3 - Predicted Next Activity: {predicted_next_activity}")


Test 3 - Predicted Next Activity: ICU Admission


In [39]:
example_sequence = "ER Registration -> ER Triage -> CRP"
example_features = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 65, 1]
biomarker_values = {"CRP": "Severe"}

predicted_next_activity = predict_next_activity(example_sequence, example_features, biomarker_values)
print(f"Test 4 - Predicted Next Activity: {predicted_next_activity}")


Test 4 - Predicted Next Activity: IV Antibiotics


In [40]:
example_sequence = "ER Registration -> ER Triage -> Leucocytes -> CRP -> LacticAcid"
example_features = [1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 50, 1]
biomarker_values = {"Leucocytes": "Elevated", "CRP": "Severe", "LacticAcid": "High"}

predicted_next_activity = predict_next_activity(example_sequence, example_features, biomarker_values)
print(f"Test 5 - Predicted Next Activity: {predicted_next_activity}")


Test 5 - Predicted Next Activity: IV Fluid
