In [None]:
!pip install catboost
!pip install xgboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl.metadata (1.5 kB)
Collecting graphviz (from catboost)
  Downloading graphviz-0.20.3-py3-none-any.whl.metadata (12 kB)
Downloading catboost-1.2.8-cp312-cp312-win_amd64.whl (102.4 MB)
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/102.4 MB ? eta -:--:--
   ---------------------------------------- 0.0/102.4 MB 393.8 kB/s eta 0:04:20
   ---------------------------------------- 0.1/102.4 MB 901.1 kB/s eta 0:01:54
   ---------------------------------------- 0.3/102.4 MB 1.7 MB/s eta 0:00:59
    --------------------------------------- 1.5/102.4 MB 5.9 MB/s eta 0:00:17
    --------------------------------------- 2.4/102.4 MB 7.9 MB/s eta 0:00:13
   - -------------------------------------- 3.0/102.4 MB 8.8 MB/s eta 0:00:12
   - -------------------------------------- 3.0/102.4 MB 8

In [None]:
# Core packages
import pandas as pd
import numpy as np
import os
import pickle

# Preprocessing and transformation
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Model selection and evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Model
from xgboost import XGBClassifier

In [None]:
# Location of the dataset CSV, resolved relative to the working directory
file_path = "df_balanced.csv"

# Fail fast with an explicit error when the CSV is missing; otherwise load it
if not os.path.exists(file_path):
    raise FileNotFoundError(f"File not found: {file_path}")

df_balanced = pd.read_csv(file_path)
print("File successfully loaded.")

File successfully loaded.


In [None]:
# Label column and the predictor columns handed to the model
target = 'SepsisIndicator'
features = [
    "HourOfObservation",
    "PatientAge",
    "ICULengthOfStay",
    "PatientGender",
    "TimeSinceHospitalAdmission",
    "HeartRate",
    "MeanArterialPressure",
    "OxygenSaturation",
    "SystolicBloodPressure",
    "RespiratoryRate",
    "DiastolicBloodPressure",
    "Temperature",
    "WhiteBloodCellCount",
    "CreatinineLevel",
    "TotalBilirubin",
    "PlateletCount",
    "LactateLevel",
]

# Separate predictors from the label
X, y = df_balanced[features], df_balanced[target]

# Hold-out split: stratified on the label, fixed seed for reproducibility,
# test_size left at the library default.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)

In [None]:
# Preprocessing pipelines
# Columns routed through the numeric branch (impute, then scale).
numeric_features = [
    "HourOfObservation", "PatientAge", "ICULengthOfStay",
    "TimeSinceHospitalAdmission", "HeartRate", "MeanArterialPressure",
    "OxygenSaturation", "SystolicBloodPressure", "RespiratoryRate", "DiastolicBloodPressure",
    "Temperature", "WhiteBloodCellCount", "CreatinineLevel", "TotalBilirubin",
    "PlateletCount", "LactateLevel"
]
# Columns routed through the categorical branch (impute, then one-hot encode).
categorical_features = ["PatientGender"]

# Numeric branch: fill missing values with the column median, then standardize.
numeric_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler())
])

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode into a dense array. handle_unknown='ignore' keeps
# prediction from failing on categories that were not seen during fit.
categorical_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

# Apply each branch to its own column subset.
preprocessor = ColumnTransformer([
    ('num', numeric_pipeline, numeric_features),
    ('cat', categorical_pipeline, categorical_features)
])

# Model pipeline
# Preprocessing and classifier are bundled so the fitted transformers are
# applied identically at predict time.
# `use_label_encoder` is intentionally not passed: the installed XGBoost
# ignores it and emits a "Parameters ... are not used" warning on every fit.
pipeline = Pipeline([
    ('preprocess', preprocessor),
    ('classifier', XGBClassifier(eval_metric='logloss', random_state=42))
])

# Train model (transformers are fitted on the training split only)
pipeline.fit(X_train, y_train)

# Print model performance on the held-out split
y_pred = pipeline.predict(X_test)
print(classification_report(y_test, y_pred))

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


              precision    recall  f1-score   support

           0       0.92      0.85      0.88      1998
           1       0.86      0.92      0.89      1998

    accuracy                           0.89      3996
   macro avg       0.89      0.89      0.88      3996
weighted avg       0.89      0.89      0.88      3996



In [None]:
# Serialize the fitted pipeline object to disk with pickle
with open("sepsis_model.pkl", mode="wb") as model_file:
    pickle.dump(pipeline, model_file)

print("Model saved as sepsis_model.pkl")

Model saved as sepsis_model.pkl


In [None]:
# Read the pickled model back from disk into `loaded_model`.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open('sepsis_model.pkl', mode='rb') as model_file:
    loaded_model = pickle.load(model_file)