In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

In [3]:
df = pd.read_csv("Downloads/triage.csv")
print("Initial shape:", df.shape)

# Convert vitals to numeric
vital_cols = ['temperature', 'heartrate', 'resprate', 'o2sat', 'sbp', 'dbp', 'pain']
for col in vital_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Drop missing critical values
df.dropna(subset=['acuity', 'chiefcomplaint'], inplace=True)

# Map acuity to labels
acuity_map = {1: 'Critical', 2: 'Moderate', 3: 'Low'}
df['acuity_label'] = df['acuity'].map(acuity_map)

# Keep relevant features
df = df[['chiefcomplaint'] + vital_cols + ['acuity_label']]

# Drop remaining NaNs
df.dropna(inplace=True)

print("After cleaning:", df.shape)
print(df['acuity_label'].value_counts())

Initial shape: (222, 11)
After cleaning: (181, 9)
acuity_label
Low         88
Moderate    87
Critical     6
Name: count, dtype: int64


In [4]:
# Split text and numeric features
X_text = df['chiefcomplaint']
X_numeric = df[vital_cols]
y = df['acuity_label']

# Vectorize chief complaints
tfidf = TfidfVectorizer(stop_words='english', max_features=50)
X_text_tfidf = tfidf.fit_transform(X_text).toarray()

# Combine with numeric
X_combined = np.hstack((X_text_tfidf, X_numeric.values))

In [5]:
# Apply SMOTE
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_combined, y)

# Split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

In [6]:
# Train AdaBoost with Decision Tree
ada = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=2),
    n_estimators=100,
    learning_rate=1.0,
    random_state=42
)
ada.fit(X_train, y_train)



In [7]:
y_pred = ada.predict(X_test)

print("📈 AdaBoost Classification Report:\n")
print(classification_report(y_test, y_pred))

print("🧾 Confusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))

📈 AdaBoost Classification Report:

              precision    recall  f1-score   support

    Critical       0.94      0.94      0.94        17
         Low       0.69      0.61      0.65        18
    Moderate       0.60      0.67      0.63        18

    accuracy                           0.74        53
   macro avg       0.74      0.74      0.74        53
weighted avg       0.74      0.74      0.74        53

🧾 Confusion Matrix:

[[16  0  1]
 [ 0 11  7]
 [ 1  5 12]]


In [8]:
import pickle

# 💾 Save trained AdaBoost model
with open("adaboost_model.pkl", "wb") as f:
    pickle.dump(ada, f)

# 💾 Save TF-IDF vectorizer used for 'chiefcomplaint'
with open("tfidf_vectorizer.pkl", "wb") as f:
    pickle.dump(tfidf, f)

# 💾 Save the order of vital columns
vital_cols = ['temperature', 'heartrate', 'resprate', 
              'o2sat', 'sbp', 'dbp', 'pain']
with open("vital_columns.pkl", "wb") as f:
    pickle.dump(vital_cols, f)

print("✅ Model, TF-IDF vectorizer, and vital columns saved for Flask deployment.")

✅ Model, TF-IDF vectorizer, and vital columns saved for Flask deployment.
