In [10]:
import pandas as pd
import random

# Generate synthetic dataset
def generate_synthetic_triage_data(n=50):
    genders = ["Male", "Female"]
    histories = ["None", "Hypertension", "Diabetes", "Asthma", "Heart Disease", "Anxiety Disorder", "Tuberculosis", "Epilepsy"]
    urgencies = ["Low", "Moderate", "High"]
    symptoms_list = {
        "Low": [
            "Mild sore throat and runny nose",
            "Seasonal allergies with sneezing",
            "Minor ankle sprain",
            "Low-grade fever and fatigue",
            "Itchy skin rash on arms"
        ],
        "Moderate": [
            "Vomiting and abdominal cramps",
            "Persistent cough and low-grade fever",
            "Dizziness and blurred vision",
            "Migraine with light sensitivity",
            "Chest tightness during exertion"
        ],
        "High": [
            "Severe chest pain and shortness of breath",
            "Seizure and confusion",
            "Bleeding from head wound after fall",
            "Unconscious after accident",
            "High fever with breathing difficulty"
        ]
    }

    data = []

    for i in range(n):
        urgency = random.choice(urgencies)
        symptoms = random.choice(symptoms_list[urgency])
        age = random.randint(18, 85)
        gender = random.choice(genders)
        history = random.choice(histories)

        # Simulate vitals based on urgency
        if urgency == "High":
            hr = random.randint(105, 140)
            bp = f"{random.randint(140, 180)}/{random.randint(90, 110)}"
            temp = round(random.uniform(38.0, 40.5), 1)
            spo2 = random.randint(85, 93)
        elif urgency == "Moderate":
            hr = random.randint(90, 110)
            bp = f"{random.randint(125, 140)}/{random.randint(80, 95)}"
            temp = round(random.uniform(37.3, 38.0), 1)
            spo2 = random.randint(93, 96)
        else:
            hr = random.randint(70, 90)
            bp = f"{random.randint(110, 125)}/{random.randint(70, 85)}"
            temp = round(random.uniform(36.5, 37.2), 1)
            spo2 = random.randint(96, 100)

        data.append({
            "Symptoms": symptoms,
            "Age": age,
            "Gender": gender,
            "Past History": history,
            "Heart Rate": hr,
            "BP": bp,
            "Temp (°C)": temp,
            "SpO2 (%)": spo2,
            "Urgency": urgency
        })

    return pd.DataFrame(data)


df_synthetic_triage = generate_synthetic_triage_data()
#df_synthetic_triage.head()
df_synthetic_triage.shape


(50, 9)

In [1]:
import pandas as pd
import random

# Generate synthetic triage data (extended version for 1000 records)
def generate_synthetic_triage_data(n=1000):
    genders = ["Male", "Female"]
    histories = ["None", "Hypertension", "Diabetes", "Asthma", "Heart Disease", "Anxiety Disorder", "Tuberculosis", "Epilepsy"]
    urgencies = ["Low", "Moderate", "High"]
    symptoms_list = {
        "Low": [
            "Mild sore throat and runny nose",
            "Seasonal allergies with sneezing",
            "Minor ankle sprain",
            "Low-grade fever and fatigue",
            "Itchy skin rash on arms",
            "Mild headache and nausea",
            "Dry cough and tiredness",
            "Sneezing and watery eyes"
        ],
        "Moderate": [
            "Vomiting and abdominal cramps",
            "Persistent cough and low-grade fever",
            "Dizziness and blurred vision",
            "Migraine with light sensitivity",
            "Chest tightness during exertion",
            "Swollen legs and mild breathing difficulty",
            "High temperature with body pain",
            "Ongoing diarrhea and weakness"
        ],
        "High": [
            "Severe chest pain and shortness of breath",
            "Seizure and confusion",
            "Bleeding from head wound after fall",
            "Unconscious after accident",
            "High fever with breathing difficulty",
            "Severe trauma and open wound",
            "Loss of consciousness with vomiting",
            "Sudden paralysis and slurred speech"
        ]
    }

    data = []

    for _ in range(n):
        urgency = random.choice(urgencies)
        symptoms = random.choice(symptoms_list[urgency])
        age = random.randint(18, 90)
        gender = random.choice(genders)
        history = random.choice(histories)

        # Simulate vitals based on urgency
        if urgency == "High":
            hr = random.randint(105, 140)
            bp = f"{random.randint(140, 180)}/{random.randint(90, 110)}"
            temp = round(random.uniform(38.0, 40.5), 1)
            spo2 = random.randint(85, 93)
        elif urgency == "Moderate":
            hr = random.randint(90, 110)
            bp = f"{random.randint(125, 140)}/{random.randint(80, 95)}"
            temp = round(random.uniform(37.3, 38.0), 1)
            spo2 = random.randint(93, 96)
        else:
            hr = random.randint(70, 90)
            bp = f"{random.randint(110, 125)}/{random.randint(70, 85)}"
            temp = round(random.uniform(36.5, 37.2), 1)
            spo2 = random.randint(96, 100)

        data.append({
            "Symptoms": symptoms,
            "Age": age,
            "Gender": gender,
            "Past History": history,
            "Heart Rate": hr,
            "BP": bp,
            "Temp (°C)": temp,
            "SpO2 (%)": spo2,
            "Urgency": urgency
        })

    return pd.DataFrame(data)

# Generate 1000 synthetic records
df_1000 = generate_synthetic_triage_data(1000)
df_1000.head()


Unnamed: 0,Symptoms,Age,Gender,Past History,Heart Rate,BP,Temp (°C),SpO2 (%),Urgency
0,Unconscious after accident,67,Female,Anxiety Disorder,108,148/102,40.0,89,High
1,Vomiting and abdominal cramps,27,Male,Heart Disease,94,135/94,37.3,95,Moderate
2,Itchy skin rash on arms,58,Male,Epilepsy,71,116/83,36.6,100,Low
3,Swollen legs and mild breathing difficulty,30,Female,,92,138/84,37.6,96,Moderate
4,Dry cough and tiredness,22,Female,Epilepsy,76,125/71,37.1,100,Low


In [2]:
df_1000.to_csv("triage_synthetic_1000.csv", index=False)

In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.metrics import classification_report
from transformers import AutoTokenizer, AutoModel


df = pd.read_csv("triage_synthetic_1000.csv")


label_enc = LabelEncoder()
df["label"] = label_enc.fit_transform(df["Urgency"])  # Low, Moderate, High → 0, 1, 2

ohe = OneHotEncoder()
cat_features = ohe.fit_transform(df[["Gender", "Past History"]]).toarray()

numeric = df[["Age", "Heart Rate", "Temp (°C)", "SpO2 (%)"]].values
X_tabular = np.hstack([numeric, cat_features]).astype(np.float32)
y = df["label"].values

tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
encodings = tokenizer(list(df["Symptoms"]), padding=True, truncation=True,max_length=128, return_tensors="pt")

train_idx, test_idx = train_test_split(np.arange(len(df)), test_size=0.2, stratify=y, random_state=42)


X_tabular_tensor = torch.tensor(X_tabular)
y_tensor = torch.tensor(y)
input_ids = encodings["input_ids"]
attention_mask = encodings["attention_mask"]


class HybridClinicalModel(nn.Module):
    def __init__(self, tabular_dim, n_classes):
        super().__init__()
        self.bert = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
        self.tabular_mlp = nn.Sequential(
            nn.Linear(tabular_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.2)
        )
        self.classifier = nn.Sequential(
            nn.Linear(768 + 32, 64),
            nn.ReLU(),
            nn.Linear(64, n_classes)
        )

    def forward(self, input_ids, attention_mask, tabular_input):
        bert_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        text_feat = bert_out.last_hidden_state.mean(dim=1)
        tab_feat = self.tabular_mlp(tabular_input)
        combined = torch.cat((text_feat, tab_feat), dim=1)
        return self.classifier(combined)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HybridClinicalModel(tabular_dim=X_tabular.shape[1], n_classes=3).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
loss_fn = nn.CrossEntropyLoss()


EPOCHS = 4
for epoch in range(EPOCHS):
    model.train()
    optimizer.zero_grad()
    
    output = model(
        input_ids[train_idx].to(device),
        attention_mask[train_idx].to(device),
        X_tabular_tensor[train_idx].to(device)
    )
    loss = loss_fn(output, y_tensor[train_idx].to(device))
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}/{EPOCHS} — Loss: {loss.item():.4f}")


model.eval()
with torch.no_grad():
    preds = model(
        input_ids[test_idx].to(device),
        attention_mask[test_idx].to(device),
        X_tabular_tensor[test_idx].to(device)
    )
    y_pred = torch.argmax(preds, dim=1).cpu()
    print("\n📊 Classification Report:")
    print(classification_report(y_tensor[test_idx], y_pred, target_names=label_enc.classes_))


Epoch 1/4 — Loss: 1.2968
Epoch 2/4 — Loss: 1.2519
Epoch 3/4 — Loss: 1.2026
Epoch 4/4 — Loss: 1.1787

📊 Classification Report:
              precision    recall  f1-score   support

        High       0.93      0.61      0.74        67
         Low       0.00      0.00      0.00        68
    Moderate       0.42      1.00      0.59        65

    accuracy                           0.53       200
   macro avg       0.45      0.54      0.44       200
weighted avg       0.45      0.53      0.44       200



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [4]:
import streamlit as st

In [4]:
import joblib
torch.save(model.state_dict(), "triage_model1.pt")
joblib.dump(label_enc, "label_encoder1.pkl")
joblib.dump(ohe, "ohe_encoder1.pkl")


['ohe_encoder1.pkl']