In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [3]:
# Load the healthcare records from the CSV file into a DataFrame.
DATASET_CSV = "/content/healthcare_dataset.csv"
df = pd.read_csv(DATASET_CSV)

In [4]:
# Display the loaded frame for a quick visual check of its columns and values.
df

Unnamed: 0,Name,Age,Gender,Blood Type,Medical Condition,Date of Admission,Doctor,Hospital,Insurance Provider,Billing Amount,Room Number,Admission Type,Discharge Date,Medication,Test Results
0,Bobby JacksOn,30,Male,B-,Cancer,2024-01-31,Matthew Smith,Sons and Miller,Blue Cross,18856.281306,328,Urgent,2024-02-02,Paracetamol,Normal
1,LesLie TErRy,62,Male,A+,Obesity,2019-08-20,Samantha Davies,Kim Inc,Medicare,33643.327287,265,Emergency,2019-08-26,Ibuprofen,Inconclusive
2,DaNnY sMitH,76,Female,A-,Obesity,2022-09-22,Tiffany Mitchell,Cook PLC,Aetna,27955.096079,205,Emergency,2022-10-07,Aspirin,Normal
3,andrEw waTtS,28,Female,O+,Diabetes,2020-11-18,Kevin Wells,"Hernandez Rogers and Vang,",Medicare,37909.782410,450,Elective,2020-12-18,Ibuprofen,Abnormal
4,adrIENNE bEll,43,Female,AB+,Cancer,2022-09-19,Kathleen Hanna,White-White,Aetna,14238.317814,458,Urgent,2022-10-09,Penicillin,Abnormal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55495,eLIZABeTH jaCkSOn,42,Female,O+,Asthma,2020-08-16,Joshua Jarvis,Jones-Thompson,Blue Cross,2650.714952,417,Elective,2020-09-15,Penicillin,Abnormal
55496,KYle pEREz,61,Female,AB-,Obesity,2020-01-23,Taylor Sullivan,Tucker-Moyer,Cigna,31457.797307,316,Elective,2020-02-01,Aspirin,Normal
55497,HEATher WaNG,38,Female,B+,Hypertension,2020-07-13,Joe Jacobs DVM,"and Mahoney Johnson Vasquez,",UnitedHealthcare,27620.764717,347,Urgent,2020-08-10,Ibuprofen,Abnormal
55498,JENniFER JOneS,43,Male,O-,Arthritis,2019-05-25,Kimberly Curry,"Jackson Todd and Castro,",Medicare,32451.092358,321,Elective,2019-05-31,Ibuprofen,Abnormal


In [5]:
# Columns dropped here are not used as model features.
# errors="ignore" keeps this cell re-runnable: `df` is rebound to the reduced
# frame, so a second execution would otherwise raise KeyError because the
# columns are already gone.
df = df.drop(
    columns=["Name", "Date of Admission", "Discharge Date", "Doctor", "Hospital"],
    errors="ignore",
)

# Feature matrix: every remaining column except the target.
X = df.drop(columns="Test Results")
# Target: the "Test Results" label column.
y = df["Test Results"]

In [6]:
# Display the target Series (the "Test Results" column of df).
y

Unnamed: 0,Test Results
0,Normal
1,Inconclusive
2,Normal
3,Abnormal
4,Abnormal
...,...
55495,Abnormal
55496,Normal
55497,Abnormal
55498,Abnormal


In [17]:
# Read the dtypes from the unencoded `df` (restricted to X's columns) rather
# than from `X` itself. The encoding cell overwrites X's string columns with
# integer codes, so re-running this cell against X after encoding would report
# no categorical columns and treat every feature as numerical. That would
# corrupt the column lists used by the scaler, the pickle and predict().
feature_frame = df[X.columns]
categorical_cols = feature_frame.select_dtypes(include="object").columns.tolist()
numerical_cols = feature_frame.select_dtypes(include=["int64", "float64"]).columns.tolist()

In [10]:
# Fit one LabelEncoder per categorical column and replace the column in X with
# its integer codes; the fitted encoders are kept by column name for later use.
encoders = {}
for col in categorical_cols:
    column_encoder = LabelEncoder()
    X[col] = column_encoder.fit_transform(X[col])
    encoders[col] = column_encoder

In [11]:
# Standardise the numerical columns in place and keep the fitted scaler.
# NOTE(review): the scaler is fit on all rows, before the train/test split
# performed later in the notebook, so held-out rows contribute to its statistics.
scaler = StandardScaler().fit(X[numerical_cols])
X[numerical_cols] = scaler.transform(X[numerical_cols])

In [12]:
# Encode the string labels in y as integers; the fitted encoder is kept so
# predictions can be mapped back to label names.
target_encoder = LabelEncoder().fit(y)
y_encoded = target_encoder.transform(y)

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
split_parts = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

In [14]:
# Display the held-out feature rows (already label-encoded and scaled).
X_test

Unnamed: 0,Age,Gender,Blood Type,Medical Condition,Insurance Provider,Billing Amount,Room Number,Admission Type,Medication
31641,0.278567,1,6,3,3,-1.542602,0.328571,0,0
9246,-0.027520,0,0,3,0,0.804341,0.614925,1,4
1583,-1.608969,1,0,1,1,1.326666,-1.328811,0,4
36506,1.145813,0,4,5,0,0.141831,-1.441617,1,1
11259,0.227552,1,3,5,0,0.135596,-0.148686,1,3
...,...,...,...,...,...,...,...,...,...
32183,-0.894766,1,7,0,4,-1.209451,0.450054,0,3
17967,-0.282593,1,2,1,4,-1.404754,1.708276,0,3
46192,1.145813,0,4,5,4,0.922846,1.178955,1,2
43968,0.176538,0,3,2,0,-1.425671,-1.016425,0,1


In [15]:
# Train a random forest on the training split (seeded for repeatability).
# fit() returns the estimator itself, so ending the cell with `model` shows
# the same fitted-estimator output.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
model

In [18]:
from sklearn.metrics import accuracy_score, classification_report

# Score the held-out split: overall accuracy plus a per-class report whose
# rows are labelled with the original class names.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
report = classification_report(
    y_test,
    y_pred,
    target_names=target_encoder.classes_,
)

In [19]:
# Map the integer labels back to their names and put true vs. predicted
# labels side by side for inspection.
actual_labels = target_encoder.inverse_transform(y_test)
predicted_labels = target_encoder.inverse_transform(y_pred)
comparison = pd.DataFrame({'Actual': actual_labels, 'Predicted': predicted_labels})

In [20]:
# Show the first ten actual/predicted pairs as plain text.
first_rows = comparison.iloc[:10]
print(first_rows)

         Actual     Predicted
0  Inconclusive      Abnormal
1      Abnormal  Inconclusive
2  Inconclusive      Abnormal
3      Abnormal        Normal
4  Inconclusive  Inconclusive
5      Abnormal  Inconclusive
6  Inconclusive      Abnormal
7      Abnormal  Inconclusive
8  Inconclusive        Normal
9        Normal  Inconclusive


In [21]:
# Report test-set accuracy as a percentage with two decimals.
accuracy_percent = accuracy * 100
print("Accuracy: {:.2f}%".format(accuracy_percent))

Accuracy: 43.33%


In [22]:
import pickle

In [23]:
# Bundle the fitted model with the preprocessing objects and column lists
# needed to prepare new rows, then serialise the bundle to disk.
artifacts = {
    "model": model,
    "features": X.columns.tolist(),
    "encoders": encoders,
    "scaler": scaler,
    "target_encoder": target_encoder,
    "numerical_cols": numerical_cols,
    "categorical_cols": categorical_cols,
}
with open("healthcare_model.pkl", "wb") as model_file:
    pickle.dump(artifacts, model_file)

In [24]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [25]:
def predict(age, gender, blood_type, condition, insurance, bill, room, adm_type, medication):
    """Predict the "Test Results" label for one patient record.

    The inputs are raw, human-readable values; the function applies the same
    preprocessing used for training (label encoding of the categorical fields,
    scaling of the numerical ones) before querying the model.

    Args:
        age, bill, room: numeric values for "Age", "Billing Amount" and
            "Room Number".
        gender, blood_type, condition, insurance, adm_type, medication:
            category labels for "Gender", "Blood Type", "Medical Condition",
            "Insurance Provider", "Admission Type" and "Medication". Each must
            be a label its fitted encoder already knows.

    Returns:
        The predicted label, decoded back to its original name.

    Raises:
        ValueError: if any input is None (for example a field left empty in
            the UI), or if a category label is unknown to its encoder.
    """
    raw_inputs = {
        "Age": age,
        "Gender": gender,
        "Blood Type": blood_type,
        "Medical Condition": condition,
        "Insurance Provider": insurance,
        "Billing Amount": bill,
        "Room Number": room,
        "Admission Type": adm_type,
        "Medication": medication,
    }
    # Fail early with a readable message instead of an opaque encoder/scaler error.
    missing = [name for name, value in raw_inputs.items() if value is None]
    if missing:
        raise ValueError("Missing input for: " + ", ".join(missing))

    row = pd.DataFrame({
        "Age": [age],
        "Gender": [encoders["Gender"].transform([gender])[0]],
        "Blood Type": [encoders["Blood Type"].transform([blood_type])[0]],
        "Medical Condition": [encoders["Medical Condition"].transform([condition])[0]],
        "Insurance Provider": [encoders["Insurance Provider"].transform([insurance])[0]],
        "Billing Amount": [bill],
        "Room Number": [room],
        "Admission Type": [encoders["Admission Type"].transform([adm_type])[0]],
        "Medication": [encoders["Medication"].transform([medication])[0]]
    })

    # Scale exactly the columns the scaler was fitted on. The global
    # `numerical_cols` can change if its defining cell is re-run after X has
    # been label-encoded, which would hand the scaler the wrong set of columns.
    # Fall back to `numerical_cols` only if the scaler recorded no feature names.
    scaled_cols = list(getattr(scaler, "feature_names_in_", numerical_cols))
    row[scaled_cols] = scaler.transform(row[scaled_cols])

    result = model.predict(row)[0]
    return target_encoder.inverse_transform([result])[0]

In [26]:
import gradio as gr

In [27]:
def _category_dropdown(column):
    """Build a dropdown labelled `column` listing the classes its fitted encoder knows."""
    return gr.Dropdown(choices=encoders[column].classes_.tolist(), label=column)


# Input widgets are listed in the same order as predict()'s parameters.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Number(label="Age"),
        _category_dropdown("Gender"),
        _category_dropdown("Blood Type"),
        _category_dropdown("Medical Condition"),
        _category_dropdown("Insurance Provider"),
        gr.Number(label="Billing Amount"),
        gr.Number(label="Room Number"),
        _category_dropdown("Admission Type"),
        _category_dropdown("Medication"),
    ],
    outputs=gr.Text(label="Predicted Test Result"),
    title="Healthcare Test Result Predictor",
)
demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://40935b7c00244ef11e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


