In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Read the raw records into a DataFrame. The absolute path points at the
# Colab working area, so adjust it when running elsewhere.
df = pd.read_csv(filepath_or_buffer="/content/healthcare_dataset.csv")

In [None]:
# Drop the columns that are not used as model inputs.
df = df.drop(
    labels=["Name", "Date of Admission", "Discharge Date", "Doctor", "Hospital"],
    axis="columns",
)

# "Test Results" is the prediction target; every remaining column is a feature.
y = df["Test Results"]
X = df.drop(columns=["Test Results"])

In [None]:
# Partition the feature columns by dtype: int64/float64 columns are scaled
# later, object columns are label-encoded.
numerical_cols = list(X.select_dtypes(include=["int64", "float64"]).columns)
categorical_cols = list(X.select_dtypes(include="object").columns)

In [None]:
# Fit one LabelEncoder per categorical column and replace that column's
# values with integer codes. The fitted encoders are kept (keyed by column
# name) so the same mapping can be reused at prediction time.
encoders = {}
for col in categorical_cols:
    encoders[col] = LabelEncoder()
    X[col] = encoders[col].fit_transform(X[col])

In [None]:
# Standardise the numeric feature columns in place, keeping the fitted scaler
# for reuse at prediction time.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# below, so test rows contribute to the fitted statistics.
scaler = StandardScaler().fit(X[numerical_cols])
X[numerical_cols] = scaler.transform(X[numerical_cols])

In [None]:
# Encode the target labels as integers; the fitted encoder is kept so
# predictions can be decoded back to label strings.
target_encoder = LabelEncoder().fit(y)
y_encoded = target_encoder.transform(y)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Train a random forest with default hyperparameters and a fixed seed.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
# Trailing expression so the cell still displays the fitted estimator.
model

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# Per-class metrics, labelled with the original target strings.
# NOTE(review): `report` is not printed in any cell visible here.
report = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    target_names=target_encoder.classes_,
)

In [None]:
# Decode the integer class ids back to label strings and put the true and
# predicted labels side by side.
comparison = pd.DataFrame(
    {
        column: target_encoder.inverse_transform(encoded)
        for column, encoded in (("Actual", y_test), ("Predicted", y_pred))
    }
)

In [None]:
# Show the first ten actual/predicted pairs as plain text.
print(comparison.head(n=10))

         Actual     Predicted
0      Abnormal  Inconclusive
1      Abnormal        Normal
2      Abnormal        Normal
3  Inconclusive      Abnormal
4      Abnormal      Abnormal
5      Abnormal  Inconclusive
6      Abnormal  Inconclusive
7      Abnormal  Inconclusive
8        Normal      Abnormal
9        Normal        Normal


In [None]:
# Report accuracy as a percentage. With three target classes, uniform random
# guessing would score about 33%, so compare the printed value against that.
print("Accuracy: {:.2f}%".format(100 * accuracy))

Accuracy: 32.82%


In [None]:
import pickle

In [None]:
# Persist the model together with every fitted preprocessing object and the
# column lists, so inference code can rebuild the same pipeline.
# Pickle files execute code on load, so only load this file from a trusted source.
with open("healthcare_model.pkl", mode="wb") as model_file:
    pickle.dump(
        {
            "model": model,
            "features": list(X.columns),
            "encoders": encoders,
            "scaler": scaler,
            "target_encoder": target_encoder,
            "numerical_cols": numerical_cols,
            "categorical_cols": categorical_cols,
        },
        model_file,
    )

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.30.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.10.1 (from gradio)
  Downloading gradio_client-1.10.1-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.

In [None]:
def predict(age, gender, blood_type, condition, insurance, bill, room, adm_type, medication):
    """Predict the "Test Results" label for a single patient record.

    The inputs are run through the same fitted preprocessing objects used for
    training (``encoders``, ``scaler``) before being passed to ``model``.

    Args:
        age: Value for the "Age" feature.
        gender: Label for "Gender".
        blood_type: Label for "Blood Type".
        condition: Label for "Medical Condition".
        insurance: Label for "Insurance Provider".
        bill: Value for the "Billing Amount" feature.
        room: Value for the "Room Number" feature.
        adm_type: Label for "Admission Type".
        medication: Label for "Medication".

    Returns:
        The predicted label, decoded with ``target_encoder``.

    Raises:
        ValueError: If any input is None (for example a form field left
            empty). The encoders may also raise for labels they were not
            fitted on.
    """
    raw = {
        "Age": age,
        "Gender": gender,
        "Blood Type": blood_type,
        "Medical Condition": condition,
        "Insurance Provider": insurance,
        "Billing Amount": bill,
        "Room Number": room,
        "Admission Type": adm_type,
        "Medication": medication,
    }

    # Fail early with a readable message instead of letting None propagate
    # into the encoders / scaler.
    missing = [name for name, value in raw.items() if value is None]
    if missing:
        raise ValueError("Missing value for: " + ", ".join(missing))

    row = pd.DataFrame([raw])

    # Apply each fitted encoder to its own column (keys are column names).
    for col, encoder in encoders.items():
        row[col] = encoder.transform(row[col])

    row[numerical_cols] = scaler.transform(row[numerical_cols])

    # Present the columns in exactly the order the model was fitted on, rather
    # than relying on the dict literal above matching the training frame.
    row = row[list(model.feature_names_in_)]

    result = model.predict(row)[0]
    return target_encoder.inverse_transform([result])[0]

In [None]:
import gradio as gr

In [None]:
# Build the web form and launch it. Inputs are listed in the same order as
# predict()'s parameters: the three numeric fields become number boxes, and
# every other field becomes a dropdown of the classes its encoder was fitted on.
gr.Interface(
    fn=predict,
    inputs=[
        gr.Number(label=field)
        if field in ("Age", "Billing Amount", "Room Number")
        else gr.Dropdown(choices=encoders[field].classes_.tolist(), label=field)
        for field in (
            "Age",
            "Gender",
            "Blood Type",
            "Medical Condition",
            "Insurance Provider",
            "Billing Amount",
            "Room Number",
            "Admission Type",
            "Medication",
        )
    ],
    outputs=gr.Text(label="Predicted Test Result"),
    title="Healthcare Test Result Predictor",
).launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://99cec98c5f8745bd61.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


