In [7]:
import pandas as pd
import joblib
import numpy as np # Import numpy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# ======================
# Load dataset
# ======================
# Absolute Colab path: the CSV has to be present under /content before this runs.
df = pd.read_csv("/content/MB-DATA-with-Disease-Type (1).csv")

# ======================
# Features & Target
# ======================
# The disease-type column is the prediction target; PatientID, Anemia and the
# target itself are excluded from the feature matrix.
y = df['Anemia_Disease_Type']
X = df.drop(['PatientID', 'Anemia', 'Anemia_Disease_Type'], axis=1)

# Encode target: learn the label -> integer mapping, then replace y with the
# integer codes. `le` is kept so predictions can be decoded back to names.
le = LabelEncoder().fit(y)
y = le.transform(y)

# ======================
# Columns
# ======================
categorical_features = ['Gender']
numeric_features = ['Age', 'RBC10¬π¬≤-L', 'HGBg-dL', 'HCT%', 'MCVfL', 'MCHpg', 'MCHCg-dL', 'RDW-CV%']

# ======================
# Preprocessing
# ======================
# Numeric columns are standardised and Gender is one-hot encoded;
# handle_unknown='ignore' keeps transform from raising on an unseen category.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
])

# ======================
# Pipeline
# ======================
# Preprocessing and classifier are chained so that a single fitted object
# carries both steps (it is what gets pickled further down).
model = Pipeline([
    ('preprocessing', preprocessor),
    ('classifier', RandomForestClassifier(random_state=42)),
])

# ======================
# Train-test split
# ======================
# 80/20 hold-out; the fixed seed makes the partition repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# ======================
# Train
# ======================
# Scaler/encoder statistics and the forest are fitted on the training rows only.
model.fit(X_train, y_train)

# ======================
# Evaluate
# ======================
from sklearn.metrics import balanced_accuracy_score, f1_score

y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Report over every class that occurs in either the true or the predicted
# labels. Deriving the names from y_test alone makes classification_report
# raise ValueError as soon as the model predicts a class that is absent from
# the test split, because the name count no longer matches the label count.
report_labels = np.union1d(y_test, y_pred)
actual_target_names = le.inverse_transform(report_labels)
print(classification_report(
    y_test,
    y_pred,
    labels=report_labels,
    target_names=actual_target_names,
    zero_division=0,  # same scores as the default, without UndefinedMetricWarning
))

bal_acc = balanced_accuracy_score(y_test, y_pred)
# zero_division=0 keeps never-predicted classes at 0 without emitting a warning.
macro_f1 = f1_score(y_test, y_pred, average='macro', zero_division=0)

print("‚öñÔ∏è Balanced Accuracy:", bal_acc)
print("üìê Macro F1-score:", macro_f1)

# ======================
# Save model
# ======================
# Both objects are needed at inference time: the pipeline produces encoded
# class labels and the encoder turns them back into disease names.
for artifact, filename in (
    (model, "anemia_pipeline.pkl"),
    (le, "label_encoder.pkl"),
):
    joblib.dump(artifact, filename)

print("‚úÖ Model & encoder saved successfully")

Accuracy: 0.96
                                                         precision    recall  f1-score   support

              Anemia of Chronic Disease or Renal Anemia       1.00      1.00      1.00         4
Hereditary Spherocytosis or Autoimmune Hemolytic Anemia       1.00      1.00      1.00         6
                                 Iron Deficiency Anemia       1.00      1.00      1.00         5
                                              No Anemia       1.00      1.00      1.00         2
                                        Reticulocytosis       1.00      1.00      1.00         1
                                            Thalassemia       0.86      1.00      0.92         6
                                           Unclassified       0.00      0.00      0.00         1

                                               accuracy                           0.96        25
                                              macro avg       0.84      0.86      0.85        25
             

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the one recorded in this notebook's output so reruns are repeatable.
%pip install -q streamlit==1.53.1

Collecting streamlit
  Downloading streamlit-1.53.1-py3-none-any.whl.metadata (10 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.53.1-py3-none-any.whl (9.1 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 9.1/9.1 MB 55.0 MB/s eta 0:00:00
Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.9/6.9 MB 94.0 MB/s eta 0:00:00
Installing collected packages: pydeck, streamlit
Successfully installed pydeck-0.9.1 streamlit-1.53.1


In [12]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib

# ======================
# Page config
# ======================
# NOTE: this code is meant to be executed through `streamlit run <file>`.
# Running it directly in the notebook kernel only produces the
# "without `streamlit run`" warnings recorded in this cell's output.
st.set_page_config(
    layout="centered",
    page_icon="ü©∏",
    page_title="Anemia Disease Prediction",
)

# Page heading and one-line description shown above the input widgets.
st.title("ü©∏ Anemia Disease Prediction System")
st.write("Predict anemia type using blood test parameters")

# ======================
# Load model & encoder
# ======================
@st.cache_resource
def load_model():
    """Load the pickled pipeline and label encoder from /content.

    Wrapped in ``st.cache_resource`` so the objects are meant to be
    deserialised once and reused instead of being reloaded on every rerun.

    Returns:
        tuple: ``(pipeline, label_encoder)`` as read from
        ``anemia_pipeline.pkl`` and ``label_encoder.pkl``.
    """
    pipeline_path = "/content/anemia_pipeline.pkl"
    encoder_path = "/content/label_encoder.pkl"
    return joblib.load(pipeline_path), joblib.load(encoder_path)

model, le = load_model()

# ======================
# Sidebar
# ======================
# The chosen label decides which of the two input sections is rendered below.
st.sidebar.header("Input Method")
input_method = st.sidebar.radio("Choose input type:", ("Manual Entry", "Upload CSV"))

# ======================
# Shared prediction helpers
# ======================
# Feature columns sent by the manual-entry form; uploaded CSVs are checked
# against the same list before they are passed to the pipeline.
FEATURE_COLUMNS = [
    'Age', 'Gender', 'RBC10¬π¬≤-L', 'HGBg-dL', 'HCT%',
    'MCVfL', 'MCHpg', 'MCHCg-dL', 'RDW-CV%',
]


def _predict_with_confidence(features):
    """Predict a disease name and a confidence percentage for every row.

    Args:
        features: DataFrame containing the columns in FEATURE_COLUMNS.

    Returns:
        tuple: ``(disease_names, confidences)`` with one entry per input row;
        confidences are the winning class probability scaled to 0-100.
    """
    probs = model.predict_proba(features)
    best = np.argmax(probs, axis=1)
    # predict_proba columns follow model.classes_, which holds only the encoded
    # labels seen during fit. The column index therefore has to be mapped to
    # its encoded label before decoding; feeding the raw index to the encoder
    # yields the wrong name whenever a class was absent from the training split.
    diseases = le.inverse_transform(model.classes_[best])
    confidences = probs[np.arange(len(best)), best] * 100
    return diseases, confidences


def _read_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV, reporting problems in the UI instead of raising.

    Args:
        uploaded_file: file-like object returned by ``st.file_uploader``.

    Returns:
        The parsed DataFrame, or ``None`` when the file cannot be parsed,
        lacks required feature columns, or contains no rows.
    """
    try:
        frame = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        return None

    missing = [col for col in FEATURE_COLUMNS if col not in frame.columns]
    if missing:
        st.error("Uploaded CSV is missing required columns: " + ", ".join(missing))
        return None
    if frame.empty:
        st.warning("Uploaded CSV contains no rows to predict.")
        return None
    return frame


# ======================
# Manual Input
# ======================
if input_method == "Manual Entry":
    st.subheader("üß™ Enter Patient Details")

    age = st.number_input("Age", min_value=1, max_value=120, value=30)
    gender = st.selectbox("Gender", ["Male", "Female"])

    rbc = st.number_input("RBC (10¬π¬≤/L)", value=4.5)
    hgb = st.number_input("HGB (g/dL)", value=13.5)
    hct = st.number_input("HCT (%)", value=40.0)
    mcv = st.number_input("MCV (fL)", value=90.0)
    mch = st.number_input("MCH (pg)", value=30.0)
    mchc = st.number_input("MCHC (g/dL)", value=33.0)
    rdw = st.number_input("RDW-CV (%)", value=13.0)

    if st.button("üîç Predict"):
        # Single-row frame keyed by the training column names.
        input_df = pd.DataFrame([{
            'Age': age,
            'Gender': gender,
            'RBC10¬π¬≤-L': rbc,
            'HGBg-dL': hgb,
            'HCT%': hct,
            'MCVfL': mcv,
            'MCHpg': mch,
            'MCHCg-dL': mchc,
            'RDW-CV%': rdw
        }])

        diseases, confidences = _predict_with_confidence(input_df)
        disease = diseases[0]
        confidence = confidences[0]

        st.success(f"üß¨ Predicted Disease: **{disease}**")
        st.info(f"üìä Confidence: **{confidence:.2f}%**")

        # Below 60% the top class is flagged as unreliable.
        if confidence < 60:
            st.warning("‚ö†Ô∏è Low confidence prediction. Further clinical evaluation is recommended.")

# ======================
# CSV Upload
# ======================
else:
    st.subheader("üìÇ Upload CSV File")
    st.write("CSV must contain the same feature columns used during training.")

    uploaded_file = st.file_uploader("Upload CSV", type=["csv"])

    if uploaded_file is not None:
        df = _read_uploaded_csv(uploaded_file)

        if df is not None:
            # One inference pass supplies both the label and its probability.
            diseases, confidences = _predict_with_confidence(df)
            df["Predicted_Disease"] = diseases
            df["Confidence (%)"] = confidences

            st.success("‚úÖ Prediction completed")
            st.dataframe(df)

            csv = df.to_csv(index=False).encode("utf-8")
            st.download_button(
                label="‚¨áÔ∏è Download Results",
                data=csv,
                file_name="anemia_predictions.csv",
                mime="text/csv"
            )

# ======================
# Footer
# ======================
# Rendered for both input methods: a markdown horizontal rule followed by a
# small-print disclaimer caption.
st.markdown("---")
st.caption("‚öïÔ∏è This tool is for academic & research purposes only.")


2026-02-01 18:05:45.708 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2026-02-01 18:05:45.777 Session state does not function when running a script without `streamlit run`


DeltaGenerator()

In [13]:
# Launches the Streamlit app stored in app.py, resolved against the current
# working directory.
# NOTE(review): the recorded run failed with "File does not exist: app.py".
# The app code has to be saved to app.py first (e.g. by starting the app cell
# with %%writefile app.py) before this command can succeed.
!streamlit run app.py


Usage: streamlit run [OPTIONS] [TARGET] [ARGS]...
Try 'streamlit run --help' for help.

Error: Invalid value: File does not exist: app.py
