In [7]:
import joblib

# Restore the four persisted objects in a single ordered pass; every path is
# relative to the notebook's current working directory.
# NOTE(review): joblib.load unpickles its input, so only point it at trusted files.
model, tfidf, label_encoders, disease_encoder = map(
    joblib.load,
    (
        "disease_model.pkl",
        "tfidf_vectorizer.pkl",
        "label_encoders.pkl",
        "disease_encoder.pkl",
    ),
)

print("Model and tools loaded.")


Model and tools loaded.


In [None]:

import numpy as np

def predict_disease(symptom_text, age, sex_text, top_n=5):
    """Print the ``top_n`` most probable diseases for one patient description.

    The symptom text is vectorized with the module-level ``tfidf``, the sex
    label is encoded with ``label_encoders['Sex']``, and both are stacked with
    the age into a single-row feature matrix that is passed to
    ``model.predict_proba``.

    Args:
        symptom_text: Free-text symptom description given to ``tfidf.transform``.
        age: Patient age, used as a single numeric feature column.
        sex_text: Sex label; it must be a value ``label_encoders['Sex']`` can
            transform, otherwise that encoder's error propagates.
        top_n: How many of the highest-probability diseases to print.

    Returns:
        None. The ranking is written to stdout as ``"<disease>: <pct>%"`` lines.
    """
    # Local import kept inside the function, as in the original cell; it must
    # be an absolute import because a notebook has no parent package.
    from scipy.sparse import hstack

    symptom_vec = tfidf.transform([symptom_text])

    age_array = np.array([[age]])
    sex_encoded = label_encoders['Sex'].transform([sex_text])
    sex_array = np.array(sex_encoded).reshape(-1, 1)

    # Column order is text features, then age, then sex.
    # NOTE(review): presumably this matches the layout used at training time — confirm.
    full_input = hstack([symptom_vec, age_array, sex_array])

    # Single input row, so take the first (only) row of class probabilities.
    probas = model.predict_proba(full_input)[0]

    # NOTE(review): assumes probability column i corresponds to encoded class
    # id i (0..n-1) — verify against the model's class ordering.
    disease_names = disease_encoder.inverse_transform(np.arange(len(probas)))

    # Rank (name, probability) pairs from most to least likely.
    results = sorted(zip(disease_names, probas), key=lambda pair: pair[1], reverse=True)

    print("\nTop disease probabilities:")
    for disease, prob in results[:top_n]:
        print(f"{disease}: {prob*100:.2f}%")


In [9]:
# Example query; top_n is not passed, so the function's default of 5 applies.
predict_disease(
    symptom_text="shortness of breath and coughing",
    age=30,
    sex_text="female",
)


Top disease probabilities:
Asthma: 47.00%
Bronchiectasis: 18.00%
Asbestosis: 14.00%
Acute Respiratory Distress Syndrome: 8.00%
Respiratory syncytial virus: 8.00%
