In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Manually created dataset based on app.py
data = [
    ["Flu", ["fever", "cough", "body ache", "fatigue", "headache", "chills", "sore throat", "congestion"], 
     ["Rest well", "Drink fluids", "Take fever medication"], ["Paracetamol", "Ibuprofen"]],
    
    ["Covid-19", ["fever", "cough", "fatigue", "shortness of breath", "loss of taste", "loss of smell", "body ache", "sore throat", "headache"], 
     ["Isolate yourself", "Monitor oxygen levels", "Drink warm fluids"], ["Paracetamol", "Cough syrup"]],
    
    ["Viral Infection", ["fatigue", "fever", "headache", "body ache", "cough", "sore throat", "chills", "runny nose"], 
     ["Stay hydrated", "Take pain relievers", "Get enough rest"], ["Acetaminophen", "Ibuprofen"]],
    
    ["Allergy", ["sneezing", "runny nose", "itchy eyes", "skin rash", "cough", "congestion", "wheezing"], 
     ["Avoid allergens", "Take antihistamines", "Use a nasal spray"], ["Antihistamines", "Nasal spray"]],
    
    ["Stomach Bug", ["nausea", "vomiting", "diarrhea", "abdominal pain", "fever", "fatigue"], 
     ["Drink electrolytes", "Eat bland foods", "Avoid dairy"], ["ORS solution", "Loperamide"]],
    
    ["Migraine", ["headache", "nausea", "sensitivity to light", "sensitivity to sound", "fatigue", "vision changes"], 
     ["Rest in a dark room", "Take pain relievers", "Avoid triggers"], ["Pain relievers", "Anti-nausea medication"]],
    
    ["Sinus Infection", ["headache", "facial pain", "congestion", "runny nose", "sore throat", "fatigue"], 
     ["Use steam inhalation", "Drink warm fluids", "Take decongestants"], ["Decongestants", "Nasal steroids"]],
    
    ["Common Cold", ["sneezing", "runny nose", "cough", "sore throat", "mild fever", "fatigue"], 
     ["Drink warm tea", "Use a humidifier", "Get plenty of rest"], ["Cough syrup", "Vitamin C"]],
    
    ["Bronchitis", ["cough", "chest discomfort", "fatigue", "shortness of breath", "fever", "sore throat"], 
     ["Use a humidifier", "Drink warm liquids", "Avoid smoking"], ["Cough suppressants", "Bronchodilators"]],
    
    ["Pneumonia", ["fever", "cough", "shortness of breath", "chest pain", "fatigue", "body ache"], 
     ["Seek medical attention", "Take antibiotics (if bacterial)", "Get plenty of rest"], ["Antibiotics", "Cough medicine"]]
]

# Convert to DataFrame
df = pd.DataFrame(data, columns=["Disease", "Symptoms", "Precautions", "Medicines"])

# Encode symptoms using MultiLabelBinarizer
mlb = MultiLabelBinarizer()
X = mlb.fit_transform(df["Symptoms"])
y = df["Disease"]

# Split data into training & testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a machine learning model (Random Forest Classifier)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict on test data
y_pred = model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Print detailed classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred,zero_division=1))

# Ask user for symptoms
user_symptoms = input("\nEnter your symptoms (comma-separated): ").lower().split(",")

# Convert input into the same format as training data
user_symptoms = [symptom.strip() for symptom in user_symptoms if symptom.strip() in mlb.classes_]
user_input_vector = mlb.transform([user_symptoms])

# Predict probability for each disease
predicted_probs = model.predict_proba(user_input_vector)[0]
disease_probabilities = {disease: prob for disease, prob in zip(model.classes_, predicted_probs)}

# Filter diseases that have **any non-zero probability**
possible_diseases = [(disease, prob) for disease, prob in disease_probabilities.items() if prob > 0]

# Sort diseases by probability (highest first)
possible_diseases.sort(key=lambda x: x[1], reverse=True)

if not possible_diseases:
    print("\n❌ No matching disease found. Please consult a doctor.")
else:
    print("\n🔍 Possible diseases based on your symptoms:")
    for disease, prob in possible_diseases:
        print(f" - {disease} ({prob*100:.2f}% probability)")
        
        # Fetch precautions & medicines
        precautions = df[df["Disease"] == disease]["Precautions"].values[0]
        medicines = df[df["Disease"] == disease]["Medicines"].values[0]
        
        print(f"   🛑 Precautions: {', '.join(precautions)}")
        print(f"   💊 Suggested Medicines: {', '.join(medicines)}\n")

# Display model accuracy in response
print(f"\n(Note: The model has an accuracy of {accuracy * 100:.2f}%)")


Model Accuracy: 0.00%

Classification Report:
                 precision    recall  f1-score   support

     Bronchitis       1.00      0.00      0.00       1.0
       Covid-19       1.00      0.00      0.00       1.0
      Pneumonia       0.00      1.00      0.00       0.0
Viral Infection       0.00      1.00      0.00       0.0

       accuracy                           0.00       2.0
      macro avg       0.50      0.50      0.00       2.0
   weighted avg       1.00      0.00      0.00       2.0


🔍 Possible diseases based on your symptoms:
 - Stomach Bug (23.00% probability)
   🛑 Precautions: Drink electrolytes, Eat bland foods, Avoid dairy
   💊 Suggested Medicines: ORS solution, Loperamide

 - Pneumonia (18.00% probability)
   🛑 Precautions: Seek medical attention, Take antibiotics (if bacterial), Get plenty of rest
   💊 Suggested Medicines: Antibiotics, Cough medicine

 - Viral Infection (13.00% probability)
   🛑 Precautions: Stay hydrated, Take pain relievers, Get enough rest
  

In [9]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Manually created dataset
symptoms = [
    "fever cough body ache fatigue headache chills sore throat congestion",
    "fever cough fatigue shortness of breath loss of taste loss of smell body ache sore throat headache",
    "fatigue fever headache body ache cough sore throat chills runny nose",
    "sneezing runny nose itchy eyes skin rash cough congestion wheezing",
    "nausea vomiting diarrhea abdominal pain fever fatigue",
    "headache nausea sensitivity to light sensitivity to sound fatigue vision changes",
    "headache facial pain congestion runny nose sore throat fatigue",
    "sneezing runny nose cough sore throat mild fever fatigue",
    "cough chest discomfort fatigue shortness of breath fever sore throat",
    "fever cough shortness of breath chest pain fatigue body ache"
]

diseases = [
    "Flu", "Covid-19", "Viral Infection", "Allergy", "Stomach Bug",
    "Migraine", "Sinus Infection", "Common Cold", "Bronchitis", "Pneumonia"
]

# Convert to DataFrame
data = pd.DataFrame({"symptoms": symptoms, "disease": diseases})

# Text vectorization
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(data["symptoms"])
y = data["disease"]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Predictions & evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)
print(classification_report(y_test, y_pred,zero_division=1))

# Save model and vectorizer
pickle.dump(model, open("disease_prediction_model.pkl", "wb"))
pickle.dump(vectorizer, open("vectorizer.pkl", "wb"))

print("Model and vectorizer saved successfully!")


Model Accuracy: 0.0
              precision    recall  f1-score   support

  Bronchitis       1.00      0.00      0.00       1.0
    Covid-19       1.00      0.00      0.00       1.0
   Pneumonia       0.00      1.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.67      0.33      0.00       2.0
weighted avg       1.00      0.00      0.00       2.0

Model and vectorizer saved successfully!


In [10]:
import pandas as pd
import numpy as np
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Manually created dataset with symptoms, diseases, precautions, and medicines
data = {
    "Disease": ["Flu", "Cold", "COVID-19", "Malaria"],
    "Symptoms": ["fever, cough, fatigue", "cough, sneezing, sore throat", "fever, cough, shortness of breath", "fever, chills, sweating"],
    "Precautions": ["Rest, drink fluids, take ibuprofen", "Rest, drink warm liquids, use humidifier", "Isolate, rest, monitor oxygen levels", "Antimalarial medication, mosquito net, hydration"],
    "Medicine": ["Paracetamol, Ibuprofen", "Antihistamines, Cough syrup", "Paracetamol, Azithromycin", "Chloroquine, Quinine"]
}

df = pd.DataFrame(data)

# Convert symptoms to lowercase for better processing
df['Symptoms'] = df['Symptoms'].apply(lambda x: x.lower())

# Feature extraction using TF-IDF
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(df['Symptoms'])

# Target variable
y = df['Disease']

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForest Model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict for testing
y_pred = model.predict(X_test)

# Model accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Save model and vectorizer
with open("model/symptom_checker.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

with open("model/vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

print("Model and vectorizer saved successfully!")


Model Accuracy: 0.00%
Model and vectorizer saved successfully!
