In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [2]:
df = pd.read_csv("dataset.csv")
df.head()


Unnamed: 0,Disease,Symptom_1,Symptom_2,Symptom_3,Symptom_4,Symptom_5,Symptom_6,Symptom_7,Symptom_8,Symptom_9,Symptom_10,Symptom_11,Symptom_12,Symptom_13,Symptom_14,Symptom_15,Symptom_16,Symptom_17
0,Fungal infection,itching,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,
1,Fungal infection,skin_rash,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
2,Fungal infection,itching,nodal_skin_eruptions,dischromic _patches,,,,,,,,,,,,,,
3,Fungal infection,itching,skin_rash,dischromic _patches,,,,,,,,,,,,,,
4,Fungal infection,itching,skin_rash,nodal_skin_eruptions,,,,,,,,,,,,,,


In [3]:
# Fill missing values with 0 or 'none'
df = df.fillna(0)

# Load symptom severity map
severity_df = pd.read_csv("Symptom-severity.csv")
symptom_severity = dict(zip(severity_df['Symptom'].str.lower(), severity_df['weight']))


In [4]:
# Convert all symptom strings into numerical values
def encode_symptom(symptom):
    if symptom == 0:
        return 0
    symptom = symptom.strip().lower()
    return symptom_severity.get(symptom, 0)

# Apply encoding to each symptom column
for col in df.columns:
    if col != 'Disease':
        df[col] = df[col].apply(encode_symptom)


In [5]:
X = df.drop("Disease", axis=1)
y = df["Disease"]


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [7]:
model = RandomForestClassifier()
model.fit(X_train, y_train)


In [8]:
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9949186991869918
Classification Report:
                                          precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        18
                                   AIDS       1.00      1.00      1.00        30
                                   Acne       1.00      1.00      1.00        24
                    Alcoholic hepatitis       1.00      1.00      1.00        25
                                Allergy       0.92      1.00      0.96        24
                              Arthritis       1.00      1.00      1.00        23
                       Bronchial Asthma       1.00      1.00      1.00        33
                   Cervical spondylosis       1.00      0.87      0.93        23
                            Chicken pox       1.00      1.00      1.00        21
                    Chronic cholestasis       1.00      1.00      1.00        15
                            Common Cold       1.00     

In [9]:
# Simulate a user giving symptoms
input_symptoms = ['fatigue', 'vomiting', 'headache']

# Encode input symptoms into severity values
input_encoded = [0]*17  # Initialize all 17 symptoms

for i in range(len(input_symptoms)):
    symptom = input_symptoms[i].strip().lower()
    input_encoded[i] = symptom_severity.get(symptom, 0)

# Predict disease
predicted_disease = model.predict([input_encoded])
print("Predicted Disease:", predicted_disease[0])


Predicted Disease: Allergy




In [10]:
desc_df = pd.read_csv("symptom_Description.csv")
prec_df = pd.read_csv("symptom_precaution.csv")

# Description
disease_name = predicted_disease[0]
desc = desc_df[desc_df['Disease'] == disease_name]['Description'].values[0]
print("Description:", desc)

# Precautions
precautions = prec_df[prec_df['Disease'] == disease_name].values[0][1:]
print("Precautions:", list(precautions))


Description: An allergy is an immune system response to a foreign substance that's not typically harmful to your body.They can include certain foods, pollen, or pet dander. Your immune system's job is to keep you healthy by fighting harmful pathogens.
Precautions: ['apply calamine', 'cover area with bandage', nan, 'use ice to compress itching']


In [12]:
# Function to take input, predict disease, and show info
def predict_disease():
    print("Enter up to 5 symptoms (e.g., headache, fatigue, nausea):")
    input_symptoms = []

    for i in range(5):
        symptom = input(f"Symptom {i+1} (or press Enter to skip): ").strip().lower()
        if symptom == "":
            break
        input_symptoms.append(symptom)

    # Validate symptoms
    valid_symptoms = symptom_severity.keys()
    unknown = [s for s in input_symptoms if s not in valid_symptoms]

    if unknown:
        print(f"\n❌ Unknown symptoms: {', '.join(unknown)}")
        print("Please check the spelling or try fewer symptoms.")
        return

    # Encode symptoms
    input_encoded = [0]*17
    for i in range(len(input_symptoms)):
        input_encoded[i] = symptom_severity[input_symptoms[i]]

    # Predict disease
    predicted = model.predict([input_encoded])[0]
    print(f"\n🩺 Predicted Disease: **{predicted.upper()}**")

    # Description
    try:
        desc = desc_df[desc_df['Disease'] == predicted]['Description'].values[0]
        print(f"\n📘 Description:\n{desc}")
    except:
        print("\n(No description available)")

    # Precautions
    try:
        row = prec_df[prec_df['Disease'] == predicted]
        precautions = row.iloc[0, 1:].dropna().tolist()
        print("\n💡 Precautions to take:")
        for i, prec in enumerate(precautions, 1):
            print(f"{i}. {prec}")
    except:
        print("\n(No precautions available)")

# Call the function
predict_disease()


Enter up to 5 symptoms (e.g., headache, fatigue, nausea):


Symptom 1 (or press Enter to skip):  yellowish_skin
Symptom 2 (or press Enter to skip):  nausea
Symptom 3 (or press Enter to skip):  loss_of_appetite
Symptom 4 (or press Enter to skip):  abdominal_pain
Symptom 5 (or press Enter to skip):  yellowing_of_eyes



🩺 Predicted Disease: **HEPATITIS C**

📘 Description:
Inflammation of the liver due to the hepatitis C virus (HCV), which is usually spread via blood transfusion (rare), hemodialysis, and needle sticks. The damage hepatitis C does to the liver can lead to cirrhosis and its complications as well as cancer.

💡 Precautions to take:
1. Consult nearest hospital
2. vaccination
3. eat healthy
4. medication




In [13]:
from rapidfuzz import process

# Helper: Get closest matching symptom from known ones
def correct_symptom(input_symptom, valid_symptoms, threshold=70):
    match, score, _ = process.extractOne(input_symptom, valid_symptoms)
    if score >= threshold:
        return match
    return None




In [14]:
def predict_disease_fuzzy():
    print("Enter up to 5 symptoms (e.g., headache, fatigue, nausea):")
    input_symptoms = []

    for i in range(5):
        symptom = input(f"Symptom {i+1} (or press Enter to skip): ").strip().lower()
        if symptom == "":
            break
        corrected = correct_symptom(symptom, list(symptom_severity.keys()))
        if corrected:
            input_symptoms.append(corrected)
        else:
            print(f"❌ Couldn’t recognize: {symptom} — try something else.")
            return

    # Encode symptoms
    input_encoded = [0]*17
    for i in range(len(input_symptoms)):
        input_encoded[i] = symptom_severity[input_symptoms[i]]

    # Predict
    predicted = model.predict([input_encoded])[0]
    print(f"\n🩺 Predicted Disease: **{predicted.upper()}**")

    # Description
    try:
        desc = desc_df[desc_df['Disease'] == predicted]['Description'].values[0]
        print(f"\n📘 Description:\n{desc}")
    except:
        print("\n(No description available)")

    # Precautions
    try:
        row = prec_df[prec_df['Disease'] == predicted]
        precautions = row.iloc[0, 1:].dropna().tolist()
        print("\n💡 Precautions to take:")
        for i, prec in enumerate(precautions, 1):
            print(f"{i}. {prec}")
    except:
        print("\n(No precautions available)")
