In [None]:
import kagglehub

# Download latest version
# The dataset is identified by its "<owner>/<dataset-name>" handle; the call
# returns the directory the files were placed in.
dataset_handle = "itachi9604/disease-symptom-description-dataset"
path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/itachi9604/disease-symptom-description-dataset?dataset_version_number=2...


100%|██████████| 30.1k/30.1k [00:00<00:00, 20.6MB/s]

Extracting files...
Path to dataset files: /root/.cache/kagglehub/datasets/itachi9604/disease-symptom-description-dataset/versions/2





In [None]:
# Step 1: Import libraries
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 2: Load datasets
# One training table plus three lookup tables (severity weights, disease
# descriptions, disease precautions).
df = pd.read_csv('/content/sample_data/dataset.csv')
severity = pd.read_csv('/content/sample_data/Symptom-severity.csv')
description = pd.read_csv('/content/sample_data/symptom_Description.csv')
precaution = pd.read_csv('/content/sample_data/symptom_precaution.csv')

# Step 3: Preprocess data
# Missing cells become the placeholder string 'None', which has no severity
# entry and therefore maps to weight 0 below.
df = df.fillna('None')

# Lookup table: normalised (stripped, lower-cased) symptom name -> weight.
normalized_symptoms = severity['Symptom'].str.strip().str.lower()
symptom_weight = dict(zip(normalized_symptoms, severity['weight']))


def _symptom_to_weight(cell):
    """Return the severity weight for one symptom cell, or 0 if unknown."""
    if not isinstance(cell, str):
        return 0
    return symptom_weight.get(cell.strip().lower(), 0)


# Convert symptoms to weights
# Every column after the first is treated as a symptom column
# (presumably column 0 is 'Disease' -- confirm against the CSV).
for column in df.columns[1:]:
    df[column] = df[column].apply(_symptom_to_weight)

# Encode disease labels
# `le` is kept so predictions can be decoded back to disease names later.
le = LabelEncoder()
df['Disease'] = le.fit_transform(df['Disease'])

# Step 4: Train model
y = df['Disease']
X = df.drop(columns='Disease')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier()
model.fit(X_train, y_train)

# Accuracy display
test_predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, test_predictions)
print("✅ Model Trained Successfully. Accuracy:", round(accuracy * 100, 2), "%")

# Step 5: Prediction Function
def predict_disease(symptoms_list):
    """Return the three most probable diseases for a list of symptoms.

    Args:
        symptoms_list: Symptom names. Each one is stripped and lower-cased,
            then looked up in ``symptom_weight``; unknown names contribute 0.
            Symptoms fill the feature vector in the order given, and any
            beyond the model's feature count are ignored.

    Returns:
        A list of ``(disease, probability_percent, description, precautions)``
        tuples, most probable first. ``precautions`` is a list of strings.
    """
    # Size the input from the fitted model instead of hard-coding it; 17 is
    # the previous fixed width, kept as a fallback for older scikit-learn.
    n_features = getattr(model, 'n_features_in_', 17)
    input_data = [0] * n_features
    for i, symptom in enumerate(symptoms_list[:n_features]):
        input_data[i] = symptom_weight.get(str(symptom).strip().lower(), 0)

    # If the model recorded column names at fit time, hand the same names
    # back so scikit-learn does not warn about missing feature names.
    feature_names = getattr(model, 'feature_names_in_', None)
    if feature_names is not None:
        features = pd.DataFrame([input_data], columns=feature_names)
    else:
        features = [input_data]

    probs = model.predict_proba(features)[0]
    top3 = probs.argsort()[-3:][::-1]
    results = []

    precaution_cols = ['Precaution_1', 'Precaution_2', 'Precaution_3', 'Precaution_4']

    for idx in top3:
        # predict_proba columns are ordered like model.classes_, so map the
        # column position to the encoded label before decoding it.
        disease = le.inverse_transform([model.classes_[idx]])[0]
        prob = round(probs[idx] * 100, 2)

        # Compare on stripped names so surrounding whitespace in either
        # table cannot cause a lookup miss.
        lookup_name = str(disease).strip()

        desc_row = description[description['Disease'].astype(str).str.strip() == lookup_name]
        desc = desc_row['Description'].values[0] if not desc_row.empty else 'No description available.'

        prec_row = precaution[precaution['Disease'].astype(str).str.strip() == lookup_name]
        prec = []
        if not prec_row.empty:
            # Skip missing (NaN) cells so callers do not print them as 'nan'.
            prec = [p for p in prec_row.iloc[0][precaution_cols].tolist() if pd.notna(p)]
        if not prec:
            prec = ['No precaution info available.']

        results.append((disease, prob, desc, prec))

    return results

# Step 6: Symptom Menu Setup
# Normalise the symptom names (strip + lower-case), de-duplicate and sort
# them, then number them starting at 1 for the selection menu.
cleaned_names = severity['Symptom'].str.strip().str.lower()
all_symptoms = sorted(cleaned_names.unique())
symptom_dict = dict(enumerate(all_symptoms, start=1))

def show_symptom_menu():
    """Print every entry of ``symptom_dict`` as a ``number: name`` line."""
    print("\n🔍 Available Symptoms:")
    total = len(symptom_dict)
    # Keys are looked up by consecutive number, starting at 1.
    menu_lines = (f"{number}: {symptom_dict[number]}" for number in range(1, total + 1))
    for line in menu_lines:
        print(line)

# Step 7: Menu-Driven System
# Interactive loop: keeps prompting until the user chooses to exit.
while True:
    print("\n===== Disease Prediction System =====")
    print("1. Predict Disease")
    print("2. Exit")
    # Strip so that accidental surrounding spaces do not make a valid choice fail.
    choice = input("Enter your choice (1/2): ").strip()

    if choice == '1':
        show_symptom_menu()
        selected = input("\nEnter symptom numbers separated by commas (e.g., 46, 95): ")

        try:
            nums = [int(n.strip()) for n in selected.split(",") if n.strip().isdigit()]
        except ValueError:
            # str.isdigit() accepts some characters (e.g. superscript digits)
            # that int() rejects; that is the only failure expected here, so
            # catch it specifically instead of swallowing every exception.
            print("❌ Invalid input. Try again.")
            continue

        # Keep only numbers that exist in the menu; computed once and reused
        # for both the echo below and the prediction input.
        valid_nums = [n for n in nums if n in symptom_dict]
        user_symptoms = [symptom_dict[n] for n in valid_nums]

        if not user_symptoms:
            print("❌ No valid symptoms selected.")
            continue

        print("\n✅ You selected:")
        for n in valid_nums:
            print("- {}: {}".format(n, symptom_dict[n]))

        results = predict_disease(user_symptoms)

        print("\n🔮 Top Predicted Diseases:")
        for idx, (disease, prob, desc, precs) in enumerate(results, 1):
            print("\n{}. 📌 {} ({:.2f}%)".format(idx, disease, prob))
            print("📖 Description:", desc)
            print("🛡️ Precautions:")
            for p in precs:
                print("-", p)

    elif choice == '2':
        print("👋 Exiting the system. Stay healthy!")
        break
    else:
        print("❌ Invalid choice. Please select 1 or 2.")


✅ Model Trained Successfully. Accuracy: 99.49 %

===== Disease Prediction System =====
1. Predict Disease
2. Exit
Enter your choice (1/2): 1

🔍 Available Symptoms:
1: abdominal_pain
2: abnormal_menstruation
3: acidity
4: acute_liver_failure
5: altered_sensorium
6: anxiety
7: back_pain
8: belly_pain
9: blackheads
10: bladder_discomfort
11: blister
12: blood_in_sputum
13: bloody_stool
14: blurred_and_distorted_vision
15: breathlessness
16: brittle_nails
17: bruising
18: burning_micturition
19: chest_pain
20: chills
21: cold_hands_and_feets
22: coma
23: congestion
24: constipation
25: continuous_feel_of_urine
26: continuous_sneezing
27: cough
28: cramps
29: dark_urine
30: dehydration
31: depression
32: diarrhoea
33: dischromic_patches
34: distention_of_abdomen
35: dizziness
36: drying_and_tingling_lips
37: enlarged_thyroid
38: excessive_hunger
39: extra_marital_contacts
40: family_history
41: fast_heart_rate
42: fatigue
43: fluid_overload
44: foul_smell_ofurine
45: headache
46: high_fever




✅ You selected:
- 3: acidity
- 20: chills
- 21: cold_hands_and_feets
- 27: cough
- 30: dehydration
- 45: headache
- 65: mild_fever
- 71: muscle_weakness
- 95: runny_nose
- 111: sweating
- 118: throat_irritation
- 101: skin_peeling
- 94: restlessness
- 91: red_sore_around_nose
- 40: family_history
- 61: loss_of_appetite
- 73: neck_pain

🔮 Top Predicted Diseases:

1. 📌 Common Cold (27.00%)
📖 Description: The common cold is a viral infection of your nose and throat (upper respiratory tract). It's usually harmless, although it might not feel that way. Many types of viruses can cause a common cold.
🛡️ Precautions:
- drink vitamin c rich drinks
- take vapour
- avoid cold food
- keep fever in check

2. 📌 Tuberculosis (25.00%)
📖 Description: Tuberculosis (TB) is an infectious disease usually caused by Mycobacterium tuberculosis (MTB) bacteria. Tuberculosis generally affects the lungs, but can also affect other parts of the body. Most infections show no symptoms, in which case it is known as l

In [None]:
# Optional: print accuracy
# Re-score the fitted model on the held-out split and show it as a percentage
# rounded to two decimals.
held_out_predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, held_out_predictions)
accuracy_percent = round(accuracy * 100, 2)
print("✅ Model Trained Successfully. Accuracy:", accuracy_percent, "%")


✅ Model Trained Successfully. Accuracy: 99.49 %
