In [None]:
# STEP 1 — Imports
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Load Dataset

In [None]:

# Read the dataset from a CSV expected in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="Symptom2Disease.csv")

# Drop the unused 'Unnamed: 0' column

In [None]:
# Remove the 'Unnamed: 0' column (presumably a leftover saved index — confirm
# against the CSV). errors='ignore' turns this into a no-op when the column
# is absent, so the cell can be re-run safely.
df = df.drop(labels=['Unnamed: 0'], axis='columns', errors='ignore')

# Check data

In [None]:
# Show the first rows and the column names as a quick sanity check of the load.
head_rows = df.head()
column_names = list(df.columns)
print(head_rows)
print("\nColumns:", column_names)

       label                                               text
0  Psoriasis  I have been experiencing a skin rash on my arm...
1  Psoriasis  My skin has been peeling, especially on my kne...
2  Psoriasis  I have been experiencing joint pain in my fing...
3  Psoriasis  There is a silver like dusting on my skin, esp...
4  Psoriasis  My nails have small dents or pits in them, and...

Columns: ['label', 'text']


In [None]:
# List the distinct values of the target column; kept as the cell's last
# expression so the notebook displays the resulting array.
df.loc[:, 'label'].unique()

array(['Psoriasis', 'Varicose Veins', 'Typhoid', 'Chicken pox',
       'Impetigo', 'Dengue', 'Fungal infection', 'Common Cold',
       'Pneumonia', 'Dimorphic Hemorrhoids', 'Arthritis', 'Acne',
       'Bronchial Asthma', 'Hypertension', 'Migraine',
       'Cervical spondylosis', 'Jaundice', 'Malaria',
       'urinary tract infection', 'allergy',
       'gastroesophageal reflux disease', 'drug reaction',
       'peptic ulcer disease', 'diabetes'], dtype=object)

# Define features and target

In [None]:

# Features are the free-text symptom descriptions; the target is the disease label.
X, y = df['text'], df['label']


# Split dataset

In [None]:
# Hold out 20% of the rows for evaluation.
# stratify=y keeps each disease label's share the same in the train and test
# splits. Without it, a plain shuffle leaves some classes with noticeably fewer
# test rows than others, which makes the per-class metrics uneven.
# random_state pins the shuffle so the split is reproducible.
# NOTE: stratification needs at least two rows per label; scikit-learn raises
# ValueError otherwise. Metrics recorded from an earlier, unstratified run will
# differ after re-running the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# TF-IDF Vectorization

In [None]:
# Build TF-IDF features: English stop words are removed and the vocabulary is
# capped at 5000 terms.
vectorizer = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)

# Learn the vocabulary and IDF weights from the training texts only ...
X_train_tfidf = vectorizer.fit_transform(X_train)
# ... then reuse that fitted vocabulary for the held-out texts (no refit).
X_test_tfidf = vectorizer.transform(X_test)

# Train model

In [None]:
# Fit a logistic-regression classifier on the TF-IDF training matrix.
# max_iter=300 raises the solver's iteration cap; fit() returns the estimator
# itself, so the chained call binds the fitted model.
model = LogisticRegression(max_iter=300).fit(X_train_tfidf, y_train)
# Bare last expression so the notebook still displays the fitted estimator.
model

# Evaluation

In [None]:
# Predict labels for the held-out set.
y_pred = model.predict(X_test_tfidf)

# Overall accuracy as a percentage rounded to two decimals.
accuracy_pct = round(accuracy_score(y_test, y_pred) * 100, 2)
# Per-class precision / recall / F1 / support table.
report_text = classification_report(y_test, y_pred)

print("Accuracy:", accuracy_pct, "%")
print("\nClassification Report:\n", report_text)

Accuracy: 97.5 %

Classification Report:
                                  precision    recall  f1-score   support

                           Acne       1.00      1.00      1.00         7
                      Arthritis       1.00      1.00      1.00        10
               Bronchial Asthma       1.00      1.00      1.00        11
           Cervical spondylosis       1.00      1.00      1.00         7
                    Chicken pox       0.86      1.00      0.92        12
                    Common Cold       1.00      1.00      1.00        12
                         Dengue       1.00      0.83      0.91        12
          Dimorphic Hemorrhoids       1.00      1.00      1.00         7
               Fungal infection       1.00      1.00      1.00        13
                   Hypertension       1.00      1.00      1.00        10
                       Impetigo       1.00      1.00      1.00        11
                       Jaundice       1.00      1.00      1.00        11
        

# Prediction on a sample input

In [None]:
# Classify one free-text symptom description with the fitted pipeline.
sample_text = "I have high fever, muscle pain and rashes on my skin."

# The vectorizer expects an iterable of documents, so wrap the string in a list.
sample_vector = vectorizer.transform([sample_text])

# One input document yields exactly one predicted label; unpack it.
[prediction] = model.predict(sample_vector)
print(" You may be suffering from:", prediction)


 You may be suffering from: Dengue


# Save model and vectorizer

In [None]:
import pickle

# Persist the fitted classifier and the fitted TF-IDF vectorizer; both are
# needed later, since new text must be transformed with the same vocabulary
# before it is passed to the model.
# The `with` blocks make sure each file is flushed and closed even if pickling
# raises; passing a bare open(...) to pickle.dump leaves the handle unclosed.
# NOTE: only unpickle these files from a trusted location — pickle.load can
# execute arbitrary code.
with open("symptom_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)
print(" Model and vectorizer saved successfully!")


 Model and vectorizer saved successfully!
