In [None]:
import  pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np
import joblib

In [None]:
#load all datasets
dataset = pd.read_csv('datasets/Training.csv')
sym_des = pd.read_csv("datasets/symtoms_df.csv")
precautions = pd.read_csv("datasets/precautions_df.csv")
workout = pd.read_csv("datasets/workout_df.csv")
description = pd.read_csv("datasets/description.csv")
medications = pd.read_csv('datasets/medications.csv')
diets = pd.read_csv("datasets/diets.csv")
symptom_columns = dataset.columns[:-1]

In [None]:
dataset

In [None]:
X = dataset.drop('prognosis', axis=1)
y = dataset['prognosis']

# ecoding prognonsis
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)
    
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=20)

In [None]:

# Train models

#store models
models = {
    'SVC': SVC(kernel='linear'),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'KNeighbors': KNeighborsClassifier(n_neighbors=5),
    'MultinomialNB': MultinomialNB()
}

# Loop through the models, train, test, and print results
for model_name, model in models.items():
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # accuracy
    accuracy = accuracy_score(y_test, predictions)
    print(f"{model_name} Accuracy: {accuracy}")

    #confusion matrix
    cm = confusion_matrix(y_test, predictions)
    print(f"{model_name} Confusion Matrix:")
    print(np.array2string(cm, separator=', '))

    print("\n" + "="*40 + "\n")


In [None]:
# selecting svc
svc = SVC(kernel='linear')
svc.fit(X_train,y_train)
ypred = svc.predict(X_test)
accuracy_score(y_test,ypred)

# save svc
import pickle
pickle.dump(svc,open('svc.pkl','wb'))

# load model
svc = pickle.load(open('svc.pkl','rb'))

In [None]:
# Encode target labels
label_encoder = LabelEncoder()
dataset["prognosis"] = label_encoder.fit_transform(dataset["prognosis"])

# Save LabelEncoder
joblib.dump(label_encoder, "label_encoder.pkl")

print("LabelEncoder saved successfully!")

In [None]:
# Load the trained model
model = joblib.load("svc.pkl")  # Ensure the model is saved

# Load the label encoder used during training
label_encoder = joblib.load("label_encoder.pkl")  # Ensure the encoder is saved


In [None]:
# test 1:
print("predicted disease :",svc.predict(X_test.iloc[0].values.reshape(1,-1)))
print("Actual Disease :", y_test[0])

In [None]:
# test 2:
print("predicted disease :",svc.predict(X_test.iloc[100].values.reshape(1,-1)))
print("Actual Disease :", y_test[100])

In [None]:
def preprocess_symptoms(symptom_input):
    """
    Converts user symptoms into a one-hot encoded format.
    """
    # Convert input to lowercase and split by comma
    symptoms = [s.strip().lower() for s in symptom_input.split(",")]

    # Create a DataFrame with correct feature names
    input_vector = pd.DataFrame(0, index=[0], columns=symptom_columns)

    # Set '1' for the symptoms that are present
    for symptom in symptoms:
        if symptom in symptom_columns:
            input_vector[symptom] = 1

    return input_vector  # Returns DataFrame with correct feature names



In [None]:
def predict_disease(symptom_input):
    """
    Takes symptom input from user, processes it, and predicts disease.
    """
    input_vector = preprocess_symptoms(symptom_input)

    # Make prediction using trained model
    predicted_class = model.predict(input_vector)[0]

    # Decode the predicted label
    disease = label_encoder.inverse_transform([predicted_class])[0]
    
    return disease


In [None]:
if 'Medication' in medications.columns:
    meds = medications['Medication']
else:
    print("Error: 'Medication' column not found in medications.csv")

In [None]:
def get_disease_info(disease):
    """
    Retrieves details about the predicted disease from datasets.
    """
    # Fetch Description
    description_row = description[description["Disease"] == disease]
    description_text = description_row["Description"].values[0] if not description_row.empty else "No description available."

    # Fetch Precautions
    precautions_row = precautions[precautions["Disease"] == disease]
    if not precautions_row.empty:
        precautions_list = precautions_row.iloc[:, 1:].values.flatten()
        precautions_text = [f"{i+1} :  {precaution}" for i, precaution in enumerate(precautions_list) if precaution]
    else:
        precautions_text = ["No precautions available."]

    # Fetch Medications
    medications_row = medications[medications["Disease"] == disease]
    medications_text = medications_row["Medication"].values[0] if not medications_row.empty else "No medication data available."

    # Fetch Workout Tips
    workout_row = workout[workout["Disease"] == disease]
    if not workout_row.empty:
        workout_list = workout_row.iloc[:, 1:].values.flatten()
        workout_text = [f"{i+6} :  {tip}" for i, tip in enumerate(workout_list) if tip]
    else:
        workout_text = ["No workout recommendations available."]

    # Fetch Diet Recommendations
    diet_row = diets[diets["Disease"] == disease]
    diets_text = diet_row["Diet"].values[0] if not diet_row.empty else "No diet recommendations available."

    # Print Output in Required Format
    output = f"""
=================Predicted Disease============
{disease}
=================Description==================
{description_text}
=================Precautions==================
{chr(10).join(precautions_text)}
=================Medications==================
5 :  {medications_text}
=================Workout==================
{chr(10).join(workout_text)}
=================Diets==================
16 :  {diets_text}
"""
    return output


def predict_disease(symptom_input):
    """
    Predicts disease based on symptoms and fetches relevant information.
    """
    input_vector = preprocess_symptoms(symptom_input)

    # Predict disease
    predicted_class = model.predict(input_vector)[0]
    disease = label_encoder.inverse_transform([predicted_class])[0]

    # Get detailed information about the disease
    return get_disease_info(disease)


In [None]:
user_input = input("Enter symptoms separated by commas (e.g., itching, skin rash): ")
result = predict_disease(user_input)
print(result)

In [3]:


print(workout_df.columns)  # Check the actual column names in the diets DataFrame


NameError: name 'workout_df' is not defined