In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.utils import shuffle
from warnings import filterwarnings

# Suppress warnings
# NOTE(review): this silences every warning for the rest of the session,
# including scikit-learn's own diagnostics; consider narrowing the filter.
filterwarnings("ignore")

# Load the training and testing CSV files, then shuffle the training rows.
# NOTE(review): both paths are absolute and machine-specific.
train = pd.read_csv("C:/Users/KIIT/Desktop/major_project 2.0/Training.csv")
test = pd.read_csv("C:/Users/KIIT/Desktop/major_project 2.0/Testing.csv")
train = shuffle(train, random_state=42)

# Separate features and target variable from training data
X = train.drop(columns=["prognosis"])
Y = train["prognosis"]

# Split BEFORE scaling so that the held-out rows never contribute to the
# scaler's mean/variance (fitting the scaler on all rows leaks information
# from the evaluation partition into preprocessing). The partition depends
# only on the row count and random_state, so it is the same as before.
xtrain_raw, xtest_raw, ytrain, ytest = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

# Fit the scaler on the training partition only, then apply it everywhere.
# NOTE(review): tree splits are threshold-based, so standardising is not
# expected to change the tree's accuracy; the scaler is kept because later
# code transforms new inputs with it.
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain_raw)
xtest = scaler.transform(xtest_raw)

# Scaled copy of the full feature matrix, used by the later cross-validation step.
X_scaled = scaler.transform(X)

# Base estimator; the fixed seed keeps tree construction reproducible
dt = DecisionTreeClassifier(random_state=42)

# Candidate values tried for each hyperparameter (every combination is evaluated)
param_grid = dict(
    max_depth=[10, 15, 20, 25, 30],        # upper bound on tree depth
    min_samples_split=[2, 5, 10],          # rows required before a node may split
    min_samples_leaf=[1, 5, 10],           # rows required in every leaf
    max_features=['sqrt', 'log2', None],   # features considered per split (None = all)
)

# Exhaustive search: 5-fold cross-validation on the training partition, ranked by accuracy
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(xtrain, ytrain)

# Estimator refitted by GridSearchCV with the best-scoring combination
best_dt = grid_search.best_estimator_

# Predict with the tuned tree on the rows it was fitted on and on the held-out rows
tr_pred_dt, ts_pred_dt = map(best_dt.predict, (xtrain, xtest))

# Accuracy on both partitions and weighted precision on the hold-out, as percentages
training_accuracy = 100 * accuracy_score(ytrain, tr_pred_dt)
testing_accuracy = 100 * accuracy_score(ytest, ts_pred_dt)
precision = 100 * precision_score(ytest, ts_pred_dt, average='weighted')

print(f"Training accuracy: {training_accuracy:.2f}%")
print(f"Testing accuracy: {testing_accuracy:.2f}%")
print(f"Testing precision: {precision:.2f}%")

# 5-fold cross-validation of the tuned tree over the full scaled feature matrix;
# the mean fold score is reported as a percentage
cv_scores = cross_val_score(estimator=best_dt, X=X_scaled, y=Y, cv=5)
print(f"Cross-validated accuracy: {cv_scores.mean() * 100:.2f}%")

# List of all symptoms in the dataset (the training feature columns, in order)
all_symptoms = X.columns.tolist()

# Predict on the separate testing file. Columns are selected by name in the
# training order, so a reordered or extra column in the testing file cannot
# shift features; a missing column raises a KeyError instead of passing silently.
P_scaled = scaler.transform(test[all_symptoms])

# Attach predictions positionally. assign() does not depend on the frame's
# index labels, unlike an index-aligned join against a freshly built frame.
test_with_predictions = test.assign(predicted=best_dt.predict(P_scaled))
print(test_with_predictions[["prognosis", "predicted"]].head())

# User input section
def get_user_symptoms(symptom_names=None, transform=None, read_input=input):
    """Prompt for symptoms and return them as a scaled one-row feature matrix.

    The user is asked repeatedly for a symptom name until they type ``done``
    (or the input stream ends). Names are matched case-insensitively and
    ignoring surrounding whitespace; unknown names are reported and skipped,
    and repeated names count once.

    Args:
        symptom_names: Ordered feature names. Defaults to the module-level
            ``all_symptoms`` list.
        transform: Callable applied to the one-row ``DataFrame`` of 0/1
            indicators. Defaults to the module-level ``scaler.transform``.
        read_input: Callable taking a prompt and returning one line of text.
            Defaults to the built-in ``input``.

    Returns:
        Whatever ``transform`` returns for the one-row indicator frame.
    """
    names = all_symptoms if symptom_names is None else list(symptom_names)
    if transform is None:
        transform = scaler.transform

    # Normalise the known names the same way the typed text is normalised, so
    # a column containing capitals or stray whitespace can still be selected.
    # setdefault keeps the first position when two names normalise alike.
    index_by_name = {}
    for position, name in enumerate(names):
        index_by_name.setdefault(str(name).strip().lower(), position)

    print("\nPlease enter symptoms from the list below (type 'done' when finished):")
    print(", ".join(names))

    selected_positions = set()
    while True:
        try:
            symptom = read_input("Enter a symptom (or type 'done' to finish): ").strip().lower()
        except EOFError:
            # Input stream closed: treat it like 'done' instead of crashing.
            break
        if symptom == 'done':
            break
        if symptom in index_by_name:
            selected_positions.add(index_by_name[symptom])
        else:
            print(f"'{symptom}' is not a recognized symptom. Please try again.")

    if not selected_positions:
        print("No symptoms were entered; the prediction will not be meaningful.")

    # Create a feature vector based on user input: 1 where a symptom was given
    user_symptom_vector = np.zeros(len(names))
    for position in selected_positions:
        user_symptom_vector[position] = 1

    # Pass a named-column frame so the columns stay tied to their feature names.
    feature_row = pd.DataFrame([user_symptom_vector], columns=names)
    return transform(feature_row)

# Collect the symptoms interactively; the result is a single scaled feature row
user_symptom_vector = get_user_symptoms()

# Classify that row with the tuned tree (predict returns one label per input row)
predicted_disease = best_dt.predict(user_symptom_vector)

# Report the label predicted for the single row
print(
    "\nBased on the symptoms provided, the predicted disease is:",
    predicted_disease.item(0),
)


Training accuracy: 89.86%
Testing accuracy: 86.89%
Testing precision: 88.85%
Cross-validated accuracy: 84.55%
             prognosis            predicted
0     Fungal infection              Allergy
1              Allergy              Allergy
2                 GERD                 GERD
3  Chronic cholestasis  Chronic cholestasis
4        Drug Reaction        Drug Reaction

Please enter symptoms from the list below (type 'done' when finished):
itching, skin_rash, nodal_skin_eruptions, continuous_sneezing, shivering, chills, joint_pain, stomach_pain, acidity, ulcers_on_tongue, muscle_wasting, vomiting, burning_micturition, spotting_ urination, fatigue, weight_gain, anxiety, cold_hands_and_feets, mood_swings, weight_loss, restlessness, lethargy, patches_in_throat, irregular_sugar_level, cough, high_fever, sunken_eyes, breathlessness, sweating, dehydration, indigestion, headache, yellowish_skin, dark_urine, nausea, loss_of_appetite, pain_behind_the_eyes, back_pain, constipation, abdominal_p

Enter a symptom (or type 'done' to finish):  mucoid_sputum
Enter a symptom (or type 'done' to finish):  family_history
Enter a symptom (or type 'done' to finish):  breathlessness
Enter a symptom (or type 'done' to finish):  high_fever
Enter a symptom (or type 'done' to finish):  cough
Enter a symptom (or type 'done' to finish):  fatigue
Enter a symptom (or type 'done' to finish):  done



Based on the symptoms provided, the predicted disease is: Bronchial Asthma
