In [1]:
import os
from pathlib import Path
from warnings import filterwarnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score

# Suppress warnings
# NOTE: this silences every warning for the rest of the session, including
# scikit-learn diagnostics; comment it out while debugging.
filterwarnings("ignore")

# Folder holding Training.csv / Testing.csv. The default is the original local
# folder; set the DISEASE_DATA_DIR environment variable to run the notebook on
# another machine without editing the code.
DATA_DIR = Path(os.environ.get("DISEASE_DATA_DIR", "C:/Users/KIIT/Desktop/major_project 2.0"))
TARGET_COLUMN = "prognosis"

# Load the training and testing data
train = pd.read_csv(DATA_DIR / "Training.csv")
test = pd.read_csv(DATA_DIR / "Testing.csv")

# Separate features and target variable from training data
X = train.drop(columns=[TARGET_COLUMN])
Y = train[TARGET_COLUMN]

# Separate test features
P = test.drop(columns=[TARGET_COLUMN])

# Split the data into training and testing sets (80/20, fixed seed)
xtrain, xtest, ytrain, ytest = train_test_split(X, Y, test_size=0.2, random_state=42)

# Tweak the hyperparameters to reduce overfitting
param_grid = {
    'n_estimators': [50, 100],  # Fewer trees to reduce complexity
    'max_depth': [3, 4],        # Shallower trees to avoid overfitting
    'min_samples_split': [10, 15],  # Increase the minimum samples to split
    'min_samples_leaf': [10, 15],   # Increase the minimum samples per leaf
}

# Initialize Random Forest model
rf = RandomForestClassifier(random_state=42)

# Use GridSearchCV for hyperparameter tuning (5-fold CV on the training split only)
grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='accuracy')
grid_search.fit(xtrain, ytrain)

# Get the best model after tuning (refitted on all of xtrain by GridSearchCV)
best_rf = grid_search.best_estimator_

# Evaluate the best model
tr_pred_rf = best_rf.predict(xtrain)
ts_pred_rf = best_rf.predict(xtest)

# Calculate accuracy and precision
training_accuracy = accuracy_score(ytrain, tr_pred_rf) * 100
testing_accuracy = accuracy_score(ytest, ts_pred_rf) * 100
# zero_division=0 scores a class that was never predicted as 0, which is what
# the default does too, but without depending on the warning being silenced.
precision = precision_score(ytest, ts_pred_rf, average='weighted', zero_division=0) * 100

print(f"Training accuracy: {training_accuracy:.2f}%")
print(f"Testing accuracy: {testing_accuracy:.2f}%")
print(f"Testing precision: {precision:.2f}%")

# Perform cross-validation to ensure model generalization.
# cross_val_score refits clones of best_rf on folds of the full training file;
# the scorer is named explicitly so it matches the printed label.
cv_scores = cross_val_score(best_rf, X, Y, cv=5, scoring='accuracy')
print(f"Cross-validated accuracy: {np.mean(cv_scores) * 100:.2f}%")

# Append predictions to test data for evaluation.
# assign() attaches the array positionally, so it does not depend on the
# index of `test` lining up with a freshly built DataFrame.
test_with_predictions = test.assign(predicted=best_rf.predict(P))
print(test_with_predictions[[TARGET_COLUMN, "predicted"]].head())

# List of all symptoms in the dataset
all_symptoms = X.columns.tolist()

# User input section
def get_user_symptoms(symptom_names=None):
    """Interactively collect symptoms and encode them as a single feature row.

    The known symptom names are printed, then the user is prompted repeatedly
    until they type 'done'. Each entry is stripped and lower-cased and matched
    against the known names, first verbatim and then in a normalized form in
    which any run of spaces/underscores counts as one underscore (so
    'high fever' matches 'high_fever', and a column spelled
    'spotting_ urination' can be reached by typing 'spotting_urination').
    Unrecognized entries are reported and ignored.

    Parameters
    ----------
    symptom_names : sequence of str, optional
        Symptom names in feature order. When omitted, the module-level
        ``all_symptoms`` list is used.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(1, len(symptom_names))`` with 1.0 at the
        position of every symptom the user entered and 0.0 elsewhere.
    """
    names = list(all_symptoms if symptom_names is None else symptom_names)

    def _normalize(text):
        # Lower-case, then collapse every run of spaces/underscores into one "_".
        return "_".join(str(text).lower().replace("_", " ").split())

    # Name -> column position lookups. setdefault keeps the first occurrence,
    # matching what list.index() would return for duplicated names.
    exact_index = {}
    normalized_index = {}
    for position, name in enumerate(names):
        exact_index.setdefault(name, position)
        normalized_index.setdefault(_normalize(name), position)

    print("\nPlease enter symptoms from the list below (type 'done' when finished):")
    print(", ".join(map(str, names)))

    # Fill the feature vector directly as symptoms are recognized.
    user_symptom_vector = np.zeros(len(names))

    while True:
        symptom = input("Enter a symptom (or type 'done' to finish): ").strip().lower()
        if symptom == 'done':
            break

        # Verbatim match first, so previously accepted inputs resolve as before.
        position = exact_index.get(symptom)
        if position is None:
            position = normalized_index.get(_normalize(symptom))

        if position is None:
            print(f"'{symptom}' is not a recognized symptom. Please try again.")
            continue

        user_symptom_vector[position] = 1

    return user_symptom_vector.reshape(1, -1)

# Get symptoms from user and make a prediction
user_symptom_vector = get_user_symptoms()

# The model was fitted on a DataFrame, so give it a DataFrame with the same
# column names. Passing the bare NumPy array makes scikit-learn warn that the
# input lacks feature names and skips its column-name consistency check.
user_features = pd.DataFrame(user_symptom_vector, columns=all_symptoms)

# An all-zero row means no symptom was recognized; the model will still return
# some class, so tell the user the result is not backed by any input.
if not user_symptom_vector.any():
    print("\nNo symptoms were entered, so the prediction below is not based on any input.")

# Ensure correct prediction by matching input with dataset features
predicted_disease = best_rf.predict(user_features)

print("\nBased on the symptoms provided, the predicted disease is:", predicted_disease[0])


Training accuracy: 97.18%
Testing accuracy: 97.26%
Testing precision: 96.60%
Cross-validated accuracy: 98.43%
             prognosis            predicted
0     Fungal infection     Fungal infection
1              Allergy              Allergy
2                 GERD                 GERD
3  Chronic cholestasis  Chronic cholestasis
4        Drug Reaction        Drug Reaction

Please enter symptoms from the list below (type 'done' when finished):
itching, skin_rash, nodal_skin_eruptions, continuous_sneezing, shivering, chills, joint_pain, stomach_pain, acidity, ulcers_on_tongue, muscle_wasting, vomiting, burning_micturition, spotting_ urination, fatigue, weight_gain, anxiety, cold_hands_and_feets, mood_swings, weight_loss, restlessness, lethargy, patches_in_throat, irregular_sugar_level, cough, high_fever, sunken_eyes, breathlessness, sweating, dehydration, indigestion, headache, yellowish_skin, dark_urine, nausea, loss_of_appetite, pain_behind_the_eyes, back_pain, constipation, abdominal_p

Enter a symptom (or type 'done' to finish):  mucoid_sputum
Enter a symptom (or type 'done' to finish):  family_history
Enter a symptom (or type 'done' to finish):  breathlessness
Enter a symptom (or type 'done' to finish):  high_fever
Enter a symptom (or type 'done' to finish):  cough
Enter a symptom (or type 'done' to finish):  fatigue
Enter a symptom (or type 'done' to finish):  done



Based on the symptoms provided, the predicted disease is: Bronchial Asthma
