In [None]:
import pandas as pd
import numpy as np
import pickle  # For saving/loading models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 1: Load Datasets
symptoms_df = pd.read_csv("SymptomsBinaryMatrix.csv")  # Binary symptoms dataset
precautions_df = pd.read_csv("cleaned_disease_precaution.csv")  # Precaution dataset

# Step 2: Encode Disease Labels
label_encoder = LabelEncoder()
symptoms_df["Disease"] = label_encoder.fit_transform(symptoms_df["Disease"])

# Save the label encoder for future use
with open("label_encoder.pkl", "wb") as encoder_file:
    pickle.dump(label_encoder, encoder_file)

# Step 3: Split Data into Training and Testing
X = symptoms_df.drop(columns=["Disease"])  # Features (Symptoms)
y = symptoms_df["Disease"]  # Target (Disease)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train the Model (Random Forest)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 5: Evaluate the Model
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Model Accuracy: {accuracy:.2f}")  # Example: 92%
print("Classification Report:\n", classification_report(y_test, y_pred))

# Step 6: Save the Trained Model
with open("disease_prediction_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
print("✅ Model saved successfully!")

# Step 7: Function to Predict Disease & Fetch Precautions
def predict_disease(symptom_input):
    """Predicts disease from symptoms and provides precautions"""
    # Load trained model
    with open("disease_prediction_model.pkl", "rb") as model_file:
        model = pickle.load(model_file)

    # Load label encoder
    with open("label_encoder.pkl", "rb") as encoder_file:
        label_encoder = pickle.load(encoder_file)

    # Ensure correct shape
    symptom_input = np.array(symptom_input).reshape(1, -1)

    # Predict disease
    prediction = model.predict(symptom_input)
    predicted_disease = label_encoder.inverse_transform(prediction)[0]

    # Fetch Precautions
    precautions = precautions_df[precautions_df["Disease"] == predicted_disease].values
    if len(precautions) > 0:
        precautions_list = precautions[0][1:]  # Skip the disease column
    else:
        precautions_list = ["No specific precautions available."]

    return predicted_disease, precautions_list



✅ Model Accuracy: 0.00
Classification Report:
               precision    recall  f1-score   support

           4       0.00      0.00      0.00       1.0
           8       0.00      0.00      0.00       1.0
          10       0.00      0.00      0.00       0.0
          12       0.00      0.00      0.00       0.0
          13       0.00      0.00      0.00       1.0
          19       0.00      0.00      0.00       1.0
          20       0.00      0.00      0.00       0.0
          21       0.00      0.00      0.00       0.0
          24       0.00      0.00      0.00       1.0
          25       0.00      0.00      0.00       1.0
          29       0.00      0.00      0.00       1.0
          37       0.00      0.00      0.00       0.0
          38       0.00      0.00      0.00       0.0
          39       0.00      0.00      0.00       1.0
          40       0.00      0.00      0.00       1.0

    accuracy                           0.00       9.0
   macro avg       0.00      0.00

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [28]:
from google.colab import files
uploaded = files.upload()


Saving DiseasePredictionModel (11).pkl to DiseasePredictionModel (11).pkl
