<a href="https://colab.research.google.com/github/noor-prabh/disease_prediction/blob/main/Disease_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install xgboost scikit-learn pandas matplotlib seaborn --quiet


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.datasets import load_breast_cancer, load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [None]:
def run_models(X, y, dataset_name, random_state=42):
  """Train and evaluate three classifiers on a single train/test split.

  The data is split 80/20. Logistic Regression is fitted on standardized
  features, while Random Forest and XGBoost are fitted on the unscaled
  features. For every model the accuracy, confusion matrix and
  classification report on the held-out split are printed.

  Args:
    X: Feature matrix (DataFrame or array-like), one row per sample.
    y: Target labels aligned with the rows of ``X``.
    dataset_name: Label shown in the printed section header.
    random_state: Seed used for the split and for the stochastic estimators
      so that re-running the cell reproduces the same numbers. Defaults to
      42, the seed the split has always used.

  Returns:
    None. All results are written to stdout.
  """
  print(f"\n=== {dataset_name} Dataset ===")

  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=0.2, random_state=random_state)

  # Fit the scaler on the training split only, then reuse its statistics on
  # the test split, so nothing from the held-out data leaks into training.
  scaler = StandardScaler()
  X_train_scaled = scaler.fit_transform(X_train)
  X_test_scaled = scaler.transform(X_test)

  # (display name, estimator, training features, test features)
  # Only the linear model receives the standardized features.
  experiments = [
      ("Logistic Regression", LogisticRegression(),
       X_train_scaled, X_test_scaled),
      ("Random Forest", RandomForestClassifier(random_state=random_state),
       X_train, X_test),
      ("XGBoost", XGBClassifier(random_state=random_state),
       X_train, X_test),
  ]

  def evaluate_model(name, y_pred):
    """Print accuracy, confusion matrix and per-class report for one model."""
    print(f"\n{name} Results:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))

  for name, model, train_features, test_features in experiments:
    model.fit(train_features, y_train)
    evaluate_model(name, model.predict(test_features))



In [None]:
# Breast-cancer benchmark: take the dataset bundled with scikit-learn, wrap the
# feature array in a DataFrame with named columns, and run the model comparison.
cancer = load_breast_cancer()
cancer_target = cancer.target
cancer_df = pd.DataFrame(data=cancer.data, columns=cancer.feature_names)
run_models(X=cancer_df, y=cancer_target, dataset_name="Breast Cancer")

n==Breast Cancer Dataset===

Logistic Regression Results:
Accuracy: 0.9736842105263158
Confusion Matrix:
 [[41  2]
 [ 1 70]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.95      0.96        43
           1       0.97      0.99      0.98        71

    accuracy                           0.97       114
   macro avg       0.97      0.97      0.97       114
weighted avg       0.97      0.97      0.97       114


Random Forest Results:
Accuracy: 0.9649122807017544
Confusion Matrix:
 [[40  3]
 [ 1 70]]
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114


XGBoost Results:
Accuracy: 0.956140350877193
Confusion Matrix:
 [[40  3]
 [ 2 69]]

In [None]:
# Diabetes benchmark: read the CSV from the plotly datasets repository, use the
# 'Outcome' column as the label and every other column as a feature.
diabetes_df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
)
y_diabetes = diabetes_df['Outcome']
X_diabetes = diabetes_df.drop(columns='Outcome')
run_models(X=X_diabetes, y=y_diabetes, dataset_name="Diabetes")

n==Diabetes Dataset===

Logistic Regression Results:
Accuracy: 0.7532467532467533
Confusion Matrix:
 [[79 20]
 [18 37]]
Classification Report:
               precision    recall  f1-score   support

           0       0.81      0.80      0.81        99
           1       0.65      0.67      0.66        55

    accuracy                           0.75       154
   macro avg       0.73      0.74      0.73       154
weighted avg       0.76      0.75      0.75       154


Random Forest Results:
Accuracy: 0.7402597402597403
Confusion Matrix:
 [[79 20]
 [20 35]]
Classification Report:
               precision    recall  f1-score   support

           0       0.80      0.80      0.80        99
           1       0.64      0.64      0.64        55

    accuracy                           0.74       154
   macro avg       0.72      0.72      0.72       154
weighted avg       0.74      0.74      0.74       154


XGBoost Results:
Accuracy: 0.7077922077922078
Confusion Matrix:
 [[73 26]
 [19 36]]
Cl