In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import numpy as np

In [2]:
# Load the raw medical records from the CSV file (path is relative to the
# notebook's working directory).
data = pd.read_csv(filepath_or_buffer="medical.csv")

In [15]:
# `data` is already a DataFrame (pd.read_csv returns one), so re-wrapping it in
# pd.DataFrame() adds nothing and does not copy the underlying values.  Take an
# explicit copy instead, so the columns added to `df` in later cells never
# alter the raw `data` frame.
df = data.copy()

In [16]:
# Create target column: 1 where Diagnosis is exactly 'Diabetes', 0 for every
# other value (missing values compare unequal and therefore also become 0).
# The vectorised comparison replaces the per-row Python lambda; the int64 cast
# yields the same 0/1 integer column the lambda produced.
df['Diabetes'] = (df['Diagnosis'] == 'Diabetes').astype('int64')

In [17]:
# Keep only the model inputs plus the target, and drop every row that has a
# missing value in any of those columns.
# The trailing .copy() makes df_clean an independent frame: it is assigned to
# afterwards (the Gender encoding), and without the copy pandas may emit
# SettingWithCopyWarning because the frame was derived by slicing `df`.
df_clean = (
    df[['Age', 'Gender', 'Cholesterol', 'Heart_Rate', 'Diabetes']]
    .dropna()
    .copy()
)

In [14]:
# Replace the Gender labels with integer codes.
# LabelEncoder numbers the distinct labels in sorted order, so the mapping is
# F=0, M=1 only if those are the labels actually present -- inspect
# le.classes_ to confirm.
le = LabelEncoder().fit(df_clean['Gender'])
df_clean['Gender'] = le.transform(df_clean['Gender'])

In [13]:
# Model inputs (X) are the four predictor columns; the label (y) is the
# binary Diabetes column.
X = df_clean.loc[:, ['Age', 'Gender', 'Cholesterol', 'Heart_Rate']]
y = df_clean.loc[:, 'Diabetes']

In [12]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# repeatable.
# NOTE(review): the split is not stratified, and the recorded classification
# report shows a test set of 2 rows with no positive samples -- consider
# stratify=y once every class has enough rows to be split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [11]:
# 1. Random Forest: 100 trees, seeded so repeated runs build the same forest
rf_model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)

In [19]:
# 2. Gradient Boosting: 100 boosting stages, seeded for repeatable results
gb_model = GradientBoostingClassifier(
    random_state=42,
    n_estimators=100,
)

In [21]:
# 3. Voting Classifier (hard voting): each base model casts one vote with its
# predicted class label.
# NOTE(review): with only two voters a 1-1 tie is possible; per the
# scikit-learn docs ties are resolved by ascending class-label order, which
# would favour class 0 here -- confirm this is acceptable or add a third model.
voting_clf = VotingClassifier(
    voting='hard',
    estimators=[('rf', rf_model), ('gb', gb_model)],
)

In [22]:
# Train all models on the training split.
# rf_model and gb_model are fitted directly because their individual
# predictions are compared against the ensemble in the following cells.
# NOTE(review): per the scikit-learn docs VotingClassifier fits clones of its
# estimators, so the third call presumably trains its own copies rather than
# reusing the two fits above -- confirm.
rf_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)
voting_clf.fit(X_train, y_train)

In [23]:
# Predict the held-out rows with every fitted model, in the order
# random forest, gradient boosting, voting ensemble.
rf_pred, gb_pred, voting_pred = (
    fitted.predict(X_test) for fitted in (rf_model, gb_model, voting_clf)
)

In [24]:

# Results: accuracy of each model on the held-out test rows
for caption, predicted in (
    ("Random Forest Accuracy:", rf_pred),
    ("Gradient Boosting Accuracy:", gb_pred),
    ("Voting Classifier Accuracy:", voting_pred),
):
    print(caption, accuracy_score(y_test, predicted))

Random Forest Accuracy: 0.5
Gradient Boosting Accuracy: 0.5
Voting Classifier Accuracy: 0.5


In [25]:
# Per-class precision / recall / F1 for the voting ensemble.
# zero_division=0 keeps the 0.0 scores scikit-learn reports for a class that
# has no true (or no predicted) samples in the test split, but without
# emitting an UndefinedMetricWarning for each such metric.
print(
    "\nVoting Classifier Classification Report:\n",
    classification_report(y_test, voting_pred, zero_division=0),
)


Voting Classifier Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.00      0.00      0.00         0

    accuracy                           0.50         2
   macro avg       0.50      0.25      0.33         2
weighted avg       1.00      0.50      0.67         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
