# Binary Classification with Logistic Regression and K-Nearest Neighbors (KNN)

### 1. Import libraries

In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler

### 2. Load dataset

In [2]:
# Location of the diabetes CSV. Set the DIABETES_CSV environment variable to
# point at the file when running on another machine; when it is unset, the
# original local path is used so the existing setup keeps working unchanged.
DATA_PATH = os.environ.get("DIABETES_CSV", r"C:\Users\bbuser\Desktop\Jupyter\diabetes.csv")
data = pd.read_csv(DATA_PATH)
data.head()  # preview the first rows as the cell output

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### 3. Split features and target

In [3]:
# Target: the "Outcome" column. Features: every remaining column.
y = data.loc[:, "Outcome"]
X = data.drop(columns="Outcome")

### 4. Split into training and test sets

In [4]:
# Hold out 20% of the rows for testing. The split is seeded for
# reproducibility and stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

### 5. Scale the features (important for KNN)

In [5]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transformation to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### 6. Train Logistic Regression

In [7]:
# Fit logistic regression on the scaled training data (iteration cap: 1000).
log_reg = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
log_reg  # show the fitted estimator as the cell output

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [8]:
# Predicted class labels from logistic regression for the scaled test features.
y_pred_log = log_reg.predict(X=X_test_scaled)

### 7. Train KNN

In [9]:
# Fit a 5-nearest-neighbours classifier on the scaled training data, then
# predict class labels for the scaled test features.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
y_pred_knn = knn.predict(X_test_scaled)

### 8. Evaluate Logistic Regression

In [11]:
# Headline metrics for logistic regression on the held-out test set,
# printed one per line in a fixed order.
print("Logistic Regression Performance:")
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(label, metric(y_test, y_pred_log))

Logistic Regression Performance:
Accuracy: 0.7142857142857143
Precision: 0.6086956521739131
Recall: 0.5185185185185185
F1 Score: 0.56


In [13]:
# Per-class precision/recall/F1 for logistic regression on the test set.
# The heading and report are concatenated rather than passed as two print()
# arguments: print() would insert its default " " separator right after the
# newline, shifting the report's header row one column out of alignment with
# the rows beneath it.
print("\nClassification Report:\n" + classification_report(y_test, y_pred_log))


Classification Report:
               precision    recall  f1-score   support

           0       0.76      0.82      0.79       100
           1       0.61      0.52      0.56        54

    accuracy                           0.71       154
   macro avg       0.68      0.67      0.67       154
weighted avg       0.71      0.71      0.71       154



### 9. Evaluate KNN

In [16]:
# Headline metrics for KNN on the held-out test set, printed one per line
# in a fixed order.
print("KNN Performance:")
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
):
    print(label, metric(y_test, y_pred_knn))

KNN Performance:
Accuracy: 0.7012987012987013
Precision: 0.5833333333333334
Recall: 0.5185185185185185
F1 Score: 0.5490196078431373


In [17]:
# Per-class precision/recall/F1 for KNN on the test set.
# The heading and report are concatenated rather than passed as two print()
# arguments: print() would insert its default " " separator right after the
# newline, shifting the report's header row one column out of alignment with
# the rows beneath it.
print("\nClassification Report:\n" + classification_report(y_test, y_pred_knn))


Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.80      0.78       100
           1       0.58      0.52      0.55        54

    accuracy                           0.70       154
   macro avg       0.67      0.66      0.66       154
weighted avg       0.69      0.70      0.70       154



### 10. Compare models

In [18]:
# Qualitative comparison notes, emitted one line at a time.
for note in (
    "\nModel Comparison:",
    "Logistic Regression vs KNN",
    "- Logistic Regression is good when the relationship between features and outcome is more linear.",
    "- KNN can work well but is sensitive to scaling and choice of 'k'.",
    "- Compare based on which has higher F1-score and Recall, since detecting diabetes correctly is very important.",
):
    print(note)


Model Comparison:
Logistic Regression vs KNN
- Logistic Regression is good when the relationship between features and outcome is more linear.
- KNN can work well but is sensitive to scaling and choice of 'k'.
- Compare based on which has higher F1-score and Recall, since detecting diabetes correctly is very important.
