In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# ---------------------------------------------------------------------------
# Compare three classifiers on the insurance-claim dataset.
# Pipeline: load CSV -> stratified train/test split -> standardise features
# -> fit Logistic Regression, KNN and a Decision Tree -> report accuracy and
# confusion matrices on the held-out test split.
# ---------------------------------------------------------------------------

# Load the raw table; "insuranceclaim" is the target column, every other
# column is used as a feature.
df = pd.read_csv("insurance2.csv")
y = df["insuranceclaim"]
X = df.drop(columns="insuranceclaim")

# Hold out 25% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

# Standardise features for the models trained on scaled data (Logistic
# Regression and KNN). The scaler is fitted on the training split only so no
# test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build and fit each estimator (fit() returns the estimator itself).
# The decision tree is trained on the unscaled features.
lr = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_scaled, y_train)
dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict on the test split, giving each model the same feature
# representation it was trained on.
lr_pred, knn_pred = (clf.predict(X_test_scaled) for clf in (lr, knn))
dt_pred = dt.predict(X_test)

# Fraction of correctly classified test rows per model.
lr_acc, knn_acc, dt_acc = (
    accuracy_score(y_test, predicted)
    for predicted in (lr_pred, knn_pred, dt_pred)
)

# Report: all accuracies first, then one confusion matrix per model
# (computed with y_test as the true labels and the model's predictions).
model_names = ("Logistic Regression", "KNN", "Decision Tree")

for model_name, acc in zip(model_names, (lr_acc, knn_acc, dt_acc)):
    print(f"{model_name} Accuracy:", acc)

for model_name, pred in zip(model_names, (lr_pred, knn_pred, dt_pred)):
    print(f"\n{model_name} Confusion Matrix")
    print(confusion_matrix(y_test, pred))



Logistic Regression Accuracy: 0.8626865671641791
KNN Accuracy: 0.8805970149253731
Decision Tree Accuracy: 0.9671641791044776

Logistic Regression Confusion Matrix
[[117  22]
 [ 24 172]]

KNN Confusion Matrix
[[120  19]
 [ 21 175]]

Decision Tree Confusion Matrix
[[131   8]
 [  3 193]]
