 ## Binary Classification using Logistic Regression (Insurance Dataset)

In [None]:
# 📦 Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, log_loss


In [None]:
# 📥 Read the insurance CSV into a DataFrame and preview its first rows
csv_path = '../datasets/insurance_data.csv'  # relative to the notebook's working directory
df = pd.read_csv(csv_path)
df.head()


In [None]:
# 📊 Scatter plot of Age against the Bought Insurance flag
# Work on the Axes returned by seaborn instead of the implicit pyplot state.
ax = sns.scatterplot(data=df, x='age', y='bought_insurance', marker='+', color='red')
ax.set(title="Age vs Bought Insurance", xlabel="Age", ylabel="Bought Insurance")
ax.grid(True)
plt.show()


In [None]:
# 🎯 Separate the target column (y) from the remaining feature columns (X)
target_col = 'bought_insurance'
y = df[target_col]
X = df.drop(columns=[target_col])  # every column except the target


In [None]:
# 🔀 Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
split_options = {'test_size': 0.2, 'random_state': 2}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [None]:
# ⚙️ Create a Logistic Regression classifier with default settings and fit it
# on the training split. The fit call stays last so the notebook still shows
# the fitted estimator as the cell output.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)


In [None]:
# 📈 Score the held-out test rows with the fitted model
# Per-class probabilities: one column per class, ordered as in model.classes_
y_proba = model.predict_proba(X_test)
# Hard class labels for the same rows
y_pred = model.predict(X_test)


In [None]:
# 📊 Model Evaluation
# Use the label order learned by the fitted model so the metrics line up with
# the columns of y_proba and stay well-defined even when the (small) test
# split happens to contain only one of the classes.
class_labels = model.classes_

# Fraction of test rows whose predicted label matches the true label
acc_score = accuracy_score(y_test, y_pred)
# Passing `labels` keeps one row/column per trained class, whatever y_test contains
con_matrix = confusion_matrix(y_test, y_pred, labels=class_labels)
# Text summary of per-class precision / recall / F1
class_report = classification_report(y_test, y_pred)
# Without `labels`, log_loss raises ValueError when y_test holds a single class
logloss = log_loss(y_test, y_proba, labels=class_labels)


In [None]:
# 🖨️ Print every metric as a (heading, value) pair, in report order
metric_lines = (
    ("✅ Accuracy:", acc_score),
    ("\n📉 Confusion Matrix:\n", con_matrix),
    ("\n📋 Classification Report:\n", class_report),
    ("🔐 Log Loss:", logloss),
)
for heading, value in metric_lines:
    print(heading, value)
