Binary Classification

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Step 1: Load the dataset from a CSV file
# Point csv_path at your own copy of the insurance data if it lives elsewhere
csv_path = '/content/drive/MyDrive/CSV Files/insurance_data.csv'
df = pd.read_csv(csv_path)

# Preview the first few rows to confirm the file loaded as expected
print(df.head())

# Step 2: Select the feature and target columns
y = df['bought_insurance']  # Target (dependent variable)
X = df[['age']]  # Feature (independent variable); double brackets keep X a DataFrame

# Step 3: Visualize the data
# Draw onto an explicit Axes object rather than relying on pyplot's current figure
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(data=df, x='age', y='bought_insurance', ax=ax)
ax.set_title('Age vs Bought Insurance')
ax.set_xlabel('Age')
ax.set_ylabel('Bought Insurance')
plt.show()

# Step 4: Hold out 20% of the rows for testing (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 5: Create the logistic regression classifier and fit it on the training split
# (fit() returns the fitted estimator, so construction and training can be chained)
model = LogisticRegression().fit(X_train, y_train)

# Step 6: Predict class labels for the held-out rows
y_pred = model.predict(X_test)

# Step 7: Compute the evaluation metrics, then report them
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)
print("Classification Report:\n", report)

# Step 8: Visualize the decision boundary
# Scatter the observed outcomes (colored by class) and overlay the model's
# predicted probability of the positive class across the observed age range.
plt.figure(figsize=(8, 6))
sns.scatterplot(x='age', y='bought_insurance', data=df, hue='bought_insurance')
plt.title('Age vs Bought Insurance with Decision Boundary')
plt.xlabel('Age')
plt.ylabel('Bought Insurance')

# Plot the logistic regression curve
# 100 evenly spaced ages between the smallest and largest age in the data
ages = np.linspace(df['age'].min(), df['age'].max(), 100)
# The model was fitted on a DataFrame whose single column is named 'age'.
# Passing a bare NumPy array here makes scikit-learn warn that X lacks valid
# feature names, so wrap the grid in a DataFrame with the same column name.
age_grid = pd.DataFrame({'age': ages})
# Column 1 of predict_proba is the probability of the second class in
# model.classes_ (the positive class when labels are 0/1).
probabilities = model.predict_proba(age_grid)[:, 1]
plt.plot(ages, probabilities, color='red', label='Logistic Regression Curve')

plt.legend()
plt.show()