# In [None]:
# -------------------------------
# Step 1: Import Libraries
# -------------------------------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

# Apply seaborn's "whitegrid" style once, before any figure below is drawn.
sns.set(style="whitegrid")
# -------------------------------
# Step 2: Load Dataset
# -------------------------------
from google.colab import files

# files.upload() returns a dict keyed by the name each uploaded file was
# stored under. Read that name back instead of hard-coding "bank.csv":
# Colab may store a re-upload under a different name (e.g. "bank (1).csv"),
# in which case a hard-coded path would silently load an older copy.
uploaded = files.upload()  # Upload your Bank Marketing CSV file (e.g., bank.csv)
if not uploaded:
    # The upload dialog was cancelled; fail here with a clear message rather
    # than later with an unrelated "file not found" from pandas.
    raise FileNotFoundError(
        "No file was uploaded; please upload the Bank Marketing CSV file."
    )
csv_name = next(iter(uploaded))

# Load CSV
# sep=None (python engine) makes pandas sniff the delimiter, so both
# comma-separated and semicolon-separated copies of the data are parsed into
# proper columns instead of one wide text column.
df = pd.read_csv(csv_name, sep=None, engine="python")
print("Dataset Loaded Successfully!\n")
display(df.head())
# -------------------------------
# Step 3: Inspect Dataset
# -------------------------------
# Overall dimensions (rows, columns) followed by the column labels.
dataset_shape = df.shape
print("Shape of dataset:", dataset_shape)
column_labels = df.columns
print("\nColumns:", column_labels)

# Per-column dtype and non-null summary; info() prints its report itself.
print("\nDataset Info:")
df.info()

# Number of missing entries in each column.
missing_per_column = df.isna().sum()
print("\nMissing values per column:")
print(missing_per_column)
# -------------------------------
# Step 4: Exploratory Data Analysis (EDA)
# -------------------------------
# Each chart gets its own figure and is drawn onto that figure's axes.

# Age: 30-bin histogram with a KDE curve overlaid.
_, age_ax = plt.subplots(figsize=(6,4))
sns.histplot(data=df, x='age', bins=30, kde=True, ax=age_ax)
age_ax.set_title("Age Distribution")
plt.show()

# Job: number of rows per job category; labels tilted so they do not overlap.
_, job_ax = plt.subplots(figsize=(8,4))
sns.countplot(data=df, x='job', ax=job_ax)
job_ax.set_title("Job Distribution")
plt.xticks(rotation=45)
plt.show()

# Marital status: rows per category, split by the value of the target 'y'.
_, marital_ax = plt.subplots(figsize=(6,4))
sns.countplot(data=df, x='marital', hue='y', ax=marital_ax)
marital_ax.set_title("Marital Status vs Loan Acceptance")
plt.show()
# -------------------------------
# Step 5: Encode Categorical Features
# -------------------------------
# Convert text features to numbers.
# Every non-numeric column is picked up automatically, so a text column that
# is missing from a hand-written list can no longer reach the model as raw
# strings, and a listed column that is absent from the CSV can no longer
# raise KeyError. Re-running this cell is a no-op once all columns are numeric.
categorical_cols = df.select_dtypes(exclude="number").columns.tolist()

# One fitted encoder per column: each integer code can be mapped back to its
# original label later via label_encoders[col].classes_ or
# label_encoders[col].inverse_transform(...). Reusing a single encoder would
# keep only the mapping of the last column it was fitted on.
label_encoders = {}
for col in categorical_cols:
    label_encoders[col] = LabelEncoder()
    df[col] = label_encoders[col].fit_transform(df[col])

# `le` stays defined for any later code that relies on it; it is the encoder
# fitted on the target column 'y' whenever that column needed encoding.
le = label_encoders.get('y', LabelEncoder())

print("\nDataset after encoding:")
display(df.head())
# -------------------------------
# Step 6: Define Features & Target
# -------------------------------
target_col = 'y'

# Target = the 'y' column (loan acceptance); presumably 0 = no, 1 = yes after
# label encoding — confirm against the encoder's classes.
y = df[target_col]

# Features = every remaining column.
# NOTE(review): if this is the UCI Bank Marketing data, its documentation
# advises excluding `duration` for a realistic model — confirm before relying
# on the reported accuracy.
X = df.drop(columns=target_col)
# -------------------------------
# Step 7: Split Dataset into Train & Test
# -------------------------------
# 80/20 split with a fixed seed for reproducibility.
# stratify=y keeps the proportion of each target class the same in the train
# and test sets; without it a random split can leave the test set with a
# different share of the minority class, which skews the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
# -------------------------------
# Step 8: Train Logistic Regression Model
# -------------------------------
# fit() returns the estimator itself, so construction and training chain.
# max_iter is raised to 1000 to give the solver more iterations to converge.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
# -------------------------------
# Step 9: Make Predictions
# -------------------------------
# Predicted class label for every row of the held-out test set.
y_pred = model.predict(X_test)
# -------------------------------
# Step 10: Evaluate Model
# -------------------------------
# Accuracy: share of test rows predicted correctly, shown as a percentage
# rounded to two decimals.
accuracy = accuracy_score(y_test, y_pred)
accuracy_pct = round(accuracy*100,2)
print("Model Accuracy:", accuracy_pct, "%")

# Confusion Matrix: rows are the actual classes, columns the predicted ones;
# each cell is annotated with its integer count.
cm = confusion_matrix(y_test, y_pred)
_, cm_ax = plt.subplots(figsize=(5,4))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_xlabel("Predicted")
cm_ax.set_ylabel("Actual")
cm_ax.set_title("Confusion Matrix")
plt.show()
# -------------------------------
# Step 11: Conclusion
# -------------------------------
# Static summary text; it is not computed from the metrics above, so keep it
# in sync with the actual results by hand.
conclusion_text = """## Conclusion
- We trained a Logistic Regression model to predict personal loan acceptance.
- Features like age, job, marital status, education, and previous outcomes influence acceptance.
- The model achieved good accuracy on test data.
- Confusion matrix shows how many applications were correctly classified as accepted or rejected.
- This analysis can help banks target customers more effectively for loan offers."""
print(conclusion_text)