In [3]:


# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Step 2: Load Dataset
# You can use "bank.csv" (small sample) or "bank-full.csv" (full dataset)
df = pd.read_csv("bank.csv", sep=";")
print("Dataset Shape:", df.shape)
print(df.head())

# Step 3: Encode Categorical Variables
label_encoders = {}
for col in df.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

print("\nEncoded Columns:", list(label_encoders.keys()))

# Step 4: Define Features & Target
X = df.drop("y", axis=1)
y = df["y"]

# Step 5: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Step 6: Train Decision Tree Classifier
model = DecisionTreeClassifier(criterion="entropy", max_depth=5, random_state=42)
model.fit(X_train, y_train)

# Step 7: Predictions & Evaluation
y_pred = model.predict(X_test)

print("\n Model Evaluation")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# ===============================
# VISUALIZATIONS
# ===============================

# Plot 1: Data Distribution
plt.figure(figsize=(6,4))
sns.countplot(x=y, palette="Set2")
plt.title("Data Distribution (Subscribed vs Not Subscribed)")
plt.savefig("data_distribution.png")
plt.close()

# Plot 2: Confusion Matrix
plt.figure(figsize=(6,4))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt="d", cmap="Blues")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix")
plt.savefig("confusion_matrix.png")
plt.close()

# Plot 3: Feature Importance
importances = model.feature_importances_
features = X.columns

plt.figure(figsize=(10,6))
sns.barplot(x=importances, y=features, palette="viridis")
plt.title("Feature Importance in Decision Tree")
plt.savefig("feature_importance.png")
plt.close()

# Plot 4: Decision Tree Visualization
plt.figure(figsize=(18,10))
plot_tree(model, feature_names=features, class_names=["No", "Yes"], filled=True, fontsize=8)
plt.savefig("decision_tree_plot.png")
plt.close()

print("\n All plots have been saved as PNG files:")
print(" - data_distribution.png")
print(" - confusion_matrix.png")
print(" - feature_importance.png")
print(" - decision_tree_plot.png")


Dataset Shape: (4521, 17)
   age          job  marital  education default  balance housing loan  \
0   30   unemployed  married    primary      no     1787      no   no   
1   33     services  married  secondary      no     4789     yes  yes   
2   35   management   single   tertiary      no     1350     yes   no   
3   30   management  married   tertiary      no     1476     yes  yes   
4   59  blue-collar  married  secondary      no        0     yes   no   

    contact  day month  duration  campaign  pdays  previous poutcome   y  
0  cellular   19   oct        79         1     -1         0  unknown  no  
1  cellular   11   may       220         1    339         4  failure  no  
2  cellular   16   apr       185         1    330         1  failure  no  
3   unknown    3   jun       199         4     -1         0  unknown  no  
4   unknown    5   may       226         1     -1         0  unknown  no  

Encoded Columns: ['job', 'marital', 'education', 'default', 'housing', 'loan', 'cont


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.countplot(x=y, palette="Set2")

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=importances, y=features, palette="viridis")



 All plots have been saved as PNG files:
 - data_distribution.png
 - confusion_matrix.png
 - feature_importance.png
 - decision_tree_plot.png
