# Step 1: Load the Dataset



In [23]:
import pandas as pd

# Load the Bank Marketing dataset
bank_data = pd.read_csv('bank-additional-full.csv', sep=';')  
bank_data.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


# Step 2: Data Preprocessing

In [24]:
from sklearn.model_selection import train_test_split

# Handle missing values (if any)
bank_data.dropna(inplace=True)

# Encode categorical variables (e.g., using one-hot encoding)
bank_data = pd.get_dummies(bank_data, columns=['job', 'marital', 'education', 'default','housing', 'loan',
                                               'contact', 'month', 'day_of_week', 'poutcome'], drop_first=True)

# Split the data into features (X) and the target variable (y)
X = bank_data.drop('y', axis=1)
y = bank_data['y']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Step 3: Build the Decision Tree Classifier

In [18]:
from sklearn.tree import DecisionTreeClassifier

# Create a decision tree classifier
clf = DecisionTreeClassifier()

# Train the classifier on the training data
clf.fit(X_train, y_train)


# Step 4: Make Predictions

In [19]:
# Make predictions on the test data
y_pred = clf.predict(X_test)


# Step 5: Evaluate the Model

In [20]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, pos_label='yes')
recall = recall_score(y_test, y_pred, pos_label='yes')
f1 = f1_score(y_test, y_pred, pos_label='yes')
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.8869871327992231
Precision: 0.502092050209205
Recall: 0.5133689839572193
F1 Score: 0.5076679005817027
Confusion Matrix:
 [[6827  476]
 [ 455  480]]


# Step 6: Visualize the Decision Tree 

In [21]:
from sklearn.tree import export_text

# Display the textual representation of the decision tree
tree_rules = export_text(clf, feature_names=list(X.columns))
print(tree_rules)


|--- nr.employed <= 5087.65
|   |--- duration <= 165.50
|   |   |--- pdays <= 7.50
|   |   |   |--- day_of_week_mon <= 0.50
|   |   |   |   |--- duration <= 127.50
|   |   |   |   |   |--- nr.employed <= 5013.10
|   |   |   |   |   |   |--- class: no
|   |   |   |   |   |--- nr.employed >  5013.10
|   |   |   |   |   |   |--- month_nov <= 0.50
|   |   |   |   |   |   |   |--- month_sep <= 0.50
|   |   |   |   |   |   |   |   |--- month_jun <= 0.50
|   |   |   |   |   |   |   |   |   |--- age <= 64.00
|   |   |   |   |   |   |   |   |   |   |--- duration <= 91.50
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 6
|   |   |   |   |   |   |   |   |   |   |--- duration >  91.50
|   |   |   |   |   |   |   |   |   |   |   |--- class: no
|   |   |   |   |   |   |   |   |   |--- age >  64.00
|   |   |   |   |   |   |   |   |   |   |--- duration <= 99.00
|   |   |   |   |   |   |   |   |   |   |   |--- truncated branch of depth 2
|   |   |   |   |   |   |   |   |   | 