In [1]:
# Importing necessary libraries
import os

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, plot_tree

In [2]:
# Function to preprocess the data
def preprocess_data(df):
    """Label-encode every object-dtype column of ``df``.

    The encoding is applied to a copy, so the frame passed in is left
    untouched and calling the function again on the same input gives the
    same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose object-dtype columns should be replaced by the codes
        produced by ``LabelEncoder.fit_transform``.

    Returns
    -------
    tuple
        ``(encoded_df, label_encoders)``: ``encoded_df`` is a new DataFrame
        with the object-dtype columns encoded, and ``label_encoders`` maps
        each encoded column name to the ``LabelEncoder`` fitted on it.
    """
    # Work on a copy so the caller's raw data is not overwritten.
    encoded_df = df.copy()
    label_encoders = {}
    for column in encoded_df.select_dtypes(include=['object']).columns:
        le = LabelEncoder()
        encoded_df[column] = le.fit_transform(encoded_df[column])
        # Keep the fitted encoder so the column's mapping stays available.
        label_encoders[column] = le
    return encoded_df, label_encoders

In [3]:
# Function to train a decision tree classifier
def train_decision_tree(X_train, y_train, max_depth=None):
    """Fit a ``DecisionTreeClassifier`` on the given training data.

    Parameters
    ----------
    X_train, y_train
        Training features and labels, passed straight to ``fit``.
    max_depth : int or None, optional
        Forwarded to the classifier's ``max_depth`` argument.

    Returns
    -------
    The fitted classifier (built with ``random_state=42``).
    """
    # ``fit`` hands back the estimator itself, so construct-and-fit can be chained.
    return DecisionTreeClassifier(random_state=42, max_depth=max_depth).fit(X_train, y_train)

In [4]:
# Function to evaluate the model
def evaluate_model(clf, X_test, y_test):
    """Score a fitted classifier on held-out data.

    Parameters
    ----------
    clf
        Object exposing ``predict``.
    X_test, y_test
        Test features and the matching true labels.

    Returns
    -------
    tuple
        ``(accuracy, report)`` as produced by ``accuracy_score`` and
        ``classification_report`` for the model's predictions.
    """
    predictions = clf.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )

In [5]:
# Function to plot the decision tree
def plot_decision_tree(clf, feature_names, class_names, max_depth=None):
    """Draw a fitted decision tree on a 20x10 inch figure and show it.

    Parameters
    ----------
    clf
        Fitted tree estimator accepted by ``sklearn.tree.plot_tree``.
    feature_names : iterable or None
        Names of the features; any iterable (list, pandas Index, numpy
        array) is accepted and converted to a list of strings.
    class_names : iterable, bool or None
        Names of the target classes. Iterables are converted to a list of
        strings, so numeric class labels can be passed directly; ``None``
        and booleans are forwarded unchanged.
    max_depth : int or None, optional
        Forwarded to ``plot_tree`` as the maximum depth to draw.

    Returns
    -------
    None
    """
    def _as_label_list(labels):
        # ``None`` and the boolean form of ``class_names`` are forwarded as-is.
        if labels is None or isinstance(labels, bool):
            return labels
        # NOTE(review): some scikit-learn releases appear to accept only plain
        # lists here, and non-string class labels cannot be drawn - confirm
        # against the installed version. Normalising avoids both problems.
        return [str(label) for label in labels]

    # Explicit figure/axes instead of relying on pyplot's "current axes".
    fig, ax = plt.subplots(figsize=(20, 10))
    plot_tree(
        clf,
        filled=True,
        feature_names=_as_label_list(feature_names),
        class_names=_as_label_list(class_names),
        max_depth=max_depth,
        rounded=True,
        proportion=True,
        ax=ax,
    )
    ax.set_title("Decision Tree for Bank Marketing Prediction", fontsize=16)
    plt.show()

In [6]:
# Loading the data
# The CSV location can be overridden with the BANK_CSV_PATH environment
# variable so the notebook can run on other machines; when the variable is
# unset, the original local path below is used.
DEFAULT_BANK_CSV_PATH = "C:\\Users\\sreel\\OneDrive\\Desktop\\Project\\Prodigy Task 3\\bank.csv"
# The file is semicolon-separated, hence the explicit delimiter.
data = pd.read_csv(os.environ.get("BANK_CSV_PATH", DEFAULT_BANK_CSV_PATH), delimiter=';')

In [7]:
# Preprocess the data: label-encode the object-dtype columns and keep the fitted encoders.
# `data` is rebound to the encoded frame here, so re-running only this cell finds
# no object-dtype columns left and yields an empty `label_encoders`; reload the
# CSV first when re-running.
data, label_encoders = preprocess_data(df=data)

In [8]:
# Separate the target column 'y' from the remaining feature columns
y = data['y']
X = data.drop('y', axis=1)

In [9]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [10]:
# Fit the classifier on the training split, capping the depth at 4 so the tree stays interpretable
clf = train_decision_tree(X_train=X_train, y_train=y_train, max_depth=4)