In [16]:
# prompt: build a decision tree classifier to predict whether a customer will purchase a product or service based on their demographic and behavioral data use the dataset given below such as the bank marketing dataset from the UCI machine learning repository
# https://archive.ics.uci.edu/dataset/222/bank+marketing
# make the report a little beautiful and like structured with proper tables and stuff

# Fetch and unpack the UCI Bank Marketing archive.
# -nc skips the download when bank.zip is already present (a plain re-run would
# otherwise save a second copy as bank.zip.1 and leave the old one in use);
# -o lets unzip overwrite previously extracted files instead of stopping to prompt.
!wget -nc https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip
!unzip -o bank.zip
!ls

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Read 'bank-full.csv' (one of the files extracted from bank.zip above);
# fields in this file are separated by ';' rather than ','.
df = pd.read_csv('bank-full.csv', sep=';')

# Preview the first few rows
print("Dataset Head:", df.head(), sep="\n")

# Per-column summary; info() writes directly to stdout, so it is not wrapped in print()
print("\nDataset Info:")
df.info()

# Descriptive statistics for every column, numeric and non-numeric alike
print("\nDataset Description:", df.describe(include='all'), sep="\n")

# Handle categorical features
# Use Label Encoding for simplicity for demonstration. LabelEncoder numbers the
# classes of each column in sorted order, so the integer codes carry no real
# ordinal meaning; a tree can still split on them.
#
# Each fitted encoder is kept in `label_encoders` (column name -> encoder) so
# the integer codes can later be mapped back to the original category names
# with `label_encoders[col].inverse_transform(...)`.
label_encoders = {}
for column in df.columns:
  # Treat every non-numeric column as categorical. Checking `dtype == 'object'`
  # would miss text columns when pandas infers its dedicated string dtype
  # (the default from pandas 3), leaving raw strings in the frame.
  if not pd.api.types.is_numeric_dtype(df[column]):
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Display the first few rows after encoding
print("\nDataset Head after Label Encoding:")
print(df.head())

# Separate features (X) and target (y)
X = df.drop('y', axis=1)
y = df['y']

# NOTE(review): the dataset's own documentation reportedly warns that the
# 'duration' column is only known after the call has finished and therefore
# leaks the outcome -- confirm against bank-names.txt and consider dropping it
# if the model is meant to be used before a call is made.

# Split the data into training and testing sets (70% / 30%).
# stratify=y keeps the proportion of each target class the same in both
# splits, so the evaluation is not distorted by an uneven draw of the
# less frequent class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

print("\nTraining data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)

# Fit a decision tree on the training split. fit() returns the estimator
# itself, so construction and training chain into one expression; the fixed
# random_state keeps the fitted tree reproducible between runs.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict the target for the held-out test rows
y_pred = dt_classifier.predict(X_test)

# Evaluate the model
print("\nModel Evaluation:")

# Encoded target codes, listed in the same order as the human-readable names
# used below. Passing them explicitly pins the row/column order of both
# reports, so the names cannot silently end up attached to the wrong class
# (or fail on a shape mismatch) if one class is absent from y_test / y_pred.
class_codes = [0, 1]

# Confusion Matrix: rows are the actual classes, columns the predicted ones
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_codes)
conf_matrix_df = pd.DataFrame(
    conf_matrix,
    index=['Actual No', 'Actual Yes'],
    columns=['Predicted No', 'Predicted Yes'],
)

print("\nConfusion Matrix:")
print(conf_matrix_df)

# Classification Report: output_dict=True returns nested dicts keyed by the
# target names, which transpose cleanly into one table row per class/average.
class_report = classification_report(
    y_test,
    y_pred,
    labels=class_codes,
    target_names=['No Purchase', 'Purchase'],
    output_dict=True,
)
class_report_df = pd.DataFrame(class_report).transpose()

print("\nClassification Report:")
print(class_report_df.round(2))

# Flatten the headline numbers from the classification report into a
# two-column table: overall accuracy first, then precision, recall and
# F1-score, each listed for 'No Purchase' followed by 'Purchase'.
metric_titles = {'precision': 'Precision', 'recall': 'Recall', 'f1-score': 'F1-Score'}
class_names = ('No Purchase', 'Purchase')

summary_data = {'Metric': ['Accuracy'], 'Value': [class_report['accuracy']]}
for metric_key, metric_title in metric_titles.items():
  for class_name in class_names:
    summary_data['Metric'].append(f'{metric_title} ({class_name})')
    summary_data['Value'].append(class_report[class_name][metric_key])
summary_df = pd.DataFrame(summary_data)

print("\nSummary of Key Metrics:")
print(summary_df.round(2))


--2025-07-17 11:22:39--  https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank.zip
Resolving archive.ics.uci.edu (archive.ics.uci.edu)... 128.195.10.252
Connecting to archive.ics.uci.edu (archive.ics.uci.edu)|128.195.10.252|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified
Saving to: ‘bank.zip’

bank.zip                [    <=>             ] 565.47K   730KB/s    in 0.8s    

2025-07-17 11:22:40 (730 KB/s) - ‘bank.zip’ saved [579043]

Archive:  bank.zip
  inflating: bank-full.csv           
  inflating: bank-names.txt          
  inflating: bank.csv                
bank-additional      bank.csv	    bank-names.txt  __MACOSX	 train.csv
bank-additional.zip  bank-full.csv  bank.zip	    sample_data
Dataset Head:
   age           job  marital  education default  balance housing loan  \
0   58    management  married   tertiary      no     2143     yes   no   
1   44    technician   single  secondary      no       29     yes   no   
2   33  ent