In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from zipfile import ZipFile
import requests
from io import BytesIO

# Download the ZIP archive and load the full CSV it contains into a DataFrame.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00222/bank-additional.zip'
# A timeout keeps the script from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=60)
# Stop on an HTTP error status instead of trying to unzip an error page.
response.raise_for_status()

with ZipFile(BytesIO(response.content)) as zip_file:
    # Open the CSV member in a context manager so its handle is closed after
    # parsing; the file is read with ';' as the field separator.
    with zip_file.open('bank-additional/bank-additional-full.csv') as csv_file:
        bank_data = pd.read_csv(csv_file, sep=';')

# Encode every non-numeric column as integer codes, in place.
# The fitted encoder for each column is kept in `label_encoders` so the codes
# can later be mapped back to the original labels via `inverse_transform`.
label_encoders = {}
for column in bank_data.columns:
    # Test for "not numeric" rather than `dtype == 'object'`: when pandas
    # infers its dedicated string dtype for text columns, the object check
    # matches nothing and the text columns would be left unencoded.
    if not pd.api.types.is_numeric_dtype(bank_data[column]):
        encoder = LabelEncoder()
        bank_data[column] = encoder.fit_transform(bank_data[column])
        label_encoders[column] = encoder

# Separate the target column 'y' from the remaining predictor columns.
y = bank_data['y']  # Target variable
X = bank_data.drop(columns='y')  # Features

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build the seeded decision tree and fit it on the training rows in one step
# (`fit` returns the estimator itself).
clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Class predictions for the held-out rows.
y_pred = clf.predict(X_test)

# Report held-out performance: overall accuracy, then per-class metrics,
# then the raw confusion counts.
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

# Each section prints a header line followed by its body on the next line.
print('\nClassification Report:', classification_report(y_test, y_pred), sep='\n')

print('\nConfusion Matrix:', confusion_matrix(y_test, y_pred), sep='\n')


Accuracy: 0.89

Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.94      0.94      7303
           1       0.51      0.51      0.51       935

    accuracy                           0.89      8238
   macro avg       0.73      0.73      0.73      8238
weighted avg       0.89      0.89      0.89      8238


Confusion Matrix:
[[6846  457]
 [ 454  481]]
