# Task 03 — Decision Tree Classification (Bank Marketing, Offline)

**Goal:** Train a simple classifier and report performance + basic model interpretation.

Dataset: `data/bank_marketing_synthetic.csv` (synthetic, offline-friendly)


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

# Relative path to the synthetic bank-marketing CSV (resolved against the
# kernel's current working directory).
DATA_PATH = "../../data/bank_marketing_synthetic.csv"

# Read the full file into a DataFrame, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer=DATA_PATH)
df.head(n=5)

## 1) Prepare features

In [None]:
# Target is the "subscribed" column; every other column is a predictor.
y = df.loc[:, "subscribed"]
X = df.drop("subscribed", axis=1)

# Expand categorical predictors into indicator columns. drop_first=True
# omits the first level of each category, so k levels yield k-1 columns.
X_encoded = pd.get_dummies(data=X, drop_first=True)

# Hold out 25% of the rows for evaluation. Stratifying on y keeps the class
# proportions aligned between the two splits; the fixed seed makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

# Show the (rows, columns) of each split as the cell output.
(X_train.shape, X_test.shape)

## 2) Train Decision Tree

In [None]:
# Build a depth-limited tree (at most 4 levels) with a fixed seed and fit it
# on the training split; fit() returns the fitted estimator itself.
model = DecisionTreeClassifier(random_state=42, max_depth=4).fit(X_train, y_train)

# Predict on the held-out rows and compute the fraction classified correctly.
pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=pred)

# Display the accuracy as the cell output.
acc

In [None]:
# Per-class precision / recall / F1 on the held-out split, to 3 decimals.
# The report is a multi-line string, so it is printed rather than left as a
# bare expression (which would show the escaped repr).
print(
    classification_report(
        y_true=y_test,
        y_pred=pred,
        digits=3,
    )
)

## 3) Quick feature importance

In [None]:
# Pair each encoded feature name with the importance the fitted tree
# assigned to it.
importances = pd.Series(
    data=model.feature_importances_,
    index=X_encoded.columns,
)

# Rank from most to least important and show the top ten.
importances = importances.sort_values(ascending=False)
importances.head(n=10)