# Q4: ID3 Decision Tree — Loan Approval
Dataset: `id3_loan_dataset.csv`

In [None]:
# Common imports used across notebooks
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure drawn in this notebook.
sns.set()
# Single seed reused by train_test_split and DecisionTreeClassifier below so that
# the split and the fitted tree are reproducible across runs.
RANDOM_STATE = 42


In [None]:
# Load the loan-approval dataset and preview the first rows.
# NOTE(review): this is an absolute, machine-specific path; consider a configurable
# data directory so the notebook runs elsewhere.
csv_path = '/mnt/data/aiml/id3_loan_dataset.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Encode the target column as integers: 1 = approved ('Yes'), 0 = rejected ('No').
#
# String labels are matched case-insensitively with surrounding whitespace ignored,
# and values that are already 0/1 pass through unchanged, so re-running this cell
# is harmless. Anything else (including missing values) raises instead of being
# silently left in the column, where it would produce a mixed-type target and a
# confusing failure at split/fit time.
TARGET_CODES = {'yes': 1, 'no': 0, 1: 1, 0: 0}


def _encode_approved(value):
    """Return 1/0 for a recognised approval label, or None when unrecognised."""
    key = value.strip().lower() if isinstance(value, str) else value
    return TARGET_CODES.get(key)


approved_codes = df['approved'].map(_encode_approved)

# Collect the original (un-normalised) values that could not be encoded.
unmapped = df.loc[approved_codes.isna(), 'approved']
if not unmapped.empty:
    raise ValueError(
        "Unrecognised values in 'approved' column: "
        f"{unmapped.unique().tolist()!r} (expected 'Yes'/'No' or 0/1)"
    )

df['approved'] = approved_codes.astype(int)
print(df['approved'].value_counts())

In [None]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_text, plot_tree

# Separate features from the target.
X = df.drop(columns=['approved'])
y = df['approved']
cat_cols = X.select_dtypes(include=['object','category']).columns.tolist()

# Split BEFORE fitting any preprocessing so that the held-out rows cannot
# influence the fitted encoder (avoids train/test leakage).
X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y,test_size=0.2,random_state=RANDOM_STATE)

if cat_cols:
    # train_test_split returns slices of X; copy so the assignments below are
    # unambiguous writes to independent frames.
    X_train = X_train.copy()
    X_test = X_test.copy()
    # Categories that appear only outside the training rows are encoded as -1
    # instead of raising at transform time.
    enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    X_train[cat_cols] = enc.fit_transform(X_train[cat_cols])
    X_test[cat_cols] = enc.transform(X_test[cat_cols])
    # Keep X itself encoded (with the train-fitted encoder) for any later cell
    # that reads it.
    X[cat_cols] = enc.transform(X[cat_cols])

# criterion='entropy' selects information-gain splits as in ID3. Note that
# scikit-learn's tree is a CART implementation (binary splits on numeric
# thresholds), so ordinal codes are treated as ordered numbers rather than as
# ID3-style multiway categorical splits.
clf = DecisionTreeClassifier(criterion='entropy', max_depth=5, random_state=RANDOM_STATE)
clf.fit(X_train,y_train)
print('Train score:', clf.score(X_train,y_train))
print('Test score:', clf.score(X_test,y_test))

### Interpretation / Discussion

- **TODO: briefly interpret the results above.** Explain model performance (e.g., accuracy, precision, recall; R² applies only to regression tasks), any issues (overfitting, class imbalance), and recommended next steps (feature engineering, hyperparameter tuning, regularization such as limiting tree depth). Keep all outputs and interpretations visible inline — do **not** save files externally.

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict on the held-out split and print per-class precision / recall / F1.
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

# Print the learned split rules as indented text, labelled with the feature names.
tree_rules = export_text(clf, feature_names=X.columns.tolist())
print(tree_rules)

In [None]:
# Draw the fitted tree. Nodes are coloured by majority class (filled=True).
#
# feature_names is passed as a plain list (as in the export_text call) because
# some scikit-learn releases reject a pandas Index for this argument.
# class_names is derived from clf.classes_ (sorted ascending by scikit-learn) so
# the labels always line up with the classes the model actually saw; any class
# other than 0/1 is shown by its own value instead of being mislabelled.
class_labels = {0: 'Reject', 1: 'Approve'}
fig, ax = plt.subplots(figsize=(16, 10))
plot_tree(
    clf,
    feature_names=list(X.columns),
    class_names=[class_labels.get(cls, str(cls)) for cls in clf.classes_],
    filled=True,
    fontsize=10,
    ax=ax,
)
ax.set_title('Loan approval decision tree')
plt.show()