# 2.3 Code Brief: Train and Visualize Decision Trees

Quick reference for training, visualizing, and extracting rules from decision trees.

## Setup

In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.tree import plot_tree, export_text
import matplotlib.pyplot as plt

## Load Data and Models

In [None]:
# Base locations for the course data and the saved models (the /content/drive
# prefix suggests a mounted Google Drive in Colab — adjust for other environments).
root_filepath = '/content/drive/MyDrive/projects/Applied-Data-Analytics-For-Higher-Education-Course-2/'
data_filepath = f'{root_filepath}data/'
models_path = f'{root_filepath}course_3/models/'

# The whole training frame is passed as X; SEM_3_STATUS is the target.
# NOTE(review): X_train still contains the SEM_3_STATUS column. That is only safe
# if the pipeline's preprocessing step selects its input columns explicitly and
# drops everything else — confirm, otherwise the target leaks into the features.
df_training = pd.read_csv(f'{data_filepath}training.csv')
X_train = df_training
y_train = df_training['SEM_3_STATUS']

# Open the pickle inside a context manager so the file handle is closed
# deterministically rather than whenever the garbage collector gets to it.
# NOTE: unpickling can execute arbitrary code — only load files you trust.
with open(f'{models_path}balanced_decision_tree_model.pkl', 'rb') as model_file:
    balanced_dt_model = pickle.load(model_file)

## Train Model

In [None]:
# Fit every step of the loaded pipeline on the training data.
balanced_dt_model.fit(X_train, y_train)

# Pull the fitted estimator out of the pipeline's 'classifier' step; the
# later cells inspect and plot this object directly.
tree = balanced_dt_model.named_steps['classifier']

# Report the size of the fitted tree.
for summary_line in (
    f"Tree depth: {tree.get_depth()}",
    f"Number of leaves: {tree.get_n_leaves()}",
):
    print(summary_line)

## Get Feature Names

In [None]:
# Rebuild human-readable column names for the matrix the tree was trained on,
# so that the rules, plot and importances can be labelled.
preprocessor = balanced_dt_model.named_steps['preprocessing']
numerical_columns = [
    'HS_GPA',
    'GPA_1',
    'GPA_2',
    'DFW_RATE_1',
    'DFW_RATE_2',
    'UNITS_ATTEMPTED_1',
    'UNITS_ATTEMPTED_2',
]
categorical_columns = ['GENDER', 'RACE_ETHNICITY', 'FIRST_GEN_STATUS']

# Fit the preprocessing step on its own so its fitted sub-transformers
# (named_transformers_) are available for inspection.
preprocessor.fit(X_train)
cat_encoder = preprocessor.named_transformers_['cat']

# Ask the 'cat' encoder for the names of the columns it generates.
encoded_names = cat_encoder.get_feature_names_out(categorical_columns)
cat_feature_names = encoded_names.tolist()

# NOTE(review): assumes the preprocessor emits the numerical columns first,
# followed by the encoded categorical ones — confirm against the pipeline definition.
feature_names = [*numerical_columns, *cat_feature_names]

## Text Representation

In [None]:
# Render the tree's decision rules as indented text, labelled with the
# reconstructed feature names and limited via max_depth=4 to keep the output readable.
tree_rules = export_text(
    tree,
    feature_names=feature_names,
    max_depth=4,
)
print(tree_rules)

## Graphical Visualization

In [None]:
# Draw the top of the tree on a large canvas using an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(20, 12))

# NOTE(review): class_names must follow the order of the classifier's classes —
# confirm that 'Enrolled' really corresponds to the first class.
plot_tree(
    tree,
    feature_names=feature_names,
    class_names=['Enrolled', 'Not Enrolled'],
    filled=True,
    rounded=True,
    fontsize=8,
    max_depth=3,
    ax=ax,
)
ax.set_title('Decision Tree (First 3 Levels)', fontsize=14)
fig.tight_layout()
plt.show()

## Feature Importance

In [None]:
# Pair each feature name with the importance score the fitted tree assigned to it.
importances = tree.feature_importances_
importance_table = pd.DataFrame({'Feature': feature_names, 'Importance': importances})

# Most important features first.
importance_df = importance_table.sort_values('Importance', ascending=False)

print("Feature Importances:")
# Show only features with a strictly positive importance score.
has_importance = importance_df['Importance'] > 0
display(importance_df[has_importance])

## Save Trained Model

In [None]:
# Persist the re-fitted pipeline under its own filename
# (balanced_decision_tree_trained.pkl). The context manager guarantees the
# file is flushed and closed as soon as the dump finishes, instead of relying
# on garbage collection to release the handle.
with open(f'{models_path}balanced_decision_tree_trained.pkl', 'wb') as model_file:
    pickle.dump(balanced_dt_model, model_file)
print("Saved trained model.")