In [None]:
from typing import List
from sklearn.base import BaseEstimator
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from src.data import load_dataset
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from sklearn.inspection import PartialDependenceDisplay
import scienceplots

# Single seed reused by the train/test split and by every estimator that accepts one.
RANDOM_STATE = 42

# Load the feature matrix and target, then hold out 20% of the rows.
X, y = load_dataset()
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# Candidate classifiers, constructed with default hyper-parameters apart from the seed.
models: List[BaseEstimator] = [
    GaussianNB(),
    RandomForestClassifier(random_state=RANDOM_STATE),
    GradientBoostingClassifier(random_state=RANDOM_STATE),
    SVC(random_state=RANDOM_STATE),
    MLPClassifier(random_state=RANDOM_STATE),
    KNeighborsClassifier(),
]

# Hand-picked column names (not referenced by the other cells visible here).
features = [
    'fnlwgt',
    'age',
    'education_num',
    'capital-gain',
    'hours-per-week',
    'capital-loss',
]

# Every column stored with a boolean dtype is treated as categorical.
categorical_features = [name for name, dtype in X.dtypes.items() if dtype == 'bool']

# Apply the 'science' + 'ieee' matplotlib styles
# (presumably registered by the scienceplots import — TODO confirm).
plt.style.use(['science', 'ieee'])

In [2]:
# Train every candidate on the training split, with a progress bar over the models.
# The list holds whatever each fit() call returns (scikit-learn estimators return themselves).
fitted_estimators = [
    model.fit(X_train, y_train)
    for model in tqdm(models)
]

100%|██████████| 6/6 [01:02<00:00, 10.39s/it]


In [4]:
def explain(fitted_estimators: List[BaseEstimator], X_test: pd.DataFrame, feature: str):
    """Plot the partial dependence of ``feature`` for every estimator and save the figure.

    One panel per estimator is drawn on a fixed 3x2 grid. For a regular
    feature each panel shows centered ICE curves together with the PDP line
    (``kind="both"``). For a feature listed in the notebook-level
    ``categorical_features`` only the averaged partial dependence is drawn,
    because scikit-learn refuses to display individual effects for
    categorical features.

    Parameters
    ----------
    fitted_estimators : list of BaseEstimator
        Already fitted estimators, one per panel. The grid has six panels,
        so passing more than six raises ``IndexError``.
    X_test : pd.DataFrame
        Rows used to evaluate the partial dependence; must contain ``feature``.
    feature : str
        Name of the column to explain. Also used in the output file name.

    Returns
    -------
    None
        The figure is written to ``../plots/ice_<feature>.pdf``.
    """
    # NOTE(review): scikit-learn appears to draw into new axes created inside
    # each panel, so sharey=True on this outer grid may not actually link the
    # y-axes of the plotted curves — confirm.
    fig, axes = plt.subplots(ncols=2, nrows=3, figsize=(6, 8), sharey=True, constrained_layout=True)
    axes = axes.flatten()

    is_categorical = feature in categorical_features

    # The plotting options do not depend on the estimator, so build them once.
    features_info = {
        "features": [feature],
        # ICE curves ("both") are rejected for categorical features.
        "kind": "average" if is_categorical else "both",
        "centered": True,
    }
    if is_categorical:
        features_info['categorical_features'] = [feature]

    # Wrap the list (not the enumerate object) so tqdm knows the total.
    for index, estimator in enumerate(tqdm(fitted_estimators)):
        panel = PartialDependenceDisplay.from_estimator(
            estimator,
            X_test,
            **features_info,
            ax=axes[index],
            pd_line_kw={
                "color": "blue",
                "label": "PDP",
            },
        )

        # Title the axes that were actually drawn on, instead of relying on
        # pyplot's implicit "current axes".
        panel.axes_.ravel()[0].set_title(estimator.__class__.__name__)

    # Save through the figure handle rather than pyplot's "current figure".
    fig.savefig(f'../plots/ice_{feature}.pdf', dpi=300)

In [6]:
# Render text with matplotlib's built-in engine instead of an external LaTeX install
# (presumably the style applied earlier switched LaTeX rendering on — TODO confirm).
plt.rcParams.update({'text.usetex': False})

In [None]:
# Produce and save one figure per column of X.
# NOTE(review): only the first 10 training rows are passed to a parameter
# named X_test — looks like a quick-run setting; confirm before final plots.
for feature in X.columns:
    explain(
        fitted_estimators,
        X_train.iloc[:10],
        feature,
    )