In [None]:
from pathlib import Path
import warnings

import pandas as pd
from PyALE import ale
from sklearn.inspection import PartialDependenceDisplay

from config.config_modeling import CAT_COLS, TRAIN_SIZE, TEST_FROM_VAL, RANDOM_STATE
from src.explanation.global_exp.post_hoc_methods import (
    get_underscore,
    ohe_filter,
    categorical_partial_dependence,
    ohe_ale,
)
from src.modeling.create_data_split import split_data
from src.utils.models_pkl import load_pickle

In [None]:
warnings.filterwarnings("ignore")

# Initializing all the necessary variables

In [None]:
# Locations of the input dataset and the pickled model, relative to the
# notebook's working directory.
IN_PATH = Path("../../data/data.csv")
MODEL_PATH = Path("../../models/XGB.pkl")

In [None]:
# Load the model and the raw dataset from the paths configured above.
# NOTE(review): load_pickle presumably unpickles the file — only point
# MODEL_PATH at trusted artifacts, since unpickling can execute arbitrary code.
model = load_pickle(MODEL_PATH)
data = pd.read_csv(filepath_or_buffer=IN_PATH)

In [None]:
# Split the raw frame using the project-wide modeling configuration.
# The result is indexed by partition name ("train", "test") further down.
# presumably CAT_COLS are one-hot encoded inside split_data — TODO confirm.
split_options = {
    "cols": CAT_COLS,
    "df": data,
    "train_size": TRAIN_SIZE,
    "test_size": TEST_FROM_VAL,
    "random_state": RANDOM_STATE,
}
ohe_data = split_data(**split_options)

In [None]:
# Pull the pieces needed below out of the split result: the first two entries
# of the "train" partition and the first entry of the "test" partition.
train_split = ohe_data["train"]
X_train, Y_train = train_split[0], train_split[1]
X_test = ohe_data["test"][0]

In [None]:
# Sort the model's input columns into the groups that are plotted separately
# in the sections below.
cols = X_train.columns.tolist()

# get_underscore returns (at least) two groups; the first two are used here.
underscore = get_underscore(cols)
underscore_cols, non_underscore_cols = underscore[0], underscore[1]

# ohe_filter returns (at least) three groups; the first three are used here.
ohe_results = ohe_filter(non_underscore_cols, underscore_cols)
non_ohe, ohe, prefix_ohe = ohe_results[0], ohe_results[1], ohe_results[2]

# "Year" is handled as the numerical feature, so it is taken out of the
# non-one-hot group. Each entry is a single-element list, hence ["Year"].
# list.remove mutates ohe_results[0] in place and raises ValueError when
# ["Year"] is not present.
numerical = [["Year"]]
non_ohe.remove(["Year"])

# Partial Dependence Plots (PDP)

### Numerical variables

In [None]:
# Partial dependence of the model output on the numerical feature group.
PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=numerical[0],
)

### Categorical, not one hot encoded variables 

In [None]:
# One partial-dependence plot per categorical feature that is not one-hot
# encoded; the same entry is passed as both the plotted feature and the
# categorical-feature declaration.
for col in non_ohe:
    PartialDependenceDisplay.from_estimator(
        estimator=model,
        X=X_test,
        features=col,
        categorical_features=col,
    )

### One hot encoded variables

In [None]:
# Partial dependence for the first one-hot-encoded column group (ohe[0]).
categorical_partial_dependence(
    model,
    X_test,
    figure_size=(7, 5),
    feature_names=ohe[0],
)

In [None]:
# Partial dependence for the second one-hot-encoded column group (ohe[1]).
categorical_partial_dependence(
    model,
    X_test,
    figure_size=(7, 5),
    feature_names=ohe[1],
)

In [None]:
# Partial dependence for the third one-hot-encoded column group (ohe[2]);
# drawn on a wider figure than the other groups.
categorical_partial_dependence(
    model,
    X_test,
    figure_size=(12, 5),
    feature_names=ohe[2],
)

In [None]:
# Partial dependence for the fourth one-hot-encoded column group (ohe[3]).
categorical_partial_dependence(
    model,
    X_test,
    figure_size=(10, 5),
    feature_names=ohe[3],
)

In [None]:
# Partial dependence for the fifth one-hot-encoded column group (ohe[4]).
categorical_partial_dependence(
    model,
    X_test,
    figure_size=(10, 5),
    feature_names=ohe[4],
)

# Accumulated Local Effects (ALE) with PyALE

### Numerical variables

In [None]:
# ALE for the numerical "Year" feature, without confidence intervals.
# The trailing ";" keeps the returned value from being echoed as cell output.
ale(
    model=model,
    X=X_test,
    feature=["Year"],
    include_CI=False,
);

### Categorical, not one hot encoded variables

In [None]:
# One ALE plot per categorical feature that is not one-hot encoded.
# Each entry of non_ohe is passed straight through as the `feature` argument,
# mirroring the PDP loop over the same list above.
for col in non_ohe:
    ale(
        X=X_test,
        model=model,
        # Use the loop variable: indexing non_ohe[0] here would redraw the
        # first feature on every iteration and never plot the others.
        feature=col,
        feature_type="discrete",
        include_CI=False,
    )

### One hot encoded variables

In [None]:
# ALE for the one-hot-encoded group identified by prefix_ohe[0].
# NOTE(review): the variable name assumes index 0 is the "Color" prefix —
# confirm against the ordering returned by ohe_filter.
Color_ALE = ohe_ale(
    prefix_ohe[0],
    # model and the columns it expects
    model=model,
    model_cols=cols,
    # raw data and the split settings
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    # plotting
    figure_size=(10, 5),
)

In [None]:
# ALE for the one-hot-encoded group identified by prefix_ohe[1].
# NOTE(review): the variable name assumes index 1 is the "Gender" prefix —
# confirm against the ordering returned by ohe_filter.
Gender_ALE = ohe_ale(
    prefix_ohe[1],
    # model and the columns it expects
    model=model,
    model_cols=cols,
    # raw data and the split settings
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    # plotting
    figure_size=(7, 5),
)

In [None]:
# ALE for the one-hot-encoded group identified by prefix_ohe[2]; drawn on a
# larger figure than the other groups.
# NOTE(review): the variable name assumes index 2 is the "Make" prefix —
# confirm against the ordering returned by ohe_filter.
Make_ALE = ohe_ale(
    prefix_ohe[2],
    # model and the columns it expects
    model=model,
    model_cols=cols,
    # raw data and the split settings
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    # plotting
    figure_size=(20, 8),
)

In [None]:
# ALE for the one-hot-encoded group identified by prefix_ohe[3].
# NOTE(review): the variable name assumes index 3 is the "Race" prefix —
# confirm against the ordering returned by ohe_filter.
Race_ALE = ohe_ale(
    prefix_ohe[3],
    # model and the columns it expects
    model=model,
    model_cols=cols,
    # raw data and the split settings
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    # plotting
    figure_size=(10, 5),
)

In [None]:
# ALE for the one-hot-encoded group identified by prefix_ohe[4].
# NOTE(review): the variable name assumes index 4 is the "VehicleType"
# prefix — confirm against the ordering returned by ohe_filter.
VehicleType_ALE = ohe_ale(
    prefix_ohe[4],
    # model and the columns it expects
    model=model,
    model_cols=cols,
    # raw data and the split settings
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    # plotting
    figure_size=(10, 5),
)