In [None]:
import pandas as pd
import pandas as pd
from sklearn.inspection import PartialDependenceDisplay
from pathlib import Path
from src.modeling.create_data_split import split_data
from src.explanation.global_exp.post_hoc_methods import (
    get_underscore,
    ohe_filter,
    categorical_partial_dependence,
    ohe_ale,
)
from src.utils.models_pkl import load_pickle
from config.config_modeling import CAT_COLS, TRAIN_SIZE, TEST_FROM_VAL, RANDOM_STATE
from PyALE import ale
import warnings

In [None]:
warnings.filterwarnings("ignore")

# Initializing all the necessary variables

In [None]:
# Relative locations of the input dataset and of the pickled model; both are
# resolved against the working directory the notebook is run from.
IN_PATH = Path("../data").joinpath("data.csv")
MODEL_PATH = Path("../models").joinpath("XGB.pkl")
# Load the full dataset into memory.
data = pd.read_csv(filepath_or_buffer=IN_PATH)

In [None]:
# Partition `data` with the project's split helper, using the categorical
# column list, the split sizes and the seed taken from the modeling config.
# The result is indexed by "train" / "test" in the next cell.
ohe_data = split_data(
    df=data,
    cols=CAT_COLS,
    random_state=RANDOM_STATE,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
)

In [None]:
# Each split entry is indexed positionally: element 0 is used as the feature
# frame and element 1 as the target.
# NOTE(review): presumably split_data returns (X, y) pairs — confirm.
X_train, Y_train = ohe_data["train"][0], ohe_data["train"][1]
X_test = ohe_data["test"][0]

In [None]:
# Column names of the training frame, in stored order.
cols = X_train.columns.values.tolist()
# Partition the names with the project helpers. What each group means is
# inferred from the variable names only — confirm against get_underscore and
# ohe_filter.
underscore = get_underscore(cols)
underscore_cols, non_underscore_cols = underscore[0], underscore[1]
ohe_results = ohe_filter(non_underscore_cols, underscore_cols)
non_ohe, ohe, prefix_ohe = ohe_results[0], ohe_results[1], ohe_results[2]
# "Year" is plotted as the numerical feature, so its entry is taken out of the
# non-OHE group. The entry removed is itself a list (["Year"]); list.remove
# raises ValueError when it is absent, and the removal mutates the list
# returned by ohe_filter in place.
numerical = [["Year"]]
non_ohe.remove(["Year"])

In [None]:
# Load the model object stored at MODEL_PATH through the project's helper.
# NOTE(review): load_pickle presumably unpickles the file; unpickling can run
# arbitrary code, so only load model files from a trusted source.
model = load_pickle(MODEL_PATH)

# Partial dependence plots (PDP)

### Numerical variables

In [None]:
# Partial dependence of the model output on the numerical feature ("Year"),
# computed on the test features.
year_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=numerical[0],
)

### Categorical, not one-hot encoded variables

In [None]:
# PDP for the feature at non_ohe[0], declared categorical.
# NOTE(review): the index is positional; the variable name suggests the Belts
# feature — confirm against the column order.
Belts_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[0],
    categorical_features=non_ohe[0],
)

In [None]:
# PDP for the feature at non_ohe[1], declared categorical.
# NOTE(review): the index is positional; the variable name suggests the
# commercial-license feature — confirm against the column order.
Commercial_license_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[1],
    categorical_features=non_ohe[1],
)

In [None]:
# PDP for the feature at non_ohe[2], declared categorical.
# NOTE(review): the index is positional; the variable name suggests the
# contributed-to-accident feature — confirm against the column order.
Contributed_To_Accident_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[2],
    categorical_features=non_ohe[2],
)

In [None]:
# PDP for the feature at non_ohe[3], declared categorical.
# NOTE(review): the index is positional; the variable name suggests the
# personal-injury feature — confirm against the column order.
Personal_injury_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[3],
    categorical_features=non_ohe[3],
)

In [None]:
# PDP for the feature at non_ohe[4], declared categorical.
# NOTE(review): the index is positional; the variable name suggests the
# property-damage feature — confirm against the column order.
Property_damage_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[4],
    categorical_features=non_ohe[4],
)

In [None]:
# PDP for the feature at non_ohe[5], declared categorical.
# NOTE(review): the index is positional; the variable name suggests a
# "state is MD" indicator — confirm against the column order.
State_MD_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[5],
    categorical_features=non_ohe[5],
)

In [None]:
# PDP for the feature at non_ohe[6], declared categorical.
# NOTE(review): the index is positional; the variable name suggests a
# "driver state is MD" indicator — confirm against the column order.
Driver_State_MD_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[6],
    categorical_features=non_ohe[6],
)

In [None]:
# PDP for the feature at non_ohe[7], declared categorical.
# NOTE(review): the index is positional; the variable name suggests a
# "driver-license state is MD" indicator — confirm against the column order.
DL_state_MD_PDP = PartialDependenceDisplay.from_estimator(
    estimator=model,
    X=X_test,
    features=non_ohe[7],
    categorical_features=non_ohe[7],
)

### One-hot encoded variables

In [None]:
# Categorical PDP built by the project helper from the columns in ohe[0], drawn at 7x5.
# NOTE(review): ohe[0] presumably holds the one-hot columns of Color — the index is positional, confirm.
Color_PDP = categorical_partial_dependence(model, X_test, feature_names=ohe[0], figure_size=(7, 5))

In [None]:
# Categorical PDP built by the project helper from the columns in ohe[1], drawn at 7x5.
# NOTE(review): ohe[1] presumably holds the one-hot columns of Gender — the index is positional, confirm.
Gender_PDP = categorical_partial_dependence(model, X_test, feature_names=ohe[1], figure_size=(7, 5))

In [None]:
# Categorical PDP built by the project helper from the columns in ohe[2], drawn at 12x5.
# NOTE(review): ohe[2] presumably holds the one-hot columns of Make — the index is positional, confirm.
Make_PDP = categorical_partial_dependence(model, X_test, feature_names=ohe[2], figure_size=(12, 5))

In [None]:
# Categorical PDP built by the project helper from the columns in ohe[3], drawn at 10x5.
# NOTE(review): ohe[3] presumably holds the one-hot columns of Race — the index is positional, confirm.
Race_PDP = categorical_partial_dependence(model, X_test, feature_names=ohe[3], figure_size=(10, 5))

In [None]:
# Categorical PDP built by the project helper from the columns in ohe[4], drawn at 10x5.
# NOTE(review): ohe[4] presumably holds the one-hot columns of VehicleType — the index is positional, confirm.
VehicleType_PDP = categorical_partial_dependence(
    model, X_test, feature_names=ohe[4], figure_size=(10, 5)
)

# Accumulated local effects (ALE) with PyALE

### Numerical variables

In [None]:
# ALE of "Year" computed on the test features, with confidence intervals
# switched off.
Year_ALE = ale(
    model=model,
    X=X_test,
    feature=["Year"],
    include_CI=False,
)

### Categorical, not one-hot encoded variables

In [None]:
# ALE for the feature at non_ohe[0], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests Belts — confirm.
Belts_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[0],
    include_CI=False,
    feature_type="discrete",
)

In [None]:
# ALE for the feature at non_ohe[1], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests the
# commercial-license feature — confirm.
Commercial_license_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[1],
    include_CI=False,
    feature_type="discrete",
)

In [None]:
# ALE for the feature at non_ohe[2], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests the
# contributed-to-accident feature — confirm.
Contributed_To_Accident_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[2],
    include_CI=False,
    feature_type="discrete",
)

In [None]:
# ALE for the feature at non_ohe[3], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests the
# personal-injury feature — confirm.
Personal_injury_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[3],
    include_CI=False,
    feature_type="discrete",
)

In [None]:
# ALE for the feature at non_ohe[4], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests the
# property-damage feature — confirm.
Property_damage_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[4],
    include_CI=False,
    feature_type="discrete",
)

In [None]:
# ALE for the feature at non_ohe[5], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests a "state is MD"
# indicator — confirm.
State_MD_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[5],
    include_CI=False,
    feature_type="discrete",
)

In [None]:
# ALE for the feature at non_ohe[6], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests a "driver state
# is MD" indicator — confirm.
Driver_State_MD_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[6],
    include_CI=False,
    feature_type="discrete",
)

In [None]:
# ALE for the feature at non_ohe[7], explicitly treated as discrete and
# computed without confidence intervals.
# NOTE(review): positional index; the variable name suggests a
# "driver-license state is MD" indicator — confirm.
DL_state_MD_ALE = ale(
    model=model,
    X=X_test,
    feature=non_ohe[7],
    include_CI=False,
    feature_type="discrete",
)

### One-hot encoded variables

In [None]:
# ALE for the one-hot encoded variable identified by prefix_ohe[0], produced
# by the project helper from the raw data, the model and the split settings.
# NOTE(review): positional index; the variable name suggests Color — confirm.
Color_ALE = ohe_ale(
    prefix_ohe[0],
    model=model,
    model_cols=cols,
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    figure_size=(10, 5),
)

In [None]:
# ALE for the one-hot encoded variable identified by prefix_ohe[1], produced
# by the project helper from the raw data, the model and the split settings.
# NOTE(review): positional index; the variable name suggests Gender — confirm.
Gender_ALE = ohe_ale(
    prefix_ohe[1],
    model=model,
    model_cols=cols,
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    figure_size=(7, 5),
)

In [None]:
# ALE for the one-hot encoded variable identified by prefix_ohe[2], produced
# by the project helper from the raw data, the model and the split settings.
# Drawn on a wider 20x8 figure than the other one-hot variables.
# NOTE(review): positional index; the variable name suggests Make — confirm.
Make_ALE = ohe_ale(
    prefix_ohe[2],
    model=model,
    model_cols=cols,
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    figure_size=(20, 8),
)

In [None]:
# ALE for the one-hot encoded variable identified by prefix_ohe[3], produced
# by the project helper from the raw data, the model and the split settings.
# NOTE(review): positional index; the variable name suggests Race — confirm.
Race_ALE = ohe_ale(
    prefix_ohe[3],
    model=model,
    model_cols=cols,
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    figure_size=(10, 5),
)

In [None]:
# ALE for the one-hot encoded variable identified by prefix_ohe[4], produced
# by the project helper from the raw data, the model and the split settings.
# NOTE(review): positional index; the variable name suggests VehicleType —
# confirm.
VehicleType_ALE = ohe_ale(
    prefix_ohe[4],
    model=model,
    model_cols=cols,
    df=data,
    cat_cols=CAT_COLS,
    train_size=TRAIN_SIZE,
    test_size=TEST_FROM_VAL,
    random_state=RANDOM_STATE,
    figure_size=(10, 5),
)